/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
protected:
};
-/// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
-/// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
-/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
-struct detection_output_sort
- : public primitive_base<detection_output_sort> {
- CLDNN_DECLARE_PRIMITIVE(detection_output_sort)
-
- /// @brief Constructs detection output primitive.
- /// @param id This primitive id.
- /// @param input_bboxes Input bounding boxes primitive id.
- /// @param num_images Number of images to be predicted.
- /// @param num_classes Number of classes to be predicted.
- /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step.
- /// @param share_location If true bounding box are shared among different classes.
- /// @param top_k Maximum number of results to be kept in NMS.
- /// @param output_padding Output padding.
- detection_output_sort(const primitive_id& id,
- const primitive_id& input_bboxes,
- const uint32_t num_images,
- const uint32_t num_classes,
- const uint32_t keep_top_k,
- const bool share_location = true,
- const int top_k = -1,
- const int background_label_id = -1,
- const padding& output_padding = padding())
- : primitive_base(id, {input_bboxes}, output_padding),
- num_images(num_images),
- num_classes(num_classes),
- keep_top_k(keep_top_k),
- share_location(share_location),
- top_k(top_k),
- background_label_id(background_label_id) {}
-
- /// @brief Number of classes to be predicted.
- const uint32_t num_images;
- /// @brief Number of classes to be predicted.
- const uint32_t num_classes;
- /// @brief Number of total bounding boxes to be kept per image after NMS step.
- const int keep_top_k;
- /// @brief If true, bounding box are shared among different classes.
- const bool share_location;
- /// @brief Maximum number of results to be kept in NMS.
- const int top_k;
- /// @brief Background label id (-1 if there is no background class).
- const int background_label_id;
-};
/// @}
/// @}
/// @}
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
/// @brief Enable implicit static input reordering for user inputs (default: false).
allow_static_input_reorder,
- /// @brief Enable running detection output layer always on gpu, regardless performance
- detection_output_gpu,
-
/// @brief Enable debug mode (default: false).
/// @details This option enforce all program primitives to be accessible as outputs.
debug,
/// @brief Enable implicit reordering for static user inputs (default: false).
static std::shared_ptr<const build_option> allow_static_input_reorder(bool enable = false);
- /// @brief Enable running detection output layer always on GPU, regardless performance (default: false).
- static std::shared_ptr<const build_option> detection_output_gpu(bool enable = false);
-
/// @brief Enable debug mode (default: false).
/// @details This option enforce all program primitives to be accessible as outputs.
static std::shared_ptr<const build_option> debug(bool enable = false);
static std::shared_ptr<const build_option> make_default() { return build_option::allow_static_input_reorder(); }
};
template <>
-struct build_option_traits<build_option_type::detection_output_gpu> {
- typedef build_option_bool<build_option_type::detection_output_gpu> object_type;
- static std::shared_ptr<const build_option> make_default() { return build_option::detection_output_gpu(); }
-};
-template <>
struct build_option_traits<build_option_type::debug> {
typedef build_option_bool<build_option_type::debug> object_type;
static std::shared_ptr<const build_option> make_default() { return build_option::debug(); }
return std::make_shared<build_option_bool<build_option_type::allow_static_input_reorder>>(enable);
}
-inline std::shared_ptr<const build_option> build_option::detection_output_gpu(bool enable) {
- return std::make_shared<build_option_bool<build_option_type::detection_output_gpu>>(enable);
-}
-
inline std::shared_ptr<const build_option> build_option::debug(bool enable) {
return std::make_shared<build_option_bool<build_option_type::debug>>(enable);
}
PYRAMID_ROI_ALIGN,
CONTRACT,
ONE_HOT,
- DETECTION_OUTPUT,
GATHER,
SCATTER_UPDATE,
DEPTH_TO_SPACE,
+++ /dev/null
-// Copyright (c) 2018-2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "detection_output_kernel_base.h"
-
-namespace kernel_selector {
-JitConstants DetectionOutputKernelBase::GetJitConstants(const detection_output_params& params) const {
- JitConstants jit = MakeBaseParamsJitConstants(params);
-
- const auto& detectOutParams = params.detectOutParams;
-
- jit.AddConstants({
- MakeJitConstant("NUM_IMAGES", detectOutParams.num_images),
- MakeJitConstant("NUM_CLASSES", detectOutParams.num_classes),
- MakeJitConstant("KEEP_TOP_K", detectOutParams.keep_top_k),
- MakeJitConstant("TOP_K", detectOutParams.top_k),
- MakeJitConstant("BACKGROUND_LABEL_ID", detectOutParams.background_label_id),
- MakeJitConstant("CODE_TYPE", detectOutParams.code_type),
- MakeJitConstant("CONF_SIZE_X", detectOutParams.conf_size_x),
- MakeJitConstant("CONF_SIZE_Y", detectOutParams.conf_size_y),
- MakeJitConstant("CONF_PADDING_X", detectOutParams.conf_padding_x),
- MakeJitConstant("CONF_PADDING_Y", detectOutParams.conf_padding_y),
- MakeJitConstant("SHARE_LOCATION", detectOutParams.share_location),
- MakeJitConstant("VARIANCE_ENCODED_IN_TARGET", detectOutParams.variance_encoded_in_target),
- MakeJitConstant("NMS_THRESHOLD", detectOutParams.nms_threshold),
- MakeJitConstant("ETA", detectOutParams.eta),
- MakeJitConstant("CONFIDENCE_THRESHOLD", detectOutParams.confidence_threshold),
- MakeJitConstant("IMAGE_WIDTH", detectOutParams.input_width),
- MakeJitConstant("IMAGE_HEIGH", detectOutParams.input_heigh),
- MakeJitConstant("ELEMENTS_PER_THREAD", detectOutParams.elements_per_thread),
- MakeJitConstant("PRIOR_COORD_OFFSET", detectOutParams.prior_coordinates_offset),
- MakeJitConstant("PRIOR_INFO_SIZE", detectOutParams.prior_info_size),
- MakeJitConstant("PRIOR_IS_NORMALIZED", detectOutParams.prior_is_normalized),
- });
-
- return jit;
-}
-
-DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault(const detection_output_params& /*params*/) const {
- DispatchData dispatchData;
-
- dispatchData.gws[0] = 0;
- dispatchData.gws[1] = 0;
- dispatchData.gws[2] = 0;
-
- dispatchData.lws[0] = 0;
- dispatchData.lws[1] = 0;
- dispatchData.lws[2] = 0;
-
- return dispatchData;
-}
-} // namespace kernel_selector
+++ /dev/null
-// Copyright (c) 2018-2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#pragma once
-
-#include "kernel_base_opencl.h"
-#include "kernel_selector_params.h"
-
-namespace kernel_selector {
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// detection_output_params
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct detection_output_params : public base_params {
- detection_output_params() : base_params(KernelType::DETECTION_OUTPUT), detectOutParams() {}
-
- struct DedicatedParams {
- uint32_t num_images;
- uint32_t num_classes;
- int32_t keep_top_k;
- int32_t top_k;
- int32_t background_label_id;
- int32_t code_type;
- int32_t conf_size_x;
- int32_t conf_size_y;
- int32_t conf_padding_x;
- int32_t conf_padding_y;
- int32_t elements_per_thread;
- int32_t input_width;
- int32_t input_heigh;
- int32_t prior_coordinates_offset;
- int32_t prior_info_size;
- bool prior_is_normalized;
- bool share_location;
- bool variance_encoded_in_target;
- float nms_threshold;
- float eta;
- float confidence_threshold;
- };
-
- DedicatedParams detectOutParams;
-
- virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// detection_output_optional_params
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct detection_output_optional_params : optional_params {
- detection_output_optional_params() : optional_params(KernelType::DETECTION_OUTPUT) {}
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// DetectionOutputKernelBase
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-class DetectionOutputKernelBase : public KernelBaseOpenCL {
-public:
- using KernelBaseOpenCL ::KernelBaseOpenCL;
- virtual ~DetectionOutputKernelBase() {}
-
- using DispatchData = CommonDispatchData;
-
-protected:
- JitConstants GetJitConstants(const detection_output_params& params) const;
- virtual DispatchData SetDefault(const detection_output_params& params) const;
-};
-} // namespace kernel_selector
+++ /dev/null
-// Copyright (c) 2018-2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "detection_output_kernel_ref.h"
-#include "kernel_selector_utils.h"
-
-#define PRIOR_BOX_SIZE 4 // Each prior-box consists of [xmin, ymin, xmax, ymax].
-
-namespace kernel_selector {
-
-ParamsKey DetectionOutputKernel::GetSupportedKey() const {
- ParamsKey k;
- k.EnableInputDataType(Datatype::F16);
- k.EnableInputDataType(Datatype::F32);
- k.EnableOutputDataType(Datatype::F16);
- k.EnableOutputDataType(Datatype::F32);
- k.EnableInputLayout(DataLayout::bfyx);
- k.EnableOutputLayout(DataLayout::bfyx);
- k.EnableTensorOffset();
- k.EnableTensorPitches();
- k.EnableBatching();
- return k;
-}
-
-CommonDispatchData DetectionOutputKernel::SetDefault(const detection_output_params& params) const {
- CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params);
-
- // Number of all work items is set to total number of bounding boxes -
- // one bounding box is procerssed by one work item
- size_t num_classes = (params.detectOutParams.share_location) ? 1 : params.detectOutParams.num_classes;
-
- // Size of input0 (input location), if shared loaction it is equal to size of one class,
- // else it has size of all items for all classes
- size_t bboxesNum = params.inputs[0].LogicalSize() / PRIOR_BOX_SIZE / num_classes;
- // Work group size is set to number of bounding boxes per image for sorting purpose
- // (access to one table with sorted values)
- size_t work_group_size = bboxesNum / params.inputs[0].Batch().v;
-
- if (work_group_size > 256) {
- work_group_size = work_group_size / ((work_group_size / 256) + 1) + 1;
- }
-
- bboxesNum = work_group_size * params.inputs[0].Batch().v;
-
- dispatchData.gws[0] = Align(bboxesNum, work_group_size);
- dispatchData.gws[1] = 1;
- dispatchData.gws[2] = 1;
-
- dispatchData.lws[0] = work_group_size;
- dispatchData.lws[1] = 1;
- dispatchData.lws[2] = 1;
-
- return dispatchData;
-}
-
-KernelsData DetectionOutputKernel::GetKernelsData(const Params& params, const optional_params& options) const {
- assert(params.GetType() == KernelType::DETECTION_OUTPUT && options.GetType() == KernelType::DETECTION_OUTPUT);
-
- KernelData kd = KernelData::Default<detection_output_params>(params);
- const detection_output_params& detectOutParams = static_cast<const detection_output_params&>(params);
- DispatchData dispatchData = SetDefault(detectOutParams);
-
- auto cldnnJit = GetJitConstants(detectOutParams);
- auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options);
- auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
-
- auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint);
- kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
- kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2});
-
- kd.estimatedTime = FORCE_PRIORITY_8;
-
- return {kd};
-}
-} // namespace kernel_selector
\ No newline at end of file
+++ /dev/null
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#pragma once
-
-#include "detection_output_kernel_base.h"
-
-namespace kernel_selector {
-
-class DetectionOutputKernel : public DetectionOutputKernelBase {
-public:
- DetectionOutputKernel() : DetectionOutputKernelBase("detection_output") {}
- virtual ~DetectionOutputKernel() {}
-
- KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
- ParamsKey GetSupportedKey() const override;
-
-private:
- CommonDispatchData SetDefault(const detection_output_params& params) const override;
-};
-} // namespace kernel_selector
\ No newline at end of file
+++ /dev/null
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "detection_output_kernel_selector.h"
-#include "detection_output_kernel_ref.h"
-#include "detection_output_kernel_sort.h"
-
-namespace kernel_selector {
-detection_output_kernel_selector::detection_output_kernel_selector() { Attach<DetectionOutputKernel>(); }
-
-KernelsData detection_output_kernel_selector::GetBestKernels(const Params& params,
- const optional_params& options) const {
- return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT);
-}
-
-detection_output_sort_kernel_selector::detection_output_sort_kernel_selector() { Attach<DetectionOutputKernel_sort>(); }
-
-KernelsData detection_output_sort_kernel_selector::GetBestKernels(const Params& params,
- const optional_params& options) const {
- return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT);
-}
-} // namespace kernel_selector
\ No newline at end of file
+++ /dev/null
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#pragma once
-
-#include "kernel_selector.h"
-
-namespace kernel_selector {
-class detection_output_kernel_selector : public kernel_selector_base {
-public:
- static detection_output_kernel_selector& Instance() {
- static detection_output_kernel_selector instance_;
- return instance_;
- }
-
- detection_output_kernel_selector();
-
- virtual ~detection_output_kernel_selector() {}
-
- KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
-};
-
-class detection_output_sort_kernel_selector : public kernel_selector_base {
-public:
- static detection_output_sort_kernel_selector& Instance() {
- static detection_output_sort_kernel_selector instance_;
- return instance_;
- }
-
- detection_output_sort_kernel_selector();
-
- virtual ~detection_output_sort_kernel_selector() {}
-
- KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
-};
-} // namespace kernel_selector
\ No newline at end of file
+++ /dev/null
-// Copyright (c) 2018-2020 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "detection_output_kernel_sort.h"
-#include "kernel_selector_utils.h"
-
-#define DETECTION_OUTPUT_ROW_SIZE 7 // Each detection consists of [image_id, label, confidence, xmin, ymin, xmax, ymax].
-
-namespace kernel_selector {
-
-ParamsKey DetectionOutputKernel_sort::GetSupportedKey() const {
- ParamsKey k;
- k.EnableInputDataType(Datatype::F16);
- k.EnableInputDataType(Datatype::F32);
- k.EnableOutputDataType(Datatype::F16);
- k.EnableOutputDataType(Datatype::F32);
- k.EnableInputLayout(DataLayout::bfyx);
- k.EnableOutputLayout(DataLayout::bfyx);
- k.EnableTensorOffset();
- k.EnableTensorPitches();
- k.EnableBatching();
- return k;
-}
-
-CommonDispatchData DetectionOutputKernel_sort::SetDefault(const detection_output_params& params) const {
- CommonDispatchData dispatchData = DetectionOutputKernelBase::SetDefault(params);
-
- unsigned class_num = params.detectOutParams.num_classes;
- if (params.detectOutParams.share_location && params.detectOutParams.background_label_id == 0) {
- class_num -= 1;
- }
- const size_t bboxesNum = class_num * params.detectOutParams.num_images;
- // Work group size is set to number of bounding boxes per image
- size_t work_group_size = class_num;
-
- if (work_group_size > 256) {
- work_group_size = (work_group_size + work_group_size % 2) / (work_group_size / 256 + 1);
- }
-
- dispatchData.gws[0] = Align(bboxesNum, work_group_size);
- dispatchData.gws[1] = 1;
- dispatchData.gws[2] = 1;
-
- dispatchData.lws[0] = work_group_size;
- dispatchData.lws[1] = 1;
- dispatchData.lws[2] = 1;
-
- return dispatchData;
-}
-
-KernelsData DetectionOutputKernel_sort::GetKernelsData(const Params& params, const optional_params& options) const {
- assert(params.GetType() == KernelType::DETECTION_OUTPUT &&
- options.GetType() == KernelType::DETECTION_OUTPUT);
-
- KernelData kd = KernelData::Default<detection_output_params>(params);
- const detection_output_params& detectOutParams = static_cast<const detection_output_params&>(params);
- DispatchData dispatchData = SetDefault(detectOutParams);
-
- auto cldnnJit = GetJitConstants(detectOutParams);
- auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options);
- auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
-
- auto& kernel = kd.kernels[0];
- FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entryPoint);
-
- kd.estimatedTime = FORCE_PRIORITY_8;
-
- return {kd};
-}
-} // namespace kernel_selector
\ No newline at end of file
+++ /dev/null
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#pragma once
-
-#include "detection_output_kernel_base.h"
-
-namespace kernel_selector {
-
-class DetectionOutputKernel_sort : public DetectionOutputKernelBase {
-public:
- DetectionOutputKernel_sort() : DetectionOutputKernelBase("detection_output_sort") {}
- virtual ~DetectionOutputKernel_sort() {}
-
- KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
- ParamsKey GetSupportedKey() const override;
-
-private:
- CommonDispatchData SetDefault(const detection_output_params& params) const override;
-};
-} // namespace kernel_selector
\ No newline at end of file
+++ /dev/null
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "include/include_all.cl"
-#include "include/detection_output_common.cl"
-
-KERNEL (detection_output)(__global UNIT_TYPE* input_location, __global UNIT_TYPE* output, __global UNIT_TYPE* input_confidence, __global UNIT_TYPE* input_prior_box)
-{
- const uint idx = get_global_id(0); // bbox idx
- const uint local_id = (uint)get_local_id(0) * NUM_OF_ITEMS; // All bboxes from one image in work group
- const uint idx_image = idx / NUM_OF_ITERATIONS; // idx of current image
-
- __local uint indexes[NUM_OF_PRIORS];
- __local uint scores_size[NUM_CLASSES * NUM_OF_IMAGES];
- __local bool stillSorting;
-
- uint indexes_class_0[NUM_OF_PRIORS];
-
- int last_bbox_in_class = NUM_OF_ITEMS;
- bool is_last_bbox_in_class = false;
- for (uint it = 0; it < NUM_OF_ITEMS; it ++)
- {
- if (((local_id + it + 1) % NUM_OF_PRIORS) == 0 )
- {
- last_bbox_in_class = it;
- is_last_bbox_in_class = true;
- break;
- }
- }
-
- for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++)
- {
- if (idx_class == BACKGROUND_LABEL_ID)
- {
- continue;
- }
-
- for (uint it = 0; it < NUM_OF_ITEMS; it++)
- {
- indexes[local_id + it] = local_id + it;
- }
-
- stillSorting = true;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- bool is_last_bbox_in_image = (is_last_bbox_in_class) && (idx_class == (NUM_CLASSES - 1));
-
- while(stillSorting)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- stillSorting = false;
-
- for (uint i = 0; i < 2; i++)
- {
- for (uint it = 0; it < NUM_OF_ITEMS; it++)
- {
- uint item_id = local_id + it;
-
- uint idx1 = indexes[item_id];
- uint idx2 = indexes[item_id+1];
- bool perform = false;
- if ((((i % 2) && (item_id % 2)) ||
- ((!(i % 2)) && (!(item_id % 2)))) &&
- (it < last_bbox_in_class))
- {
- perform = true;
- }
-
- if (perform &&
- (FUNC_CALL(get_score)(input_confidence, idx1, idx_class, idx_image) <
- FUNC_CALL(get_score)(input_confidence, idx2, idx_class, idx_image)))
- {
- indexes[item_id] = idx2;
- indexes[item_id+1] = idx1;
- stillSorting = true;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- }
- }
-
- // Do it only once per class in image
- if (is_last_bbox_in_class)
- {
- UNIT_TYPE adaptive_threshold = NMS_THRESHOLD;
- uint post_nms_count = 0;
- const uint shared_class = (SHARE_LOCATION)? 0 : idx_class;
- scores_size[idx_class] = 0;
-
- // Do the "keep" algorithm only for classes with confidence greater than CONFIDENCE_THRESHOLD.
- // Check first, the biggest one (after sort) element in class.
- if (FUNC_CALL(get_score)(input_confidence, indexes[0], idx_class, idx_image) != 0.0f)
- {
- for (uint i = 0; i < SCORES_COUNT; i++)
- {
- const uint bb_idx = indexes[i];
- bool keep = true;
- for (uint j = 0; j < post_nms_count; j++)
- {
- if (!keep)
- {
- break;
- }
-
- UNIT_TYPE overlap = 0.0;
- const uint bb_idx2 = indexes[j];
-
- UNIT_TYPE decoded_bbox1[4];
- FUNC_CALL(get_decoded_bbox)(decoded_bbox1, input_location, input_prior_box, bb_idx, shared_class, idx_image);
- UNIT_TYPE decoded_bbox2[4];
- FUNC_CALL(get_decoded_bbox)(decoded_bbox2, input_location, input_prior_box, bb_idx2, shared_class, idx_image);
- bool intersecting =
- (decoded_bbox1[0] < decoded_bbox2[2]) &
- (decoded_bbox2[0] < decoded_bbox1[2]) &
- (decoded_bbox1[1] < decoded_bbox2[3]) &
- (decoded_bbox2[1] < decoded_bbox1[3]);
-
- if (intersecting)
- {
- const UNIT_TYPE intersect_width = min(decoded_bbox1[2], decoded_bbox2[2]) - max(decoded_bbox1[0], decoded_bbox2[0]);
- const UNIT_TYPE intersect_height = min(decoded_bbox1[3], decoded_bbox2[3]) - max(decoded_bbox1[1], decoded_bbox2[1]);
- const UNIT_TYPE intersect_size = intersect_width * intersect_height;
- const UNIT_TYPE bbox1_area = (decoded_bbox1[2] - decoded_bbox1[0]) * (decoded_bbox1[3] - decoded_bbox1[1]);
- const UNIT_TYPE bbox2_area = (decoded_bbox2[2] - decoded_bbox2[0]) * (decoded_bbox2[3] - decoded_bbox2[1]);
- overlap = intersect_size / (bbox1_area + bbox2_area - intersect_size);
- }
- keep = (overlap <= adaptive_threshold);
- }
- if (keep)
- {
- indexes[post_nms_count] = indexes[i];
- ++post_nms_count;
- }
- if ((keep) && (ETA < 1) && (adaptive_threshold > 0.5))
- {
- adaptive_threshold *= ETA;
- }
- }
- }
- // Write number of scores to global memory, for proper output order in separated work groups
- scores_size[idx_class] = post_nms_count;
- }
-
- stillSorting = true;
- // Wait for scores number from all classes in images
- barrier(CLK_LOCAL_MEM_FENCE);
-
- uint output_offset = (idx_image * NUM_CLASSES_OUT + idx_class - HIDDEN_CLASS) * SCORES_COUNT;
-
- for (uint it = 0; it < NUM_OF_ITEMS; it++)
- {
- const uint local_id_out = local_id + it;
-
- if (local_id_out < scores_size[idx_class])
- {
- const uint score_idx = indexes[local_id_out];
- uint bb_idx = indexes[local_id_out];
- const uint shared_class = (SHARE_LOCATION)? 0 : idx_class;
- UNIT_TYPE decoded_bbox[4];
- FUNC_CALL(get_decoded_bbox)(decoded_bbox, input_location, input_prior_box, bb_idx, shared_class, idx_image);
-
- const uint out_idx = (local_id_out + output_offset) * OUTPUT_ROW_SIZE + OUTPUT_OFFSET;
- output[out_idx] = TO_UNIT_TYPE(idx_image);
- output[out_idx + 1] = TO_UNIT_TYPE(idx_class);
- output[out_idx + 2] = FUNC_CALL(get_score)(input_confidence, score_idx, idx_class, idx_image);
- output[out_idx + 3] = decoded_bbox[0];
- output[out_idx + 4] = decoded_bbox[1];
- output[out_idx + 5] = decoded_bbox[2];
- output[out_idx + 6] = decoded_bbox[3];
- }
- }
-
- // If work item is processing last bbox in image (we already know the number of all detections),
- // use it to fill rest of keep_top_k items if number of detections is smaller
- if (is_last_bbox_in_class)
- {
- uint out_idx = output_offset + scores_size[idx_class];
-
- uint current_top_k = output_offset + SCORES_COUNT;
- for (uint i = out_idx; i < current_top_k; i++)
- {
- out_idx = i * OUTPUT_ROW_SIZE + OUTPUT_OFFSET;
- output[out_idx] = -1.0;
- output[out_idx + 1] = 0.0;
- output[out_idx + 2] = 0.0;
- output[out_idx + 3] = 0.0;
- output[out_idx + 4] = 0.0;
- output[out_idx + 5] = 0.0;
- output[out_idx + 6] = 0.0;
- }
- }
-
- // Write number of scores kept in first step of detection output
- if (is_last_bbox_in_image)
- {
- uint scores_sum = 0;
- for (uint i = 0; i < NUM_CLASSES; i++)
- {
- scores_sum += scores_size[i];
- }
- output[idx_image] = scores_sum;
-
- }
- }
-}
+++ /dev/null
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include "include/include_all.cl"
-#include "include/detection_output_common.cl"
-
-UNIT_TYPE FUNC(get_score_sort)(__global UNIT_TYPE* input_bboxes, const uint idx_bbox, const uint idx_image)
-{
- if (idx_bbox == KEEP_BBOXES_NUM)
- {
- // Idx set to dummy value, return -1 to exclude this element from sorting
- return -1;
- }
- else
- {
- return input_bboxes[(idx_bbox + idx_image * NUM_OF_IMAGE_BBOXES) * OUTPUT_ROW_SIZE + INPUT_OFFSET + SCORE_OFFSET];
- }
-}
-
-KERNEL (detection_output_sort)(__global UNIT_TYPE* input_bboxes, __global UNIT_TYPE* output)
-{
- __local uint indexes[NUM_CLASSES_IN];
- __local bool stillSorting;
- __local uint output_count;
- __local uint num_out_per_class[NUM_CLASSES_IN];
-
- output_count = 0;
- num_out_per_class[get_local_id(0)] = 0;
-
- const uint image_id = (uint)get_global_id(0) / NUM_CLASSES_IN;
- const uint local_id = (uint)get_local_id(0) * NUM_OF_ITEMS_SORT; // All bboxes from one image in work group
-
- uint image_offset_input = image_id * NUM_OF_IMAGE_BBOXES;
-
- uint count_sum = 0;
- for (uint i = 0; i < image_id; i++)
- {
- count_sum += (input_bboxes[i] < KEEP_TOP_K)? input_bboxes[i] : KEEP_TOP_K;
- }
-
- uint image_offset_output = count_sum * OUTPUT_ROW_SIZE;
-
- // If there is less elements than needed, write input to output
- if (input_bboxes[image_id] <= KEEP_TOP_K)
- {
- if (local_id == 0)
- {
- for (uint class = 0; class < NUM_CLASSES_IN; class++)
- {
- if (class == BACKGROUND_LABEL_ID && !HIDDEN_CLASS)
- {
- continue;
- }
- for (uint i = 0; i < NUM_OF_CLASS_BBOXES; i++)
- {
- uint input_idx = (i + image_offset_input + class * NUM_OF_CLASS_BBOXES) * OUTPUT_ROW_SIZE + INPUT_OFFSET;
- if (input_bboxes[input_idx] != -1)
- {
- uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output;
-
- for (uint idx = 0; idx < OUTPUT_ROW_SIZE; idx++)
- {
- output[out_idx + idx] = input_bboxes[input_idx + idx];
- }
-
- output_count++;
- }
- else
- {
- break;
- }
- }
- }
- }
- }
- else
- {
- uint sorted_output[KEEP_TOP_K * NUM_CLASSES_IN];
-
- for (uint it = 0; it < NUM_OF_ITEMS_SORT; it++)
- {
- indexes[local_id + it] = (local_id + it) * NUM_OF_CLASS_BBOXES;
- }
-
- while (output_count < KEEP_BBOXES_NUM)
- {
- stillSorting = true;
-
- while(stillSorting)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- stillSorting = false;
- for (uint it = 0; it < NUM_OF_ITEMS_SORT; it++)
- {
- uint item_id = local_id + it;
- for (uint i = 0; i < 2; i++)
- {
-
- uint idx1 = indexes[item_id];
- uint idx2 = indexes[item_id+1];
- bool perform = false;
- if ((((i % 2) && (item_id % 2)) ||
- ((!(i % 2)) && (!(item_id % 2)))) &&
- (item_id != (NUM_CLASSES_IN - 1)))
- {
- perform = true;
- }
-
- if (perform &&
- (FUNC_CALL(get_score_sort)(input_bboxes, idx1, image_id) <
- FUNC_CALL(get_score_sort)(input_bboxes, idx2, image_id)))
- {
- indexes[item_id] = idx2;
- indexes[item_id+1] = idx1;
- stillSorting = true;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- }
- }
-
- if (local_id == 0)
- {
- UNIT_TYPE top_score = FUNC_CALL(get_score_sort)(input_bboxes, indexes[0], image_id);
-
- if (top_score != 0)
- {
- for (uint it = 0; (it < NUM_CLASSES_IN) && (output_count < KEEP_BBOXES_NUM); it++)
- {
- if (FUNC_CALL(get_score_sort)(input_bboxes, indexes[it], image_id) == top_score)
- {
- // write to output, create counter, and check if keep_top_k is satisfied.
- uint input_idx = (indexes[it] + image_offset_input) * OUTPUT_ROW_SIZE + INPUT_OFFSET;
- uint class_idx = input_bboxes[input_idx + 1] - HIDDEN_CLASS;
-
- sorted_output[class_idx * KEEP_TOP_K + num_out_per_class[class_idx]] = input_idx;
- num_out_per_class[class_idx]++;
-
- indexes[it]++;
- output_count++;
-
- // If all class elements are written to output, set dummy value to exclude class from sorting.
- if ((indexes[it] % NUM_OF_CLASS_BBOXES) == 0)
- {
- indexes[it] = KEEP_BBOXES_NUM;
- }
- }
- }
- }
- else
- {
- // There is no more significant results to sort.
- output_count = KEEP_BBOXES_NUM;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- if (local_id == 0)
- {
- output_count = 0;
- for (uint i = 0; i < NUM_CLASSES_IN; i++)
- {
- for (uint j = 0; j < num_out_per_class[i]; j++)
- {
-
- uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output;
- for (uint idx = 0; idx < OUTPUT_ROW_SIZE; idx++)
- {
- output[out_idx + idx] = input_bboxes[sorted_output[i * KEEP_TOP_K + j] + idx];
- }
- output_count++;
- }
- }
- uint image_count_sum = (input_bboxes[image_id] < KEEP_TOP_K)? input_bboxes[image_id] : KEEP_TOP_K;
- for (output_count; output_count < image_count_sum; output_count++)
- {
- uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output;
- output[out_idx] = -1.0;
- output[out_idx + 1] = 0.0;
- output[out_idx + 2] = 0.0;
- output[out_idx + 3] = 0.0;
- output[out_idx + 4] = 0.0;
- output[out_idx + 5] = 0.0;
- output[out_idx + 6] = 0.0;
- }
- }
- }
-
- if (local_id == 0 &&
- image_id == (NUM_IMAGES - 1))
- {
- for (output_count += count_sum; output_count < (KEEP_TOP_K * NUM_IMAGES); output_count++ )
- {
- uint out_idx = output_count * OUTPUT_ROW_SIZE;
- output[out_idx] = -1.0;
- output[out_idx + 1] = 0.0;
- output[out_idx + 2] = 0.0;
- output[out_idx + 3] = 0.0;
- output[out_idx + 4] = 0.0;
- output[out_idx + 5] = 0.0;
- output[out_idx + 6] = 0.0;
- }
- }
-
-}
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Add space for number of output results per image - needed in the next detection output step
output_size += ((input_layout.size.batch[0] + 15) / 16) * 16;
- if (node.get_program().get_options().get<build_option_type::detection_output_gpu>()->enabled()) {
- return {input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, 1, output_size)};
- } else {
- return {input_layout.data_type,
- cldnn::format::bfyx,
- cldnn::tensor(1,
- 1,
- DETECTION_OUTPUT_ROW_SIZE,
- node.get_primitive()->keep_top_k * input_layout.size.batch[0])};
- }
+ return {input_layout.data_type, cldnn::format::bfyx,
+ cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, node.get_primitive()->keep_top_k * input_layout.size.batch[0])};
}
std::string detection_output_inst::to_string(detection_output_node const& node) {
"Detection output layer doesn't support input padding in Prior-Box input");
}
-/************************ Detection Output keep_top_k part ************************/
-
-primitive_type_id detection_output_sort::type_id() {
- static primitive_type_base<detection_output_sort> instance;
- return &instance;
-}
-
-layout detection_output_sort_inst::calc_output_layout(detection_output_sort_node const& node) {
- assert(static_cast<bool>(node.get_primitive()->output_data_type) == false &&
- "Output data type forcing is not supported for "
- "detection_output_sort_node!");
- CLDNN_ERROR_NOT_EQUAL(node.id(),
- "Detection output layer input number",
- node.get_dependencies().size(),
- "expected number of inputs",
- static_cast<size_t>(1),
- "");
-
- auto input_layout = node.input().get_output_layout();
- int keep_top_k = node.as<detection_output_sort>().get_primitive()->keep_top_k;
- int num_images = node.as<detection_output_sort>().get_primitive()->num_images;
-
- // If detection output sort is used as a second part of detection output get proper info from detection otput node
- if (num_images == 0) {
- CLDNN_ERROR_BOOL(node.id(),
- "node.get_dependency(0).is_type<detection_output>()",
- !node.get_dependency(0).is_type<detection_output>(),
- "Cannot calculate output layout.");
- input_layout = node.get_dependency(0).as<detection_output>().location().get_output_layout();
- keep_top_k = node.get_dependency(0).as<detection_output>().get_primitive()->keep_top_k;
- num_images = input_layout.size.batch[0];
- }
- // Batch size and feature size are 1.
- // Number of bounding boxes to be kept is set to keep_top_k*batch size.
- // If number of detections is lower than keep_top_k, will write dummy results at the end with image_id=-1.
- // Each row is a 7 dimension vector, which stores:
- // [image_id, label, confidence, xmin, ymin, xmax, ymax]
- return {input_layout.data_type,
- cldnn::format::bfyx,
- cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, keep_top_k * num_images)};
-}
-
-std::string detection_output_sort_inst::to_string(detection_output_sort_node const& node) {
- auto node_info = node.desc_to_json();
- auto desc = node.get_primitive();
-
- auto& input_bboxes = node.input();
-
- std::stringstream primitive_description;
-
- json_composite detec_out_info;
- detec_out_info.add("input bboxes id", input_bboxes.id());
- detec_out_info.add("num_classes:", desc->num_images);
- detec_out_info.add("num_classes:", desc->num_classes);
- detec_out_info.add("keep_top_k", desc->keep_top_k);
- detec_out_info.add("share_location", desc->share_location);
- detec_out_info.add("top_k", desc->top_k);
- detec_out_info.dump(primitive_description);
-
- node_info->add("dection output info", detec_out_info);
- node_info->dump(primitive_description);
-
- return primitive_description.str();
-}
-
-detection_output_sort_inst::typed_primitive_inst(network_impl& network, detection_output_sort_node const& node)
- : parent(network, node) {
- CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(),
- "Input memory format",
- node.get_dependency(0).get_output_layout().format.value,
- "expected bfyx input format",
- format::bfyx);
-
- CLDNN_ERROR_BOOL(node.id(),
- "Detecion output layer padding",
- node.is_padded(),
- "Detection output layer doesn't support output padding.");
-}
} // namespace cldnn
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
#include "network_impl.h"
#include "implementation_map.h"
#include "math_utils.h"
+#include "register_gpu.hpp"
#include "cpu_impl_helpers.hpp"
#include <algorithm>
static primitive_impl* create(const detection_output_node& arg) { return new detection_output_cpu(arg); }
};
-primitive_impl* runDetectOutCpu(const detection_output_node& arg) { return new detection_output_cpu(arg); }
+namespace detail {
+
+attach_detection_output_gpu::attach_detection_output_gpu() {
+ implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_cpu::create);
+ implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_cpu::create);
+}
+
+} // namespace detail
} // namespace gpu
} // namespace cldnn
+++ /dev/null
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "detection_output_inst.h"
-#include "primitive_gpu_base.h"
-#include "error_handler.h"
-#include "kernel_selector_helper.h"
-#include "detection_output/detection_output_kernel_base.h"
-#include "detection_output/detection_output_kernel_selector.h"
-
-#ifdef FIX_OPENMP_RELEASE_ISSUE
-#ifdef OPENMP_FOUND
-#include <omp.h>
-#endif
-#endif
-
-namespace cldnn {
-namespace gpu {
-
-struct detection_output_gpu : typed_primitive_gpu_impl<detection_output> {
- using parent = typed_primitive_gpu_impl<detection_output>;
- using parent::parent;
-
-private:
- static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams,
- const detection_output_node& arg) {
- auto primitive = arg.get_primitive();
- detectOutParams.keep_top_k = primitive->keep_top_k;
- detectOutParams.num_classes = primitive->num_classes;
- detectOutParams.top_k = primitive->top_k;
- detectOutParams.background_label_id = primitive->background_label_id;
- detectOutParams.code_type = (int32_t)primitive->code_type;
- detectOutParams.share_location = primitive->share_location;
- detectOutParams.variance_encoded_in_target = primitive->variance_encoded_in_target;
- detectOutParams.nms_threshold = primitive->nms_threshold;
- detectOutParams.eta = primitive->eta;
- detectOutParams.confidence_threshold = primitive->confidence_threshold;
- detectOutParams.prior_coordinates_offset = primitive->prior_coordinates_offset;
- detectOutParams.prior_info_size = primitive->prior_info_size;
- detectOutParams.prior_is_normalized = primitive->prior_is_normalized;
- detectOutParams.input_width = primitive->input_width;
- detectOutParams.input_heigh = primitive->input_height;
- detectOutParams.conf_size_x = arg.confidence().get_output_layout().get_buffer_size().spatial[0];
- detectOutParams.conf_size_y = arg.confidence().get_output_layout().get_buffer_size().spatial[1];
- detectOutParams.conf_padding_x = arg.confidence().get_output_layout().data_padding.lower_size().spatial[0];
- detectOutParams.conf_padding_y = arg.confidence().get_output_layout().data_padding.lower_size().spatial[1];
- }
-
-public:
- static primitive_impl* create(const detection_output_node& arg) {
- if (!arg.get_program().get_options().get<build_option_type::detection_output_gpu>()->enabled()) {
- return runDetectOutCpu(arg);
- }
-
- auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
- auto detect_out_optional_params =
- get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
-
- setDetectOutSpecificParams(detect_out_params.detectOutParams, arg);
-
- auto& kernel_selector = kernel_selector::detection_output_kernel_selector::Instance();
- auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
-
- CLDNN_ERROR_BOOL(arg.id(),
- "Best_kernel.empty()",
- best_kernels.empty(),
- "Cannot find a proper kernel with this arguments");
-
- auto detect_out = new detection_output_gpu(arg, best_kernels[0]);
-
- return detect_out;
- }
-};
-
-primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel) {
- return new detection_output_gpu(arg, kernel);
-}
-
-/************************ Detection Output keep_top_k part ************************/
-
-struct detection_output_sort_gpu : typed_primitive_gpu_impl<detection_output_sort> {
- using parent = typed_primitive_gpu_impl<detection_output_sort>;
- using parent::parent;
-
-private:
- static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams,
- const detection_output_sort_node& arg) {
- if (arg.get_dependency(0).is_type<detection_output>()) {
- auto primitive = arg.get_dependency(0).as<detection_output>().get_primitive();
- detectOutParams.keep_top_k = primitive->keep_top_k;
- detectOutParams.num_classes = primitive->num_classes;
- detectOutParams.num_images =
- arg.get_dependency(0).as<detection_output>().location().get_output_layout().size.batch[0];
- detectOutParams.top_k = primitive->top_k;
- detectOutParams.share_location = primitive->share_location;
- detectOutParams.background_label_id = primitive->background_label_id;
- } else {
- auto primitive = arg.get_primitive();
- detectOutParams.keep_top_k = primitive->keep_top_k;
- detectOutParams.num_classes = primitive->num_classes;
- detectOutParams.num_images = primitive->num_images;
- detectOutParams.top_k = primitive->top_k;
- detectOutParams.share_location = primitive->share_location;
- detectOutParams.background_label_id = primitive->background_label_id;
- }
- }
-
-public:
- static primitive_impl* create(const detection_output_sort_node& arg) {
- auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
- auto detect_out_optional_params =
- get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
-
- setDetectOutSpecificParams(detect_out_params.detectOutParams, arg);
-
- auto& kernel_selector = kernel_selector::detection_output_sort_kernel_selector::Instance();
- auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
-
- CLDNN_ERROR_BOOL(arg.id(),
- "Best_kernel.empty()",
- best_kernels.empty(),
- "Cannot find a proper kernel with this arguments");
-
- auto detect_out = new detection_output_sort_gpu(arg, best_kernels[0]);
-
- return detect_out;
- }
-};
-
-primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel) {
- return new detection_output_sort_gpu(arg, kernel);
-}
-
-namespace detail {
-
-attach_detection_output_gpu::attach_detection_output_gpu() {
- implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
- detection_output_gpu::create);
- implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
- detection_output_gpu::create);
- implementation_map<detection_output_sort>::add(
- std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
- detection_output_sort_gpu::create);
- implementation_map<detection_output_sort>::add(
- std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
- detection_output_sort_gpu::create);
-}
-
-} // namespace detail
-} // namespace gpu
-} // namespace cldnn
}
}
-void graph_initializations::handle_detection_output(program_impl& p) {
- auto itr = p.nodes_map.begin(); // note we need to use iterators since currently processed element can be removed
- while (itr != p.nodes_map.end()) {
- auto node_itr = itr++;
- auto& node = *(*node_itr).second;
- // Create second part detection output primitive and replace nodes names - do it only once
- if ((p.get_options().get<build_option_type::detection_output_gpu>()->enabled()) &&
- (node.is_type<detection_output>()) &&
- (node.id().find("_pre") ==
- std::string::npos)) { // ToDo: this will fail if user will name the primitive with using _pre like do_pre
- // we need to use node mark() or some other idea to prevent it
- // rename detection output
- const primitive_id detect_out_node_name = node.id();
- const primitive_id new_primitive_id = detect_out_node_name + "_pre";
- p.rename(node, new_primitive_id);
-
- auto detect_out_prim = node.as<detection_output>().typed_desc();
- // Create new primitive, "keep top k" part of detection output
- // ToDo: add a default parameters to the detection_output_sort class constructor to get rid off this
- // initialization from here
- auto detect_out_sort_prim =
- std::make_shared<detection_output_sort>(detect_out_node_name,
- node.id(),
- // not important params here - it will be set during
- // "primitive_impl* create" func in "detection_output_sort_gpu"
- 0, // num_images
- 0, // num_classes
- 0, // keep_top_k
- false, // share_location
- 0, // top_k
- -1, // background_label_id
- detect_out_prim->output_padding);
-
- p.get_or_create(detect_out_sort_prim);
-
- auto sort_node_itr = p.nodes_map.find(detect_out_node_name);
- if (sort_node_itr == p.nodes_map.end()) continue;
-
- auto sort_node = sort_node_itr->second;
-
- // Add connection to second part of detection output
- if (node.get_users().size()) {
- p.add_intermediate(*sort_node, *(node.get_users().front()), 0, false);
- } else {
- p.add_connection(node, *sort_node);
- }
- }
- }
-}
-
void graph_initializations::handle_lstm(program_impl& p) {
bool has_lstm_children;
auto itr = p.nodes_map.begin(); // note we need to use iterators since currently processed element can be removed
void graph_initializations::run(program_impl& p) {
replace_nodes(p);
- handle_detection_output(p);
handle_lstm(p);
handle_dynamic_lstm(p);
set_outputs(p);
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
using detection_output_inst = typed_primitive_inst<detection_output>;
-template <>
-class typed_program_node<detection_output_sort> : public typed_program_node_base<detection_output_sort> {
- using parent = typed_program_node_base<detection_output_sort>;
-
-public:
- using parent::parent;
-
- program_node& input() const { return get_dependency(0); }
-};
-
-using detection_output_sort_node = typed_program_node<detection_output_sort>;
-
-template <>
-class typed_primitive_inst<detection_output_sort> : public typed_primitive_inst_base<detection_output_sort> {
- using parent = typed_primitive_inst_base<detection_output_sort>;
-
-public:
- static layout calc_output_layout(detection_output_sort_node const& node);
- static std::string to_string(detection_output_sort_node const& node);
-
-public:
- typed_primitive_inst(network_impl& network, detection_output_sort_node const& node);
-};
-
-using detection_output_sort_inst = typed_primitive_inst<detection_output_sort>;
-
-namespace gpu {
-primitive_impl* runDetectOutCpu(const detection_output_node& arg);
-primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel);
-primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel);
-} // namespace gpu
-
-} // namespace cldnn
\ No newline at end of file
+} // namespace cldnn
/*
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
private:
void run(program_impl& p) override;
void replace_nodes(program_impl& p);
- void handle_detection_output(program_impl& p);
void handle_lstm(program_impl& p);
void handle_dynamic_lstm(program_impl& p);
void set_outputs(program_impl& p);
/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
#include <api/engine.hpp>
#include "test_utils/test_utils.h"
-namespace cldnn
-{
+namespace cldnn {
template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
}
using namespace tests;
template <typename T>
-class detection_output_test : public ::testing::Test
-{
+class detection_output_test : public ::testing::Test {
public:
detection_output_test() :
- nms_threshold(0.1f)
- {}
+ nms_threshold(0.1f) {}
void init_buffers(cldnn::memory prior_memory, cldnn::memory confidence_memory, cldnn::memory location_memory,
bool share_location, bool variance_encoded_in_target = false,
- int prior_info_size = 4, int prior_coordinates_offset = 0, bool prior_is_normalized = true)
- {
+ int prior_info_size = 4, int prior_coordinates_offset = 0, bool prior_is_normalized = true) {
auto location_ptr = location_memory.pointer<T>();
auto confidence_ptr = confidence_memory.pointer<T>();
auto prior_box_ptr = prior_memory.pointer<T>();
const float prior_multiplier = prior_is_normalized ? 1.0f : static_cast<float>(this->img_size);
const float variance = 0.1f;
int idx = 0;
- for (int h = 0; h < 2; ++h)
- {
+ for (int h = 0; h < 2; ++h) {
float center_y = (h + 0.5f) * step;
- for (int w = 0; w < 2; ++w)
- {
+ for (int w = 0; w < 2; ++w) {
float center_x = (w + 0.5f) * step;
prior_data[idx+prior_coordinates_offset+0] = (center_x - box_size / 2) * prior_multiplier;
prior_data[idx+prior_coordinates_offset+1] = (center_y - box_size / 2) * prior_multiplier;
idx += prior_info_size;
}
}
- if (!variance_encoded_in_target)
- {
- for (int i = 0; i < idx; ++i)
- {
+ if (!variance_encoded_in_target) {
+ for (int i = 0; i < idx; ++i) {
prior_data[idx + i] = variance;
}
}
// Fill confidences.
idx = 0;
- for (int i = 0; i < num_of_images; ++i)
- {
- for (int j = 0; j < num_priors; ++j)
- {
- for (int c = 0; c < num_classes; ++c)
- {
- if (i % 2 == c % 2)
- {
+ for (int i = 0; i < num_of_images; ++i) {
+ for (int j = 0; j < num_priors; ++j) {
+ for (int c = 0; c < num_classes; ++c) {
+ if (i % 2 == c % 2) {
confidence_data[idx++] = j * 0.2f;
- }
- else
- {
+ } else {
confidence_data[idx++] = 1 - j * 0.2f;
}
}
const int num_loc_classes = share_location ? 1 : num_classes;
const float loc_multiplier = variance_encoded_in_target ? variance : 1.0f;
idx = 0;
- for (int i = 0; i < num_of_images; ++i)
- {
- for (int h = 0; h < 2; ++h)
- {
- for (int w = 0; w < 2; ++w)
- {
- for (int c = 0; c < num_loc_classes; ++c)
- {
+ for (int i = 0; i < num_of_images; ++i) {
+ for (int h = 0; h < 2; ++h) {
+ for (int w = 0; w < 2; ++w) {
+ for (int c = 0; c < num_loc_classes; ++c) {
location_data[idx++] = (w % 2 ? -1 : 1) * (i * 1 + c / 2.f + 0.5f) * loc_multiplier;
location_data[idx++] = (h % 2 ? -1 : 1) * (i * 1 + c / 2.f + 0.5f) * loc_multiplier;
location_data[idx++] = (w % 2 ? -1 : 1) * (i * 1 + c / 2.f + 0.5f) * loc_multiplier;
}
}
- void init_buffer_sort(cldnn::memory input_buff)
- {
+ void init_buffer_sort(cldnn::memory input_buff) {
auto input_data_ptr = input_buff.pointer<T>();
EXPECT_EQ((int)input_buff.count(), 128);
input_data[121] = -1; input_data[122] = 0; input_data[123] = 0; input_data[124] = 0; input_data[125] = 0; input_data[126] = 0; input_data[127] = 0;
}
- void check_results(const memory& output, const int num, const std::string values)
- {
+ void check_results(const memory& output, const int num, const std::string values) {
assert(num < output.get_layout().size.spatial[1]);
// Split values to vector of items.
// Check data.
auto out_ptr = output.pointer<T>();
const T* data = out_ptr.data();
- for (int i = 0; i < 2; ++i)
- {
+ for (int i = 0; i < 2; ++i) {
EXPECT_EQ(static_cast<int>((float)data[num * output.get_layout().size.spatial[0] + i]), atoi(items[i].c_str()));
}
- for (int i = 2; i < 7; ++i)
- {
+ for (int i = 2; i < 7; ++i) {
EXPECT_TRUE(floating_point_equal(data[num * output.get_layout().size.spatial[0] + i], (T)(float)atof(items[i].c_str())));
}
}
- void setup_basic(bool runOnGPU)
- {
+ void setup_basic() {
const bool share_location = true;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 150;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
}
- void setup_two_layers(bool runOnGPU)
- {
+ void setup_two_layers() {
const bool share_location = true;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 150;
topology.add(detection_output("detection_output_2", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
EXPECT_EQ(outputs.size(), size_t(2));
unsigned i = 1;
- for (auto it = outputs.begin(); it != outputs.begin(); it++)
- {
+ for (auto it = outputs.begin(); it != outputs.begin(); it++) {
EXPECT_EQ(it->first, "detection_output_" + std::to_string(i));
}
}
- void forward_share_location(bool runOnGPU)
- {
+ void forward_share_location() {
const bool share_location = true;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 4;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 7, "-1 0 0 0 0 0 0");
}
- void forward_num_detections_greater_than_keep_top_k(bool runOnGPU)
- {
+ void forward_num_detections_greater_than_keep_top_k() {
const bool share_location = true;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 1;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 1, "1 1 0.6 0.45 0.45 0.75 0.75");
}
- void forward_num_detections_smaller_than_keep_top_k(bool runOnGPU)
- {
+ void forward_num_detections_smaller_than_keep_top_k() {
const bool share_location = true;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 6;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 11, "-1 0 0 0 0 0 0");
}
- void test_forward_share_location_top_k(bool runOnGPU)
- {
+ void test_forward_share_location_top_k() {
const bool share_location = true;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 2;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 3, "-1 0 0 0 0 0 0");
}
- void forward_no_share_location(bool runOnGPU)
- {
+ void forward_no_share_location() {
const bool share_location = false;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 10;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 19, "-1 0 0 0 0 0 0");
}
- void forward_no_share_location_top_k(bool runOnGPU)
- {
+ void forward_no_share_location_top_k() {
const bool share_location = false;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 4;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 7, "-1 0 0 0 0 0 0");
}
- void forward_no_share_location_neg_0(bool runOnGPU)
- {
+ void forward_no_share_location_neg_0() {
const bool share_location = false;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 5;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 9, "-1 0 0 0 0 0 0");
}
- void forward_no_share_location_neg_0_top_k(bool runOnGPU)
- {
+ void forward_no_share_location_neg_0_top_k() {
const bool share_location = false;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 2;
topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 3, "-1 0 0 0 0 0 0");
}
- void forward_no_share_location_top_k_input_padding(bool runOnGPU)
- {
+ void forward_no_share_location_top_k_input_padding() {
const bool share_location = false;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 4;
topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
check_results(output_prim, 7, "-1 0 0 0 0 0 0");
}
- void test_forward_no_share_location_top_k_faster_rcnn_case(bool runOnGPU)
- {
+ void test_forward_no_share_location_top_k_faster_rcnn_case() {
const bool share_location = false;
const int num_loc_classes = share_location ? 1 : this->num_classes;
const int keep_top_k = 4;
));
build_options opts;
- if (runOnGPU)
- {
- opts.set_option(build_option::detection_output_gpu(true));
- }
-
network network(engine, topology, opts);
network.set_input_data("input_location", input_location);
network.set_input_data("input_confidence", input_confidence);
typedef ::testing::Types<float, FLOAT16> detection_output_test_types;
TYPED_TEST_CASE(detection_output_test, detection_output_test_types);
-TYPED_TEST(detection_output_test, test_setup_basic)
-{
- this->setup_basic(false);
-}
-
-TYPED_TEST(detection_output_test, test_setup_basic_gpu)
-{
- this->setup_basic(true);
-}
-
-TYPED_TEST(detection_output_test, test_setup_two_layers)
-{
- this->setup_two_layers(false);
-}
-
-TYPED_TEST(detection_output_test, test_setup_two_layers_gpu)
-{
- this->setup_two_layers(true);
-}
-
-TYPED_TEST(detection_output_test, test_forward_share_location)
-{
- this->forward_share_location(false);
-}
-
-TYPED_TEST(detection_output_test, DISABLED_test_forward_share_location_gpu)
-{
- this->forward_share_location(true);
-}
-
-TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k)
-{
- this->forward_num_detections_greater_than_keep_top_k(false);
-}
-
-TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k_gpu)
-{
- this->forward_num_detections_greater_than_keep_top_k(true);
+TYPED_TEST(detection_output_test, test_setup_basic) {
+ this->setup_basic();
}
-TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k)
-{
- this->forward_num_detections_smaller_than_keep_top_k(false);
+TYPED_TEST(detection_output_test, test_setup_two_layers) {
+ this->setup_two_layers();
}
-TYPED_TEST(detection_output_test, DISABLED_test_forward_num_detections_smaller_than_keep_top_k_gpu)
-{
- this->forward_num_detections_smaller_than_keep_top_k(true);
+TYPED_TEST(detection_output_test, test_forward_share_location) {
+ this->forward_share_location();
}
-TYPED_TEST(detection_output_test, test_forward_share_location_top_k)
-{
- this->test_forward_share_location_top_k(false);
+TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k) {
+ this->forward_num_detections_greater_than_keep_top_k();
}
-TYPED_TEST(detection_output_test, test_forward_share_location_top_k_gpu)
-{
- this->test_forward_share_location_top_k(true);
+TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k) {
+ this->forward_num_detections_smaller_than_keep_top_k();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location)
-{
- this->forward_no_share_location(false);
+TYPED_TEST(detection_output_test, test_forward_share_location_top_k) {
+ this->test_forward_share_location_top_k();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location_gpu)
-{
- this->forward_no_share_location(true);
+TYPED_TEST(detection_output_test, test_forward_no_share_location) {
+ this->forward_no_share_location();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k)
-{
- this->forward_no_share_location_top_k(false);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k) {
+ this->forward_no_share_location_top_k();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_gpu)
-{
- this->forward_no_share_location_top_k(true);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0) {
+ this->forward_no_share_location_neg_0();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0)
-{
- this->forward_no_share_location_neg_0(false);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k) {
+ this->forward_no_share_location_neg_0_top_k();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_gpu)
-{
- this->forward_no_share_location_neg_0(true);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding) {
+ this->forward_no_share_location_top_k_input_padding();
}
-TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k)
-{
- this->forward_no_share_location_neg_0_top_k(false);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case) {
+ this->test_forward_no_share_location_top_k_faster_rcnn_case();
}
-
-TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k_gpu)
-{
- this->forward_no_share_location_neg_0_top_k(true);
-}
-
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding)
-{
- this->forward_no_share_location_top_k_input_padding(false);
-}
-
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding_gpu)
-{
- this->forward_no_share_location_top_k_input_padding(true);
-}
-
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case)
-{
- this->test_forward_no_share_location_top_k_faster_rcnn_case(false);
-}
-
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case_gpu)
-{
- this->test_forward_no_share_location_top_k_faster_rcnn_case(true);
-}
-
-TYPED_TEST(detection_output_test, test_detection_output_sort_gpu)
-{
- const bool share_location = false;
- const int num_loc_classes = share_location ? 1 : this->num_classes;
- const int keep_top_k = 10;
- const int background_label_id = -1;
- const int top_k = -1;
-
- const unsigned out_row_size = 7;
- const unsigned score_space = ((this->num_of_images + 15) / 16) * 16;
- int input_size = this->num_of_images * num_loc_classes * this->num_priors * out_row_size + score_space;
-
- const auto& engine = get_test_engine();
- cldnn::memory input_buff = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 1, 1, input_size } });
-
- this->init_buffer_sort(input_buff);
-
- topology topology;
- topology.add(input_layout("input_location", input_buff.get_layout()));
-
- topology.add(detection_output_sort("detection_output_sort", "input_location", this->num_of_images, this->num_classes, keep_top_k, share_location, top_k, background_label_id));
- network network(engine, topology);
- network.set_input_data("input_location", input_buff);
-
- auto outputs = network.execute();
-
- EXPECT_EQ(outputs.size(), size_t(1));
- EXPECT_EQ(outputs.begin()->first, "detection_output_sort");
-
- EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
- EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
- EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
- EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
-
- auto output_prim = outputs.begin()->second.get_memory();
-
- this->check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
- this->check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
- this->check_results(output_prim, 2, "0 0 0.2 0.55 0.15 0.85 0.45");
- this->check_results(output_prim, 3, "0 0 0.0 0.15 0.15 0.45 0.45");
- this->check_results(output_prim, 4, "0 1 1.0 0.20 0.20 0.50 0.50");
- this->check_results(output_prim, 5, "0 1 0.8 0.50 0.20 0.80 0.50");
- this->check_results(output_prim, 6, "0 1 0.6 0.20 0.50 0.50 0.80");
- this->check_results(output_prim, 7, "0 1 0.4 0.50 0.50 0.80 0.80");
- this->check_results(output_prim, 8, "1 0 1.0 0.25 0.25 0.55 0.55");
- this->check_results(output_prim, 9, "1 0 0.4 0.45 0.45 0.75 0.75");
- this->check_results(output_prim, 10, "1 1 0.6 0.40 0.40 0.70 0.70");
- this->check_results(output_prim, 11, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 12, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 13, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 14, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 15, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 16, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 17, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 18, "-1 0 0 0 0 0 0");
- this->check_results(output_prim, 19, "-1 0 0 0 0 0 0");
-}
-