return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node) ||
- std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node);
+ std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
+ std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
};
auto nGraphFunc = clonedNetwork->getFunction();
// Disable shape inference (WA for generic operations)
#include <api/depth_to_space.hpp>
#include <api/space_to_depth.hpp>
#include <api/batch_to_space.hpp>
+#include <api/space_to_batch.hpp>
#include <api/shuffle_channels.hpp>
#include <api/strided_slice.hpp>
#include <api/reverse_sequence.hpp>
{ "DepthToSpace" , DepthToSpace },
{ "SpaceToDepth" , SpaceToDepth },
{ "BatchToSpace", BatchToSpace },
+ { "SpaceToBatch" , SpaceToBatch },
{ "ShuffleChannels" , ShuffleChannels },
{ "StridedSlice" , StridedSlice },
{ "ReverseSequence" , ReverseSequence },
break;
case BatchToSpace: CreateBatchToSpacePrimitive(topology, layer);
break;
+ case SpaceToBatch: CreateSpaceToBatchPrimitive(topology, layer);
+ break;
case ShuffleChannels: CreateShuffleChannelsPrimitive(topology, layer);
break;
case StridedSlice: CreateStridedSlicePrimitive(topology, layer);
AddPrimitiveToProfiler(batchToSpaceName, layer);
}
+void Program::CreateSpaceToBatchPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+ ValidateLayer(layer, 4);
+
+ auto inputPrimitives = GetPrevLayersPrimitives(layer);
+ auto spaceToBatch = as<InferenceEngine::GenericLayer*> (layer);
+ auto rank = spaceToBatch->input().get()->getTensorDesc().getDims().size();
+ auto format = FormatFromLayout(spaceToBatch->input()->getLayout());
+
+ std::vector<cldnn::tensor> inputs;
+ inputs.reserve(3);
+
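+    // Inputs 1-3 carry block_shape, pads_begin and pads_end respectively; each is
+    // expected to be produced by a Constant layer holding a single blob.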
+ for (size_t i = 1; i < 4; ++i) {
+        auto paramsInput = layer->insData[i].lock();
+        auto paramsInputCreator = paramsInput->getCreatorLayer().lock();
+        if (paramsInputCreator->blobs.size() == 1) {
+            auto constantBlob = paramsInputCreator->blobs.begin()->second;
+            auto constPrecision = constantBlob->getTensorDesc().getPrecision();
+ std::vector<int32_t> sizes;
+ sizes.reserve(rank);
+ int32_t default_size = i == 1 ? 1 : 0;
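+            // Neutral fill value for dimensions the constant does not cover:
+            // 1 for block_shape (i == 1), 0 for pads_begin/pads_end.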
+            switch (constPrecision) {
+ case InferenceEngine::Precision::I32: {
+ auto data = constantBlob->buffer().as<int32_t*>();
+ sizes = std::vector<int32_t>(data, data + rank);
+ break;
+ }
+ case InferenceEngine::Precision::I64: {
+ auto data = constantBlob->buffer().as<int64_t*>();
+ std::vector<int64_t> sizes_i64 = std::vector<int64_t>(data, data + rank);
+ for (size_t j = 0; j < sizes_i64.size(); ++j)
+ sizes.emplace_back(static_cast<int32_t>(sizes_i64[j]));
+ break;
+ }
+ default: {
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect SpaceToBatch precision";
+ break;
+ }
+ }
+ inputs.emplace_back(format, sizes, default_size);
+ }
+ }
+ auto out_size = CldnnTensorFromIEDims(spaceToBatch->outData[0]->getTensorDesc().getDims());
+
+ std::string spaceToBatchName = layer_type_name_ID(layer);
+ auto spaceToBatchPrim = cldnn::space_to_batch(
+ spaceToBatchName,
+ inputPrimitives[0], //input
+ inputs[0], //block_shape
+ inputs[1], //pads_begin
+ inputs[2], //pads_end
+ out_size);
+
+ topology.add(spaceToBatchPrim);
+ AddPrimitiveToProfiler(spaceToBatchName, layer);
+}
+
void Program::CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
ValidateLayer(layer, 1);
DepthToSpace,
SpaceToDepth,
BatchToSpace,
+ SpaceToBatch,
ShuffleChannels,
StridedSlice,
Broadcast,
void CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSpaceToDepthPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBatchToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+ void CreateSpaceToBatchPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/space_to_batch.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
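+// Each spaceToBatchParamsTuple below is assumed to hold, in order: block_shape,
+// pads_begin, pads_end, input shape, network precision, target device.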
+spaceToBatchParamsTuple stb_only_test_cases[] = {
+ spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 2, 2},
+ InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
+ spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 3, 2, 2},
+ InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
+ spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 4, 4},
+ InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
+ spaceToBatchParamsTuple({1, 1, 2, 2}, {0, 0, 0, 2}, {0, 0, 0, 0}, {2, 1, 2, 4},
+ InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
+ spaceToBatchParamsTuple({1, 1, 3, 2, 2}, {0, 0, 1, 0, 3}, {0, 0, 2, 0, 0}, {1, 1, 3, 2, 1},
+ InferenceEngine::Precision::FP32, CommonTestUtils::DEVICE_GPU),
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_CLDNN, SpaceToBatchLayerTest, ::testing::ValuesIn(stb_only_test_cases),
+ SpaceToBatchLayerTest::getTestCaseName);
+} // namespace
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "primitive.hpp"
+
+namespace cldnn {
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief SpaceToBatch operation divides "spatial" dimensions [1, ..., N - 1], N ∈ {4,5,6} of the data input
+/// into a grid of blocks of shape block_shape, and interleaves these blocks with the batch dimension (0) such that in the output,
+/// the spatial dimensions [1, ..., N - 1], N ∈ {4,5,6} correspond to the position within the grid,
+/// and the batch dimension combines both the position within a spatial block and the original batch position.
+/// Prior to division into blocks, the spatial dimensions of the input are optionally zero padded according to pads_begin and pads_end.
+/// @details The SpaceToBatch operation is similar to the TensorFlow* operation SpaceToBatchND (https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd)
+/// There are 4 inputs of this operation:
+/// 1) data - input N-D tensor [batch, D_1, D_2 ... D_{N-1}], N ∈ {4,5,6}. Required.
+/// 2) block_shape - input 1-D tensor with shape [N], N ∈ {4,5,6}. Consists of block sizes, each of which specifies the size of the value block to be moved.
+///    All values must be >= 1; block_shape[0] is expected to be 1. Required.
+/// 3) pads_begin - input 1-D tensor with shape [N], N ∈ {4,5,6}. Specifies the padding at the beginning along each axis of the data input.
+///    All values must be non-negative; pads_begin[0] is expected to be 0. Required.
+/// 4) pads_end - input 1-D tensor with shape [N], N ∈ {4,5,6}. Specifies the padding at the end along each axis of the data input.
+///    All values must be non-negative; pads_end[0] is expected to be 0. Required.
+/// Inputs 3-4 must satisfy: block_shape[i] evenly divides data_shape[i] + pads_begin[i] + pads_end[i] for every axis i.
+///
+/// The operation is equivalent to the following transformation of the input tensor data of shape [batch, D_1, D_2 ... D_{N - 1}], N ∈ {4,5,6}
+/// and block_shape, pads_begin, pads_end of shapes [N] to Y output tensor.
+/// Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to `pads_begin` and `pads_end`
+///
+/// x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ..., (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i]
+///
+/// x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)])
+///
+/// y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ... , (D_{N - 1} + P_{N - 1}) / B_{N - 1}])
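+///
+/// As a concrete example (mirroring the i2132_bs1222_pb0010_pe0100 unit test added in this change):
+/// input data of shape [2, 1, 3, 2] with block_shape = [1, 2, 2, 2], pads_begin = [0, 0, 1, 0]
+/// and pads_end = [0, 1, 0, 0] is zero-padded to [2, 2, 4, 2] and then rearranged into an
+/// output of shape [2 * (2 * 2 * 2), 2 / 2, 4 / 2, 2 / 2] = [16, 1, 2, 1].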
+
+struct space_to_batch : public primitive_base<space_to_batch> {
+ CLDNN_DECLARE_PRIMITIVE(space_to_batch)
+
+ /// @brief Constructs space_to_batch primitive.
+ /// @param id This primitive id.
+ /// @param input Input data primitive id.
+ /// @param block_shape Array of block sizes.
+ /// @param pads_begin Amount to pad for the beginning along each axis of data input.
+ /// @param pads_end Amount to pad for the ending along each axis of data input.
+ /// @param out_size Size of output tensor.
+ space_to_batch(const primitive_id& id,
+ const primitive_id& input,
+ const tensor& block_shape,
+ const tensor& pads_begin,
+ const tensor& pads_end,
+ const tensor& out_size,
+ const padding& output_padding = padding())
+ : primitive_base(id, {input}, output_padding),
+ block_shape(block_shape),
+ pads_begin(pads_begin),
+ pads_end(pads_end),
+ out_size(out_size) {}
+
+ tensor block_shape;
+ tensor pads_begin;
+ tensor pads_end;
+ tensor out_size;
+};
+/// @}
+/// @}
+/// @}
+} // namespace cldnn
REDUCE,
GATHER_TREE,
SPACE_TO_DEPTH,
+ SPACE_TO_BATCH,
GRN,
CTC_GREEDY_DECODER,
CUM_SUM,
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "space_to_batch_kernel_base.h"
+#include "kernel_selector_utils.h"
+#include <string>
+
+namespace kernel_selector {
+
+bool SpaceToBatchKernelBase::Validate(const Params& p, const optional_params& o) const {
+ if (p.GetType() != KernelType::SPACE_TO_BATCH ||
+ o.GetType() != KernelType::SPACE_TO_BATCH) {
+ return false;
+ }
+
+ return true;
+}
+
+CommonDispatchData SpaceToBatchKernelBase::SetDefault(const space_to_batch_params& params, const optional_params&) const {
+ CommonDispatchData runInfo;
+
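+    // One work item per output element: gws = (batch, feature, flattened spatial dims).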
+ std::vector<size_t> global = { params.output.Batch().v,
+ params.output.Feature().v,
+ params.output.W().v * params.output.Z().v * params.output.Y().v * params.output.X().v };
+
+ auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+
+ runInfo.gws0 = global[0];
+ runInfo.gws1 = global[1];
+ runInfo.gws2 = global[2];
+
+ runInfo.lws0 = local[0];
+ runInfo.lws1 = local[1];
+ runInfo.lws2 = local[2];
+
+ return runInfo;
+}
+
+JitConstants SpaceToBatchKernelBase::GetJitConstants(const space_to_batch_params& params) const {
+ JitConstants jit = MakeBaseParamsJitConstants(params);
+
+    auto makeJitConstForParam = [](JitConstants& jit, const std::string& name, const DimTensor<uint32_t>& args, const size_t default_value) {
+ jit.AddConstant(MakeJitConstant(name + "_SIZES", args));
+ jit.AddConstant(MakeJitConstant(name + "_BATCH", args.b));
+ jit.AddConstant(MakeJitConstant(name + "_FEATURE", args.f));
+ jit.AddConstant(MakeJitConstant(name + "_Y", args.y));
+ jit.AddConstant(MakeJitConstant(name + "_X", args.x));
+
+ if (args.w != 0) {
+ jit.AddConstant(MakeJitConstant(name + "_W", args.w));
+ jit.AddConstant(MakeJitConstant(name + "_Z", args.z));
+        } else if (args.z != 0) {
+ jit.AddConstant(MakeJitConstant(name + "_W", default_value));
+ jit.AddConstant(MakeJitConstant(name + "_Z", args.z));
+ } else {
+ jit.AddConstant(MakeJitConstant(name + "_W", default_value));
+ jit.AddConstant(MakeJitConstant(name + "_Z", default_value));
+ }
+ };
+
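+    // The generated macros (BLOCK_SHAPE_*, PADS_BEGIN_*, PADS_END_*) are consumed by
+    // space_to_batch_ref.cl; absent W/Z dims fall back to 1 for block_shape and 0 for pads.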
+ makeJitConstForParam(jit, "BLOCK_SHAPE", params.block_shape, 1);
+ makeJitConstForParam(jit, "PADS_BEGIN", params.pads_begin, 0);
+ makeJitConstForParam(jit, "PADS_END", params.pads_end, 0);
+
+ return jit;
+}
+
+KernelsData SpaceToBatchKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
+ KernelData kd = KernelData::Default<space_to_batch_params>(params);
+ space_to_batch_params& newParams = *static_cast<space_to_batch_params*>(kd.params.get());
+
+ if (!Validate(params, options)) {
+ return {};
+ }
+
+ auto runInfo = SetDefault(newParams, options);
+ auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+ auto cldnn_jit = GetJitConstants(newParams);
+ std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+ auto& kernel = kd.kernels[0];
+
+ FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+ kd.estimatedTime = estimatedTime;
+
+ return { kd };
+}
+} // namespace kernel_selector
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+#include <vector>
+
+namespace kernel_selector {
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// space_to_batch_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct space_to_batch_params : public base_params {
+ space_to_batch_params() : base_params(KernelType::SPACE_TO_BATCH) {}
+ DimTensor<uint32_t> block_shape;
+ DimTensor<uint32_t> pads_begin;
+ DimTensor<uint32_t> pads_end;
+
+ virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// space_to_batch_optional_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct space_to_batch_optional_params : optional_params {
+ space_to_batch_optional_params() : optional_params(KernelType::SPACE_TO_BATCH) {}
+};
+
+struct space_to_batch_fuse_params : fuse_params {
+ space_to_batch_fuse_params() : fuse_params(KernelType::SPACE_TO_BATCH) {}
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// SpaceToBatchKernelBase
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class SpaceToBatchKernelBase : public common_kernel_base {
+public:
+ using common_kernel_base::common_kernel_base;
+ virtual ~SpaceToBatchKernelBase() {}
+
+ struct DispatchData : public CommonDispatchData {};
+
+protected:
+ virtual bool Validate(const Params&, const optional_params&) const;
+ virtual JitConstants GetJitConstants(const space_to_batch_params& params) const;
+ virtual CommonDispatchData SetDefault(const space_to_batch_params& params, const optional_params&) const;
+ KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
+};
+} // namespace kernel_selector
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "space_to_batch_kernel_ref.h"
+#include "kernel_selector_utils.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+ParamsKey SpaceToBatchKernelRef::GetSupportedKey() const {
+ ParamsKey k;
+ k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::F32);
+
+ k.EnableInputLayout(DataLayout::bfyx);
+ k.EnableInputLayout(DataLayout::bfzyx);
+ k.EnableInputLayout(DataLayout::bfwzyx);
+ k.EnableOutputLayout(DataLayout::bfyx);
+ k.EnableOutputLayout(DataLayout::bfzyx);
+ k.EnableOutputLayout(DataLayout::bfwzyx);
+
+ k.EnableTensorOffset();
+ k.EnableTensorPitches();
+ k.EnableBatching();
+ return k;
+}
+
+KernelsData SpaceToBatchKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
+ return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+}
+} // namespace kernel_selector
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "space_to_batch_kernel_base.h"
+
+namespace kernel_selector {
+class SpaceToBatchKernelRef : public SpaceToBatchKernelBase {
+public:
+ SpaceToBatchKernelRef() : SpaceToBatchKernelBase("space_to_batch_ref") {}
+ virtual ~SpaceToBatchKernelRef() {}
+
+ KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+ ParamsKey GetSupportedKey() const override;
+};
+} // namespace kernel_selector
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "space_to_batch_kernel_selector.h"
+#include "space_to_batch_kernel_ref.h"
+
+namespace kernel_selector {
+
+space_to_batch_kernel_selector::space_to_batch_kernel_selector() {
+ Attach<SpaceToBatchKernelRef>();
+}
+
+KernelsData space_to_batch_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
+ return GetNaiveBestKernel(params, options, KernelType::SPACE_TO_BATCH);
+}
+} // namespace kernel_selector
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+class space_to_batch_kernel_selector : public kernel_selector_base {
+public:
+ static space_to_batch_kernel_selector& Instance() {
+ static space_to_batch_kernel_selector instance_;
+ return instance_;
+ }
+
+ space_to_batch_kernel_selector();
+
+ virtual ~space_to_batch_kernel_selector() {}
+
+ KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+};
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
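+// Reference SpaceToBatch kernel: one work item per output element. Each work item
+// decomposes its output coordinates into the original batch index plus per-dimension
+// block offsets, maps them back to a coordinate in the (virtually padded) input, and
+// copies the value or writes zero when the coordinate falls inside the padding.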
+KERNEL(space_to_batch_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
+{
+ const uint batch = get_global_id(0);
+ const uint feature = get_global_id(1);
+
+#ifdef OUTPUT_LAYOUT_BFYX
+ const uint w = 0;
+ const uint z = 0;
+ const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
+ const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
+#elif OUTPUT_LAYOUT_BFZYX
+ const uint w = 0;
+ const uint yx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ const uint z = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ const uint y = yx / OUTPUT_SIZE_X;
+ const uint x = yx % OUTPUT_SIZE_X;
+#elif OUTPUT_LAYOUT_BFWZYX
+ const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z);
+ const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z);
+ const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+ const uint y = yx / OUTPUT_SIZE_X;
+ const uint x = yx % OUTPUT_SIZE_X;
+#endif
+
+ const uint input_batch = batch % INPUT0_BATCH_NUM;
+ const uint offset_batch = batch / INPUT0_BATCH_NUM;
+
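+    // The output batch packs the flattened block offset and the original batch:
+    // batch = offset_batch * INPUT0_BATCH_NUM + input_batch. The divisions below
+    // unpack offset_batch into per-dimension offsets within a block.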
+ const int input_feature = feature * BLOCK_SHAPE_FEATURE - PADS_BEGIN_FEATURE +
+ offset_batch / (BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
+ const uint offset_feature = offset_batch % (BLOCK_SHAPE_W * BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
+
+ const int input_w = w * BLOCK_SHAPE_W - PADS_BEGIN_W + offset_feature / (BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
+ const uint offset_w = offset_feature % (BLOCK_SHAPE_Z * BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
+
+ const int input_z = z * BLOCK_SHAPE_Z - PADS_BEGIN_Z + offset_w / (BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
+ const uint offset_z = offset_w % (BLOCK_SHAPE_Y * BLOCK_SHAPE_X);
+
+ const int input_y = y * BLOCK_SHAPE_Y - PADS_BEGIN_Y + offset_z / BLOCK_SHAPE_X;
+ const uint offset_y = offset_z % BLOCK_SHAPE_X;
+
+ const int input_x = x * BLOCK_SHAPE_X - PADS_BEGIN_X + offset_y;
+
+ const int input_index = GET_DATA_INDEX_6D(INPUT0, input_batch, input_feature, input_w, input_z, input_y, input_x);
+
+ const uint output_index = GET_DATA_INDEX_6D(OUTPUT, batch, feature, w, z, y, x);
+
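+    // Coordinates that land in the pads_begin/pads_end region have no source element
+    // in the input and are filled with zeros (the virtual zero-padding).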
+ const bool out_of_bounds = input_feature < 0 || input_feature >= INPUT0_FEATURE_NUM ||
+ input_w < 0 || input_w >= INPUT0_SIZE_W ||
+ input_z < 0 || input_z >= INPUT0_SIZE_Z ||
+ input_y < 0 || input_y >= INPUT0_SIZE_Y ||
+ input_x < 0 || input_x >= INPUT0_SIZE_X;
+
+ INPUT0_TYPE in = out_of_bounds ? INPUT0_VAL_ZERO : input[input_index];
+ output[output_index] = ACTIVATION(in, ACTIVATION_PARAMS);
+}
REGISTER_GPU(shuffle_channels);
REGISTER_GPU(softmax);
REGISTER_GPU(softmax_loss_grad);
+ REGISTER_GPU(space_to_batch);
REGISTER_GPU(space_to_depth);
REGISTER_GPU(strided_slice);
REGISTER_GPU(tile);
#include "api/shuffle_channels.hpp"
#include "api/softmax.hpp"
#include "api/softmax_loss_grad.hpp"
+#include "api/space_to_batch.hpp"
#include "api/strided_slice.hpp"
#include "api/tile.hpp"
#include "api/resample.hpp"
REGISTER_GPU(shuffle_channels);
REGISTER_GPU(softmax);
REGISTER_GPU(softmax_loss_grad);
+REGISTER_GPU(space_to_batch);
REGISTER_GPU(space_to_depth);
REGISTER_GPU(strided_slice);
REGISTER_GPU(tile);
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "space_to_batch_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "space_to_batch/space_to_batch_kernel_selector.h"
+#include "space_to_batch/space_to_batch_kernel_ref.h"
+#include "error_handler.h"
+#include "data_inst.h"
+#include <vector>
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace gpu {
+struct space_to_batch_gpu : typed_primitive_gpu_impl<space_to_batch> {
+ using parent = typed_primitive_gpu_impl<space_to_batch>;
+ using parent::parent;
+
+public:
+ static primitive_impl* create(const space_to_batch_node& arg) {
+ auto space_to_batch_params = get_default_params<kernel_selector::space_to_batch_params>(arg);
+ auto space_to_batch_optional_params =
+ get_default_optional_params<kernel_selector::space_to_batch_optional_params>(arg.get_program());
+
+ auto primitive = arg.get_primitive();
+
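+        // The primitive stores block_shape and pads as cldnn tensors; convert them to the
+        // kernel selector's DimTensor representation before kernel lookup.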
+ space_to_batch_params.block_shape = convert_dim_vector(primitive->block_shape);
+ space_to_batch_params.pads_begin = convert_dim_vector(primitive->pads_begin);
+ space_to_batch_params.pads_end = convert_dim_vector(primitive->pads_end);
+
+ auto& kernel_selector = kernel_selector::space_to_batch_kernel_selector::Instance();
+ auto best_kernels = kernel_selector.GetBestKernels(space_to_batch_params, space_to_batch_optional_params);
+
+ CLDNN_ERROR_BOOL(arg.id(),
+ "Best_kernel.empty()",
+ best_kernels.empty(),
+                         "Cannot find a proper kernel with these arguments");
+
+ auto space_to_batch = new space_to_batch_gpu(arg, best_kernels[0]);
+
+ return space_to_batch;
+ }
+};
+
+namespace detail {
+
+attach_space_to_batch_gpu::attach_space_to_batch_gpu() {
+ auto val_fw = space_to_batch_gpu::create;
+ implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+ implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+ implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
+ implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
+ implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+ implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
+}
+
+} // namespace detail
+} // namespace gpu
+} // namespace cldnn
#include "gather_inst.h"
#include "reverse_sequence_inst.h"
#include "shuffle_channels_inst.h"
+#include "space_to_batch_inst.h"
#include "strided_slice_inst.h"
#include "cum_sum_inst.h"
#include "embedding_bag_inst.h"
!input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
!input.is_type<reshape>() && !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
!input.is_type<softmax>() && !input.is_type<resample>() && !input.is_type<mvn>() &&
- !input.is_type<depth_to_space>() && !input.is_type<batch_to_space>() && !input.is_type<gather>() &&
- !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() && !input.is_type<cum_sum>() &&
- !input.is_type<reverse_sequence>() && !input.is_type<embedding_bag>() && !input.is_type<fused_conv_eltwise>() &&
- !input.is_type<activation>()))
+ !input.is_type<depth_to_space>() && !input.is_type<batch_to_space>() && !input.is_type<space_to_batch>() &&
+ !input.is_type<gather>() && !input.is_type<shuffle_channels>() && !input.is_type<strided_slice>() &&
+ !input.is_type<cum_sum>() && !input.is_type<reverse_sequence>() && !input.is_type<embedding_bag>() &&
+ !input.is_type<fused_conv_eltwise>() && !input.is_type<activation>()))
return;
if (input.is_type<eltwise>()) {
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api/space_to_batch.hpp"
+#include "primitive_inst.h"
+#include <string>
+
+namespace cldnn {
+template <>
+struct typed_program_node<space_to_batch> : public typed_program_node_base<space_to_batch> {
+ using parent = typed_program_node_base<space_to_batch>;
+
+public:
+ using parent::parent;
+
+ program_node& input(size_t index = 0) const { return get_dependency(index); }
+};
+
+using space_to_batch_node = typed_program_node<space_to_batch>;
+
+template <>
+class typed_primitive_inst<space_to_batch> : public typed_primitive_inst_base<space_to_batch> {
+ using parent = typed_primitive_inst_base<space_to_batch>;
+
+public:
+ static layout calc_output_layout(space_to_batch_node const& node);
+ static std::string to_string(space_to_batch_node const& node);
+
+public:
+ typed_primitive_inst(network_impl& network, space_to_batch_node const& desc);
+};
+
+using space_to_batch_inst = typed_primitive_inst<space_to_batch>;
+} // namespace cldnn
--- /dev/null
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "space_to_batch_inst.h"
+
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+#include "data_inst.h"
+#include <string>
+#include <vector>
+
+namespace cldnn {
+primitive_type_id cldnn::space_to_batch::type_id() {
+ static primitive_type_base<space_to_batch> instance;
+ return &instance;
+}
+
+layout space_to_batch_inst::calc_output_layout(space_to_batch_node const& node) {
+ auto desc = node.get_primitive();
+
+ auto input_layout = node.input(0).get_output_layout();
+ auto input_format = input_layout.format;
+
+ const size_t dims_num = format::dimension(input_format);
+
+ const auto& block_shape = desc->block_shape;
+ const auto& pads_begin = desc->pads_begin;
+ const auto& pads_end = desc->pads_end;
+
+ if (block_shape.batch[0] != 1)
+ CLDNN_ERROR_MESSAGE(node.id(),
+ "block_shape[0] is expected to be 1. Actual block_shape[0] is " +
+ std::to_string(block_shape.batch[0]));
+
+ if (pads_begin.batch[0] != 0)
+ CLDNN_ERROR_MESSAGE(node.id(),
+ "pads_begin[0] is expected to be 0. Actual pads_begin[0] is " +
+ std::to_string(pads_begin.batch[0]));
+
+ if (pads_end.batch[0] != 0)
+ CLDNN_ERROR_MESSAGE(node.id(),
+ "pads_end[0] is expected to be 0. Actual pads_end[0] is " +
+ std::to_string(pads_end.batch[0]));
+
+ if ((input_layout.size.sizes(input_format)[1] + pads_begin.feature[0] + pads_end.feature[0]) % block_shape.feature[0] != 0)
+ CLDNN_ERROR_MESSAGE(node.id(),
+ "Input feature shape after padding must be divisible by block_shape");
+
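+    // Every padded spatial extent must be divisible by its block size; the index
+    // arithmetic below maps the spec's outer-to-inner axis i onto clDNN's internal
+    // size ordering.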
+ for (size_t i = 2; i < dims_num; ++i)
+ if ((input_layout.size.sizes(input_format)[dims_num - i + 1] + pads_begin.sizes()[i] + pads_end.sizes()[i]) % block_shape.sizes()[i] != 0)
+ CLDNN_ERROR_MESSAGE(node.id(),
+ "Input spatial shapes after padding must be divisible by block_shape");
+
+ return layout{input_layout.data_type, input_format, desc->out_size};
+}
+
+std::string space_to_batch_inst::to_string(space_to_batch_node const& node) {
+ auto desc = node.get_primitive();
+ auto node_info = node.desc_to_json();
+ auto& input = node.input();
+
+ std::stringstream primitive_description;
+
+ json_composite space_to_batch_info;
+ space_to_batch_info.add("input id", input.id());
+
+ node_info->add("space_to_batch_info", space_to_batch_info);
+ node_info->dump(primitive_description);
+
+ return primitive_description.str();
+}
+
+space_to_batch_inst::typed_primitive_inst(network_impl& network, space_to_batch_node const& node)
+ : parent(network, node) {}
+
+} // namespace cldnn
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/input_layout.hpp>
+#include <api/memory.hpp>
+#include <api/space_to_batch.hpp>
+#include <api/topology.hpp>
+#include <api/network.hpp>
+#include <api/data.hpp>
+
+#include <cstddef>
+#include <tests/test_utils/test_utils.h>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(space_to_batch_fp16_gpu, i1222_bs1222_pb0000_pe0000) {
+ // Input : 1x2x2x2
+ // Block shape : 1x2x2x2
+ // Pads begin : 0x0x0x0
+ // Pads end : 0x0x0x0
+ // Output : 8x1x1x1
+ // Input values in fp16
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,2,2,2} });
+
+ set_values(input, {
+ FLOAT16(0.0f), FLOAT16(1.0f),
+ FLOAT16(2.0f), FLOAT16(3.0f),
+ FLOAT16(4.0f), FLOAT16(5.0f),
+ FLOAT16(6.0f), FLOAT16(7.0f)
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
+ tensor(format::bfyx, {0,0,0,0}, 0),
+ tensor(format::bfyx, {0,0,0,0}, 0),
+ tensor(format::bfyx, {8,1,1,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<uint16_t>();
+
+ std::vector<float> expected_results = {
+ 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+ }
+}
+
+TEST(space_to_batch_fp16_gpu, i1242_bs1221_pb0020_pe0000) {
+ // Input : 1x2x4x2
+ // Block shape : 1x2x2x1
+ // Pads begin : 0x0x2x0
+ // Pads end : 0x0x0x0
+ // Output : 4x1x3x2
+ // Input values in fp16
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,2,2,4} });
+
+ set_values(input, {
+ FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
+ FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
+ FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f),
+ FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f)
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,1}, 1),
+ tensor(format::bfyx, {0,0,2,0}, 0),
+ tensor(format::bfyx, {0,0,0,0}, 0),
+ tensor(format::bfyx, {4,1,3,2}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<uint16_t>();
+
+ std::vector<float> expected_results = {
+ 0.f, 0.f, 0.f, 1.f, 4.f, 5.f,
+ 0.f, 0.f, 2.f, 3.f, 6.f, 7.f,
+ 0.f, 0.f, 8.f, 9.f, 12.f, 13.f,
+ 0.f, 0.f, 10.f, 11.f, 14.f, 15.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+ }
+}
+
+TEST(space_to_batch_fp16_gpu, i2132_bs1222_pb0010_pe0100) {
+ // Input : 2x1x3x2
+ // Block shape : 1x2x2x2
+ // Pads begin : 0x0x1x0
+ // Pads end : 0x1x0x0
+ // Output : 16x1x2x1
+ // Input values in fp16
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {2,1,2,3} });
+
+ set_values(input, {
+ FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
+ FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
+ FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f)
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
+ tensor(format::bfyx, {0,0,1,0}, 0),
+ tensor(format::bfyx, {0,1,0,0}, 0),
+ tensor(format::bfyx, {16,1,2,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<uint16_t>();
+
+ std::vector<float> expected_results = {
+ 0.f, 2.f, 0.f, 8.f, 0.f, 3.f, 0.f, 9.f,
+ 0.f, 4.f, 6.f, 10.f, 1.f, 5.f, 7.f, 11.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+ }
+}
+
+TEST(space_to_batch_fp16_gpu, i12132_bs12122_pb00010_pe00000) {
+ // Input : 1x2x1x3x2
+ // Block shape : 1x2x1x2x2
+ // Pads begin : 0x0x0x1x0
+ // Pads end : 0x0x0x0x0
+ // Output : 8x1x1x2x1
+ // Input values in fp16
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f16, format::bfzyx, {1,2,2,3,1} });
+
+ set_values(input, {
+ FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
+ FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
+ FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f)
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfzyx, {1,2,1,2,2}, 1),
+ tensor(format::bfzyx, {0,0,0,1,0}, 0),
+ tensor(format::bfzyx, {0,0,0,0,0}, 0),
+ tensor(format::bfzyx, {8,1,1,2,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<uint16_t>();
+
+ std::vector<float> expected_results = {
+ 0.f, 2.f, 0.f, 3.f, 0.f, 4.f, 1.f, 5.f,
+ 0.f, 8.f, 0.f, 9.f, 6.f, 10.f, 7.f, 11.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+ }
+}
+
+TEST(space_to_batch_fp16_gpu, i134121_bs142121_pb010100_pe000000) {
+ // Input : 1x3x4x1x2x1
+ // Block shape : 1x4x2x1x2x1
+ // Pads begin : 0x1x0x1x0x0
+ // Pads end : 0x0x0x0x0x0
+ // Output : 16x1x2x2x1x1
+ // Input values in fp16
+
+ engine engine;
+ tensor input_shape = tensor{ batch(1), feature(3), spatial(1, 2, 1, 4) };
+ auto input = memory::allocate(engine, { data_types::f16, format::bfwzyx, input_shape });
+
+ set_values(input, {
+ FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f),
+ FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f),
+ FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f),
+ FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f),
+ FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
+ FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfwzyx, {1,4,2,1,2,1}, 1),
+ tensor(format::bfwzyx, {0,1,0,1,0,0}, 0),
+ tensor(format::bfwzyx, {0,0,0,0,0,0}, 0),
+ tensor(format::bfwzyx, {16,1,2,2,1,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<uint16_t>();
+
+ std::vector<float> expected_results = {
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 4.f, 0.f, 1.f, 0.f, 5.f,
+ 0.f, 2.f, 0.f, 6.f, 0.f, 3.f, 0.f, 7.f,
+ 0.f, 8.f, 0.f, 12.f, 0.f, 9.f, 0.f, 13.f,
+ 0.f, 10.f, 0.f, 14.f, 0.f, 11.f, 0.f, 15.f,
+ 0.f, 16.f, 0.f, 20.f, 0.f, 17.f, 0.f, 21.f,
+ 0.f, 18.f, 0.f, 22.f, 0.f, 19.f, 0.f, 23.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+ }
+}
+
+TEST(space_to_batch_fp32_gpu, i1222_bs1222_pb0000_pe0000) {
+ // Input : 1x2x2x2
+ // Block shape : 1x2x2x2
+ // Pads begin : 0x0x0x0
+ // Pads end : 0x0x0x0
+ // Output : 8x1x1x1
+ // Input values in fp32
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} });
+
+ set_values(input, {
+ 0.0f, 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f, 7.0f
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
+ tensor(format::bfyx, {0,0,0,0}, 0),
+ tensor(format::bfyx, {0,0,0,0}, 0),
+ tensor(format::bfyx, {8,1,1,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> expected_results = {
+ 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], output_ptr[i]);
+ }
+}
+
+TEST(space_to_batch_fp32_gpu, i1242_bs1221_pb0020_pe0000) {
+ // Input : 1x2x4x2
+ // Block shape : 1x2x2x1
+ // Pads begin : 0x0x2x0
+ // Pads end : 0x0x0x0
+ // Output : 4x1x3x2
+ // Input values in fp32
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,4} });
+
+ set_values(input, {
+ 0.0f, 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f, 7.0f,
+ 8.0f, 9.0f, 10.0f, 11.0f,
+ 12.0f, 13.0f, 14.0f, 15.0f
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,1}, 1),
+ tensor(format::bfyx, {0,0,2,0}, 0),
+ tensor(format::bfyx, {0,0,0,0}, 0),
+ tensor(format::bfyx, {4,1,3,2}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> expected_results = {
+ 0.f, 0.f, 0.f, 1.f, 4.f, 5.f,
+ 0.f, 0.f, 2.f, 3.f, 6.f, 7.f,
+ 0.f, 0.f, 8.f, 9.f, 12.f, 13.f,
+ 0.f, 0.f, 10.f, 11.f, 14.f, 15.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], output_ptr[i]);
+ }
+}
+
+TEST(space_to_batch_fp32_gpu, i2132_bs1222_pb0010_pe0100) {
+ // Input : 2x1x3x2
+ // Block shape : 1x2x2x2
+ // Pads begin : 0x0x1x0
+ // Pads end : 0x1x0x0
+ // Output : 16x1x2x1
+ // Input values in fp32
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2,1,2,3} });
+
+ set_values(input, {
+ 0.0f, 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f, 7.0f,
+ 8.0f, 9.0f, 10.0f, 11.0f
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1),
+ tensor(format::bfyx, {0,0,1,0}, 0),
+ tensor(format::bfyx, {0,1,0,0}, 0),
+ tensor(format::bfyx, {16,1,2,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> expected_results = {
+ 0.f, 2.f, 0.f, 8.f, 0.f, 3.f, 0.f, 9.f,
+ 0.f, 4.f, 6.f, 10.f, 1.f, 5.f, 7.f, 11.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], output_ptr[i]);
+ }
+}
+
+TEST(space_to_batch_fp32_gpu, i12132_bs12122_pb00010_pe00000) {
+ // Input : 1x2x1x3x2
+ // Block shape : 1x2x1x2x2
+ // Pads begin : 0x0x0x1x0
+ // Pads end : 0x0x0x0x0
+ // Output : 8x1x1x2x1
+ // Input values in fp32
+
+ engine engine;
+ auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, {1,2,2,3,1} });
+
+ set_values(input, {
+ 0.0f, 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f, 7.0f,
+ 8.0f, 9.0f, 10.0f, 11.0f
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfzyx, {1,2,1,2,2}, 1),
+ tensor(format::bfzyx, {0,0,0,1,0}, 0),
+ tensor(format::bfzyx, {0,0,0,0,0}, 0),
+ tensor(format::bfzyx, {8,1,1,2,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> expected_results = {
+ 0.f, 2.f, 0.f, 3.f, 0.f, 4.f, 1.f, 5.f,
+ 0.f, 8.f, 0.f, 9.f, 6.f, 10.f, 7.f, 11.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], output_ptr[i]);
+ }
+}
+
+TEST(space_to_batch_fp32_gpu, i134121_bs142121_pb010100_pe000000) {
+ // Input : 1x3x4x1x2x1
+ // Block shape : 1x4x2x1x2x1
+ // Pads begin : 0x1x0x1x0x0
+ // Pads end : 0x0x0x0x0x0
+ // Output : 16x1x2x2x1x1
+ // Input values in fp32
+
+ engine engine;
+ tensor input_shape = tensor{ batch(1), feature(3), spatial(1, 2, 1, 4) };
+ auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, input_shape });
+
+ set_values(input, {
+ 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+ 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
+ 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f
+ });
+
+ topology topology;
+ topology.add(input_layout("Input", input.get_layout()));
+ topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfwzyx, {1,4,2,1,2,1}, 1),
+ tensor(format::bfwzyx, {0,1,0,1,0,0}, 0),
+ tensor(format::bfwzyx, {0,0,0,0,0,0}, 0),
+ tensor(format::bfwzyx, {16,1,2,2,1,1}, 1)));
+ network network(engine, topology);
+
+ network.set_input_data("Input", input);
+
+ auto outputs = network.execute();
+
+ auto output = outputs.at("space_to_batch").get_memory();
+ auto output_ptr = output.pointer<float>();
+
+ std::vector<float> expected_results = {
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 4.f, 0.f, 1.f, 0.f, 5.f,
+ 0.f, 2.f, 0.f, 6.f, 0.f, 3.f, 0.f, 7.f,
+ 0.f, 8.f, 0.f, 12.f, 0.f, 9.f, 0.f, 13.f,
+ 0.f, 10.f, 0.f, 14.f, 0.f, 11.f, 0.f, 15.f,
+ 0.f, 16.f, 0.f, 20.f, 0.f, 17.f, 0.f, 21.f,
+ 0.f, 18.f, 0.f, 22.f, 0.f, 19.f, 0.f, 23.f
+ };
+
+ ASSERT_EQ(output_ptr.size(), expected_results.size());
+
+ for (size_t i = 0; i < expected_results.size(); ++i) {
+ EXPECT_EQ(expected_results[i], output_ptr[i]);
+ }
+}