1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
14 // Copyright (C) 2017, Intel Corporation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "../precomp.hpp"
44 #include "layers_common.hpp"
45 #include "../op_cuda.hpp"
46 #include "../op_halide.hpp"
47 #include "../op_inf_engine.hpp"
48 #include "../op_vkcom.hpp"
51 #include "opencl_kernels_dnn.hpp"
55 #include "../cuda4dnn/primitives/concat.hpp"
56 using namespace cv::dnn::cuda4dnn;
64 class ConcatLayerImpl CV_FINAL : public ConcatLayer
67 ConcatLayerImpl(const LayerParams& params)
69 setParamsFrom(params);
70 axis = params.get<int>("axis", 1);
71 padding = params.get<bool>("padding", false);
74 virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
75 const int requiredOutputs,
76 std::vector<MatShape> &outputs,
77 std::vector<MatShape> &internals) const CV_OVERRIDE
79 CV_Assert(inputs.size() > 0);
80 outputs.resize(1, inputs[0]);
81 int cAxis = clamp(axis, inputs[0]);
84 for (size_t i = 0; i < inputs.size(); i++)
86 MatShape curShape = inputs[i];
90 for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
92 outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
97 CV_Assert(curShape.size() == outputs[0].size());
98 for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
100 if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
101 CV_Error(Error::StsBadSize, "Inconsistent shape for ConcatLayer");
105 axisSum += curShape[cAxis];
107 outputs[0][cAxis] = axisSum;
111 virtual bool supportBackend(int backendId) CV_OVERRIDE
113 return backendId == DNN_BACKEND_OPENCV ||
114 backendId == DNN_BACKEND_CUDA ||
115 (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) || // By channels
116 (backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !padding) ||
117 (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
120 class ChannelConcatInvoker : public ParallelLoopBody
123 std::vector<Mat>* inputs;
126 std::vector<const float*> chptrs;
128 static void run(std::vector<Mat>& inputs, Mat& output, int nstripes)
130 ChannelConcatInvoker cc;
133 cc.nstripes = nstripes;
135 size_t i, ninputs = inputs.size();
136 int nchannels = 0, batchsz = output.size[0];
137 for( i = 0; i < ninputs; i++ )
139 Mat& inp = inputs[i];
140 CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S) &&
141 inp.dims == 4 && inp.size[0] == output.size[0] &&
142 inp.size[2] == output.size[2] &&
143 inp.size[3] == output.size[3] );
144 nchannels += inp.size[1];
146 CV_Assert( nchannels == output.size[1] );
147 CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S) );
149 cc.chptrs.resize(nchannels*batchsz);
152 for( i = 0; i < ninputs; i++)
154 Mat& inp = inputs[i];
155 for( int j = 0; j < batchsz; j++ )
156 for( int k = 0; k < inp.size[1]; k++ )
158 const float* ptr = inp.ptr<float>(j, k);
159 cc.chptrs[ofs + j*nchannels + k] = ptr;
164 parallel_for_(Range(0, nstripes), cc, nstripes);
167 ChannelConcatInvoker() : inputs(0), output(0), nstripes(0) {}
169 void operator()(const Range& r) const CV_OVERRIDE
171 size_t planeSize = (size_t)output->size[2]*output->size[3];
172 size_t nch = chptrs.size();
173 size_t total = nch*planeSize;
174 size_t stripeSize = (total + nstripes - 1)/nstripes;
175 size_t stripeStart = r.start*stripeSize;
176 size_t stripeEnd = std::min(total, r.end*stripeSize);
177 const float** ptrs = (const float**)&chptrs[0];
178 float* outptr = output->ptr<float>();
179 size_t blockSize0 = 1 << 16;
181 for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
183 size_t ch = ofs0/planeSize;
184 size_t ofs = ofs0 - ch*planeSize;
185 size_t blockSize = std::min(blockSize0, planeSize - ofs);
186 memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
193 bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
195 std::vector<UMat> inputs;
196 std::vector<UMat> outputs;
198 bool use_half = (inps.depth() == CV_16S);
199 inps.getUMatVector(inputs);
200 outs.getUMatVector(outputs);
202 int cAxis = clamp(axis, inputs[0].dims);
206 int bottom_concat_axis;
207 int concat_size = total(shape(inputs[0]), cAxis + 1);
208 int top_concat_axis = outputs[0].size[cAxis];
209 int num_concats = total(shape(inputs[0]), 0, cAxis);
210 int offset_concat_axis = 0;
211 UMat& outMat = outputs[0];
212 String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float");
213 String kname = format("concat_%s", use_half ? "half" : "float");
215 for (size_t i = 0; i < inputs.size(); i++)
217 ocl::Kernel kernel(kname.c_str(), ocl::dnn::concat_oclsrc, buildopt);
221 UMat& inpMat = inputs[i];
222 bottom_concat_axis = inputs[i].size[cAxis];
223 size_t nthreads = inputs[i].total();
225 kernel.set(0, (int)nthreads);
226 kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
227 kernel.set(2, (int)num_concats);
228 kernel.set(3, (int)concat_size);
229 kernel.set(4, (int)top_concat_axis);
230 kernel.set(5, (int)bottom_concat_axis);
231 kernel.set(6, (int)offset_concat_axis);
232 kernel.set(7, ocl::KernelArg::PtrWriteOnly(outMat));
234 if (!kernel.run(1, &nthreads, NULL, false))
237 offset_concat_axis += bottom_concat_axis;
244 void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
247 CV_TRACE_ARG_VALUE(name, "name", name.c_str());
249 CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
250 forward_ocl(inputs_arr, outputs_arr, internals_arr))
252 std::vector<Mat> inputs, outputs;
253 inputs_arr.getMatVector(inputs);
254 outputs_arr.getMatVector(outputs);
256 int cAxis = clamp(axis, inputs[0].dims);
257 Mat& outMat = outputs[0];
262 if( cAxis == 1 && outMat.dims == 4 && !padding)
264 int nstripes = getNumThreads();
265 ChannelConcatInvoker::run(inputs, outMat, nstripes);
269 std::vector<Range> ranges(outputs[0].dims, Range::all());
271 ranges[cAxis].start = 0;
272 for (size_t i = 0; i < inputs.size(); i++)
274 ranges[cAxis].end = ranges[cAxis].start + inputs[i].size[cAxis];
275 for (int j = 0; j < outMat.dims; ++j)
277 if (j == cAxis) continue;
278 ranges[j].start = (outMat.size[j] - inputs[i].size[j]) / 2;
279 ranges[j].end = ranges[j].start + inputs[i].size[j];
281 inputs[i].copyTo(outMat(&ranges[0]));
282 ranges[cAxis].start = ranges[cAxis].end;
288 Ptr<BackendNode> initCUDA(
290 const std::vector<Ptr<BackendWrapper>>& inputs,
291 const std::vector<Ptr<BackendWrapper>>& outputs
294 auto context = reinterpret_cast<csl::CSLContext*>(context_);
296 auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
297 auto concat_axis = clamp(axis, input_wrapper->getRank());
298 return make_cuda_node<cuda4dnn::ConcatOp>(preferableTarget, std::move(context->stream), concat_axis, padding);
302 virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
305 vkcom::Tensor in = VkComTensor(input[0]);
306 int cAxis = clamp(axis, in.dimNum());
307 std::shared_ptr<vkcom::OpBase> op(new vkcom::OpConcat(cAxis));
308 return Ptr<BackendNode>(new VkComBackendNode(input, op));
309 #endif // HAVE_VULKAN
310 return Ptr<BackendNode>();
313 virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
316 std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);
318 Halide::Var x("x"), y("y"), c("c"), n("n");
319 Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
320 int offset = inputBuffers[0].channels();
321 Halide::Expr topExpr = select(c < offset,
322 inputBuffers[0](x, y, c, n),
323 inputBuffers[1](x, y, c - offset, n));
324 for (int i = 2; i < input.size(); ++i)
326 offset += inputBuffers[i - 1].channels();
327 topExpr = select(c < offset, topExpr,
328 inputBuffers[i](x, y, c - offset, n));
330 top(x, y, c, n) = topExpr;
331 return Ptr<BackendNode>(new HalideBackendNode(top));
332 #endif // HAVE_HALIDE
333 return Ptr<BackendNode>();
#ifdef HAVE_INF_ENGINE
    /** Creates the Inference Engine (builder API) concat node. */
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

        InferenceEngine::Builder::ConcatLayer ieLayer(name);
        ieLayer.setAxis(clamp(axis, input->getDims().size()));
        // One input port per blob being concatenated.
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif  // HAVE_INF_ENGINE
349 Ptr<ConcatLayer> ConcatLayer::create(const LayerParams& params)
351 return Ptr<ConcatLayer>(new ConcatLayerImpl(params));