From 34bfd7ef51b5d056ec35f9b60ced23b5a7a1811b Mon Sep 17 00:00:00 2001
From: Li Peng
Date: Wed, 3 Jan 2018 21:43:48 +0800
Subject: [PATCH] add ocl implementation of proposal layer

Signed-off-by: Li Peng
---
Reviewer notes (free text in this position is not part of the commit):

 * forward_ocl() is the UMat-based implementation added by this patch. It
   asserts exactly three inputs (scores, bbox deltas, image info) and three
   internal buffers (prior boxes, permuted scores, permuted deltas). It
   copies the image info to a host Mat to read rows/cols, sizes a zeroed
   CV_8UC1 placeholder blob from them, and then runs priorBoxLayer,
   scoresPermute, deltasPermute and detectionOutputLayer in that order.
   Columns 3..6 of the Nx7 detection result are copied into columns 1..4 of
   outputs[0]; column 0 and every row from N up to keepTopAfterNMS are set
   to zero. The function always returns true.
 * forward() passes forward_ocl() to CV_OCL_RUN, guarded by
   preferableTarget == DNN_TARGET_OPENCL and the Intel-device performance
   check, and is followed by Layer::forward_fallback(). Presumably
   CV_OCL_RUN returns early when the OpenCL path succeeds - confirm against
   the macro definition.
 * getObjectScores(const UMat&) asserts a 4-D blob with size[0] == 1 and an
   even channel count, and returns the view covering the upper half of
   axis 1.
 * NOTE(review): this copy of the patch had lost its line structure and all
   template argument lists. std::vector<UMat>, Mat::at<float> and the
   Ptr<...> types on the context lines were reconstructed; confirm them
   (and the context-line whitespace) against the target tree before
   applying. The author e-mail addresses were also missing and have not
   been reconstructed.
 * NOTE(review): numDets is an int while keepTopAfterNMS is uint32_t, so
   the assertion and the final rowRange() mix signed and unsigned values.
   The numDets == 0 case is not special-cased; confirm that reshaping to
   0x7 and copying an empty range behave as intended.

 modules/dnn/src/layers/proposal_layer.cpp | 95 +++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp
index 8da4c47..8fee7fa 100644
--- a/modules/dnn/src/layers/proposal_layer.cpp
+++ b/modules/dnn/src/layers/proposal_layer.cpp
@@ -148,11 +148,89 @@ public:
         deltasPermute->finalize(layerInputs, layerOutputs);
     }
 
+#ifdef HAVE_OPENCL
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+        internals_.getUMatVector(internals);
+
+        CV_Assert(inputs.size() == 3);
+        CV_Assert(internals.size() == 3);
+        const UMat& scores = inputs[0];
+        const UMat& bboxDeltas = inputs[1];
+        const UMat& imInfo = inputs[2];
+        UMat& priorBoxes = internals[0];
+        UMat& permuttedScores = internals[1];
+        UMat& permuttedDeltas = internals[2];
+
+        CV_Assert(imInfo.total() >= 2);
+        // We've chosen the smallest data type because we need just a shape from it.
+        Mat szMat;
+        imInfo.copyTo(szMat);
+        int rows = (int)szMat.at<float>(0);
+        int cols = (int)szMat.at<float>(1);
+        umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
+        umat_fakeImageBlob.setTo(0);
+
+        // Generate prior boxes.
+        std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
+        layerInputs[0] = scores;
+        layerInputs[1] = umat_fakeImageBlob;
+        priorBoxLayer->forward(layerInputs, layerOutputs, internals);
+
+        // Permute scores.
+        layerInputs.assign(1, getObjectScores(scores));
+        layerOutputs.assign(1, permuttedScores);
+        scoresPermute->forward(layerInputs, layerOutputs, internals);
+
+        // Permute deltas.
+        layerInputs.assign(1, bboxDeltas);
+        layerOutputs.assign(1, permuttedDeltas);
+        deltasPermute->forward(layerInputs, layerOutputs, internals);
+
+        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
+        // output internally because of different number of objects after NMS.
+        layerInputs.resize(4);
+        layerInputs[0] = permuttedDeltas;
+        layerInputs[1] = permuttedScores;
+        layerInputs[2] = priorBoxes;
+        layerInputs[3] = umat_fakeImageBlob;
+
+        layerOutputs[0] = UMat();
+        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
+
+        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
+        // equal to keepTopAfterNMS. We fill the rest by zeros.
+        const int numDets = layerOutputs[0].total() / 7;
+        CV_Assert(numDets <= keepTopAfterNMS);
+
+        MatShape s = shape(numDets, 7);
+        UMat src = layerOutputs[0].reshape(1, s.size(), &s[0]).colRange(3, 7);
+        UMat dst = outputs[0].rowRange(0, numDets);
+        src.copyTo(dst.colRange(1, 5));
+        dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.
+
+        if (numDets < keepTopAfterNMS)
+            outputs[0].rowRange(numDets, keepTopAfterNMS).setTo(0);
+
+        return true;
+    }
+#endif
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
         Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
     }
 
@@ -226,6 +304,20 @@ private:
         return slice(m, Range::all(), Range(channels / 2, channels));
     }
 
+#ifdef HAVE_OPENCL
+    static UMat getObjectScores(const UMat& m)
+    {
+        CV_Assert(m.dims == 4);
+        CV_Assert(m.size[0] == 1);
+        int channels = m.size[1];
+        CV_Assert((channels & 1) == 0);
+
+        Range r = Range(channels / 2, channels);
+        Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
+        return m(&ranges[0]);
+    }
+#endif
+
     Ptr<PriorBoxLayer> priorBoxLayer;
     Ptr<DetectionOutputLayer> detectionOutputLayer;
 
@@ -233,6 +325,9 @@ private:
     Ptr<PermuteLayer> scoresPermute;
     uint32_t keepTopAfterNMS;
     Mat fakeImageBlob;
+#ifdef HAVE_OPENCL
+    UMat umat_fakeImageBlob;
+#endif
 };
 
 
-- 
2.7.4