deltasPermute->finalize(layerInputs, layerOutputs);
}
+#ifdef HAVE_OPENCL
+    // OpenCL (UMat) implementation of the forward pass.
+    //
+    // inputs_    - exactly three blobs: object scores, bounding-box deltas and
+    //              image info (its first two values are read as rows and cols).
+    // outputs_   - outputs_[0] receives the result: column 0 is zeroed and
+    //              columns 1..4 receive columns 3..6 of the DetectionOutputLayer
+    //              output; rows past the number of detections are zero-filled
+    //              up to keepTopAfterNMS.
+    // internals_ - exactly three scratch blobs: prior boxes, permuted scores
+    //              and permuted deltas.
+    //
+    // Always returns true; malformed arguments trigger CV_Assert instead.
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+        internals_.getUMatVector(internals);
+
+        CV_Assert(inputs.size() == 3);
+        CV_Assert(internals.size() == 3);
+        CV_Assert(!outputs.empty());  // outputs[0] is written below.
+        const UMat& scores = inputs[0];
+        const UMat& bboxDeltas = inputs[1];
+        const UMat& imInfo = inputs[2];
+        UMat& priorBoxes = internals[0];
+        UMat& permuttedScores = internals[1];
+        UMat& permuttedDeltas = internals[2];
+
+        CV_Assert(imInfo.total() >= 2);
+        // We've chosen the smallest data type because we need just a shape from it.
+        // The image info is copied to host memory to read the two size values.
+        Mat szMat;
+        imInfo.copyTo(szMat);
+        int rows = (int)szMat.at<float>(0);
+        int cols = (int)szMat.at<float>(1);
+        umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
+        umat_fakeImageBlob.setTo(0);
+
+        // Generate prior boxes.
+        std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
+        layerInputs[0] = scores;
+        layerInputs[1] = umat_fakeImageBlob;
+        priorBoxLayer->forward(layerInputs, layerOutputs, internals);
+
+        // Permute scores.
+        layerInputs.assign(1, getObjectScores(scores));
+        layerOutputs.assign(1, permuttedScores);
+        scoresPermute->forward(layerInputs, layerOutputs, internals);
+
+        // Permute deltas.
+        layerInputs.assign(1, bboxDeltas);
+        layerOutputs.assign(1, permuttedDeltas);
+        deltasPermute->forward(layerInputs, layerOutputs, internals);
+
+        // Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
+        // output internally because of different number of objects after NMS.
+        layerInputs.resize(4);
+        layerInputs[0] = permuttedDeltas;
+        layerInputs[1] = permuttedScores;
+        layerInputs[2] = priorBoxes;
+        layerInputs[3] = umat_fakeImageBlob;
+
+        layerOutputs[0] = UMat();
+        detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
+
+        // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
+        // equal to keepTopAfterNMS. We fill the rest by zeros.
+        const int numDets = (int)(layerOutputs[0].total() / 7);
+        const int maxDets = (int)keepTopAfterNMS;
+        CV_Assert(numDets <= maxDets);
+
+        // With no detections there is nothing to copy; skip the reshape and
+        // row/column slicing, which are not meaningful for a 0x7 shape.
+        if (numDets > 0)
+        {
+            MatShape s = shape(numDets, 7);
+            UMat src = layerOutputs[0].reshape(1, (int)s.size(), &s[0]).colRange(3, 7);
+            UMat dst = outputs[0].rowRange(0, numDets);
+            src.copyTo(dst.colRange(1, 5));
+            dst.col(0).setTo(0);  // First column are batch ids. Keep it zeros too.
+        }
+
+        if (numDets < maxDets)
+            outputs[0].rowRange(numDets, maxDets).setTo(0);
+
+        return true;
+    }
+#endif
+
// Forward entry point of the layer.
//
// When the preferable target is DNN_TARGET_OPENCL and the default OpenCL
// device is an Intel one (wrapped in OCL_PERFORMANCE_CHECK), forward_ocl() is
// offered to CV_OCL_RUN first. Otherwise, or if that path is not taken, the
// generic Layer::forward_fallback() handles the call.
// NOTE(review): CV_OCL_RUN presumably returns from this function when
// forward_ocl() succeeds - confirm against the macro definition.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
// Try the OpenCL implementation first (no trailing semicolon: the macro
// invocation is written exactly as a statement on its own).
+ CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+ OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
// Generic path used when the OpenCL branch above did not handle the call.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
return slice(m, Range::all(), Range(channels / 2, channels));
}
+#ifdef HAVE_OPENCL
+    // UMat overload: returns a view of the second half of the channels
+    // (axis 1) of a 4-dimensional blob whose first dimension is 1.
+    // The channel count must be even.
+    static UMat getObjectScores(const UMat& m)
+    {
+        CV_Assert(m.dims == 4);
+        CV_Assert(m.size[0] == 1);
+        const int numChannels = m.size[1];
+        CV_Assert(numChannels % 2 == 0);
+
+        // Select everything along every axis, then narrow the channel axis
+        // to its upper half.
+        Range selection[4];
+        for (int axis = 0; axis < 4; ++axis)
+            selection[axis] = Range::all();
+        selection[1] = Range(numChannels / 2, numChannels);
+
+        return m(selection);
+    }
+#endif
+
// Internal sub-layers and state used by the forward pass.
Ptr<PriorBoxLayer> priorBoxLayer;  // Generates prior boxes from the scores blob and the fake image blob.
Ptr<DetectionOutputLayer> detectionOutputLayer;  // Sorts predictions by score and applies NMS.
Ptr<PermuteLayer> scoresPermute;  // Permutes the object scores before detection output.
uint32_t keepTopAfterNMS;  // Upper bound on detections; output rows beyond the detected ones are zero-filled up to this count.
Mat fakeImageBlob;  // NOTE(review): presumably the host-side counterpart of umat_fakeImageBlob - confirm.
+#ifdef HAVE_OPENCL
+ UMat umat_fakeImageBlob;  // Zero-filled 1x1xrowsxcols CV_8UC1 blob; only its shape is needed (OpenCL path).
+#endif
};