return true;
}
+#ifdef HAVE_OPENCL
+    // OpenCL forward path: Lp-normalizes each image in the batch, either over
+    // the whole image (acrossSpatial) or per spatial position across channels,
+    // then applies the optional learned scale stored in blobs[0].
+    // Returns true on success so CV_OCL_RUN can short-circuit the CPU fallback.
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+        std::vector<UMat> internals;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+        internals_.getUMatVector(internals);
+
+        CV_Assert(inputs.size() == 1 && outputs.size() == 1);
+        CV_Assert(inputs[0].total() == outputs[0].total());
+
+        const UMat& inp0 = inputs[0];
+        UMat& buffer = internals[0];
+        size_t num = inp0.size[0];
+        size_t channels = inp0.size[1];
+        size_t channelSize = inp0.total() / (num * channels);
+
+        // View the single input/output blob as (num*channels) x channelSize so
+        // every image of the batch is a contiguous block of `channels` rows.
+        // NOTE(review): indexing inputs[i]/outputs[i] inside the loop (as the
+        // CPU-style code did) reads past the single blob whenever num > 1.
+        MatShape s = shape(num * channels, channelSize);
+        UMat src_all = inputs[0].reshape(1, s.size(), &s[0]);
+        UMat dst_all = outputs[0].reshape(1, s.size(), &s[0]);
+        for (size_t i = 0; i < num; ++i)
+        {
+            UMat src = src_all.rowRange((int)(i * channels), (int)((i + 1) * channels));
+            UMat dst = dst_all.rowRange((int)(i * channels), (int)((i + 1) * channels));
+
+            UMat abs_mat;
+            absdiff(src, cv::Scalar::all(0), abs_mat);
+            pow(abs_mat, pnorm, buffer);
+
+            if (acrossSpatial)
+            {
+                // Single norm for the whole image; add eps to avoid division
+                // by zero / overflow.
+                float absSum = sum(buffer)[0] + epsilon;
+                float norm = pow(absSum, 1.0f / pnorm);
+                multiply(src, 1.0f / norm, dst);
+            }
+            else
+            {
+                // One norm per spatial position, accumulated across channels.
+                // (Without this branch dst would stay unwritten when
+                // acrossSpatial is false.)
+                UMat norm;
+                reduce(buffer, norm, 0, REDUCE_SUM, CV_32F);   // 1 x channelSize
+                add(norm, epsilon, norm);
+                pow(norm, 1.0f / pnorm, norm);
+                // Broadcast the per-position norm back to channels rows.
+                repeat(norm, (int)channels, 1, buffer);
+                divide(src, buffer, dst);
+            }
+
+            if (!blobs.empty())
+            {
+                // Scale the output with the learned factor(s).
+                Mat scale = blobs[0];
+                if (scale.total() == 1)
+                {
+                    // _scale: 1 x 1 — one scalar for all channels.
+                    multiply(dst, scale.at<float>(0, 0), dst);
+                }
+                else
+                {
+                    // _scale: _channels x 1 — one factor per channel, tiled
+                    // across the spatial dimension.
+                    CV_Assert(scale.total() == channels);
+                    repeat(scale, 1, dst.cols, buffer);
+                    multiply(dst, buffer, dst);
+                }
+            }
+        }
+        return true;
+    }
+#endif
+
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+    // Dispatch to the OpenCL implementation when the preferable target is
+    // OpenCL and the device passes the Intel performance gate; otherwise
+    // fall through to the generic CPU implementation below.
+    CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+               OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+               forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
runTorchNet("net_normalize", DNN_TARGET_CPU, "", false, true);
}
+// OpenCL counterpart of the CPU net_normalize import test: loads the same
+// Torch model but executes it with the DNN_TARGET_OPENCL target.
+OCL_TEST(Torch_Importer, net_normalize)
+{
+    runTorchNet("net_normalize", DNN_TARGET_OPENCL, "", false, true);
+}
+
TEST(Torch_Importer, net_padding)
{
runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);