}
};
+#ifdef HAVE_OPENCL
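+    // OpenCL (T-API) forward pass: evaluates the elementwise operation on UMats
+    // with OpenCV arithmetic routines (add/multiply/max) and returns false for
+    // unsupported ops so that forward() can fall back to the CPU implementation.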
+    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
+    {
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inputs_.getUMatVector(inputs);
+        outputs_.getUMatVector(outputs);
+
+        switch (op)
+        {
+            case SUM:
+                if (coeffs.empty())
+                {
+                    // Plain sum: accumulate every input into outputs[0].
+                    add(inputs[0], inputs[1], outputs[0]);
+                    for (int i = 2; i < inputs.size(); ++i)
+                        add(outputs[0], inputs[i], outputs[0]);
+                }
+                else
+                {
+                    // Weighted sum: outputs[0] = coeffs[0]*inputs[0] + coeffs[1]*inputs[1] + ...
+                    UMat mul0, mul1;
+                    multiply(coeffs[0], inputs[0], mul0);
+                    multiply(coeffs[1], inputs[1], mul1);
+                    add(mul0, mul1, outputs[0]);
+                    for (int i = 2; i < inputs.size(); ++i)
+                    {
+                        multiply(coeffs[i], inputs[i], mul0);
+                        add(mul0, outputs[0], outputs[0]);
+                    }
+                }
+                break;
+            case PROD:
+                // Elementwise product of all inputs.
+                multiply(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    multiply(inputs[i], outputs[0], outputs[0]);
+                break;
+            case MAX:
+                // Elementwise maximum of all inputs.
+                max(inputs[0], inputs[1], outputs[0]);
+                for (int i = 2; i < inputs.size(); ++i)
+                    max(inputs[i], outputs[0], outputs[0]);
+                break;
+            default:
+                // Unsupported operation: report failure so forward() uses the CPU fallback.
+                return false;
+        }
+        return true;
+    }
+#endif
+
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+        // CV_OCL_RUN executes forward_ocl() when the OpenCL target is selected;
+        // on success it returns from forward(), otherwise the CPU fallback below runs.
+        CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
+                   OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+                   forward_ocl(inputs_arr, outputs_arr, internals_arr))
+
        Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
    }
return findDataFile("dnn/tensorflow/" + file, false);
}
-static void runTensorFlowNet(const std::string& prefix, bool hasText = false,
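+// targetId selects the DNN target for the test run (DNN_TARGET_CPU by default,
+// DNN_TARGET_OPENCL for the OpenCL path).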
+static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGET_CPU, bool hasText = false,
double l1 = 1e-5, double lInf = 1e-4,
bool memoryLoad = false)
{
ASSERT_FALSE(net.empty());
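+ // Run with the requested backend/target so that DNN_TARGET_OPENCL exercises the OpenCL code paths.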
+ net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+ net.setPreferableTarget(targetId);
+
cv::Mat input = blobFromNPY(inpPath);
cv::Mat target = blobFromNPY(outPath);
runTensorFlowNet("eltwise_add_mul");
}
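+// The same eltwise_add_mul graph as above, executed on the OpenCL target.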
+OCL_TEST(Test_TensorFlow, eltwise_add_mul)
+{
+ runTensorFlowNet("eltwise_add_mul", DNN_TARGET_OPENCL);
+}
+
TEST(Test_TensorFlow, pad_and_concat)
{
runTensorFlowNet("pad_and_concat");
{
runTensorFlowNet("batch_norm");
runTensorFlowNet("fused_batch_norm");
- runTensorFlowNet("batch_norm_text", true);
+ runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
}
TEST(Test_TensorFlow, pooling)
{
const float l1 = 1e-3;
const float lInf = 1e-2;
- runTensorFlowNet("fp16_single_conv", false, l1, lInf);
- runTensorFlowNet("fp16_deconvolution", false, l1, lInf);
- runTensorFlowNet("fp16_max_pool_odd_same", false, l1, lInf);
- runTensorFlowNet("fp16_padding_valid", false, l1, lInf);
- runTensorFlowNet("fp16_eltwise_add_mul", false, l1, lInf);
- runTensorFlowNet("fp16_max_pool_odd_valid", false, l1, lInf);
- runTensorFlowNet("fp16_pad_and_concat", false, l1, lInf);
- runTensorFlowNet("fp16_max_pool_even", false, l1, lInf);
- runTensorFlowNet("fp16_padding_same", false, l1, lInf);
+ runTensorFlowNet("fp16_single_conv", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_deconvolution", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_max_pool_odd_same", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_padding_valid", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_eltwise_add_mul", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_max_pool_odd_valid", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_pad_and_concat", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_max_pool_even", DNN_TARGET_CPU, false, l1, lInf);
+ runTensorFlowNet("fp16_padding_same", DNN_TARGET_CPU, false, l1, lInf);
}
TEST(Test_TensorFlow, quantized)
TEST(Test_TensorFlow, lstm)
{
- runTensorFlowNet("lstm", true);
+ runTensorFlowNet("lstm", DNN_TARGET_CPU, true);
}
TEST(Test_TensorFlow, split)
{
double l1 = 1e-5;
double lInf = 1e-4;
- runTensorFlowNet("lstm", true, l1, lInf, true);
+ runTensorFlowNet("lstm", DNN_TARGET_CPU, true, l1, lInf, true);
- runTensorFlowNet("batch_norm", false, l1, lInf, true);
- runTensorFlowNet("fused_batch_norm", false, l1, lInf, true);
- runTensorFlowNet("batch_norm_text", true, l1, lInf, true);
+ runTensorFlowNet("batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
+ runTensorFlowNet("fused_batch_norm", DNN_TARGET_CPU, false, l1, lInf, true);
+ runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
}
}