CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
- OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
{
CV_TRACE_FUNCTION();
- CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget) &&
- OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(this->preferableTarget),
func.applyOCL(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
{
- if (preferableTarget == DNN_TARGET_OPENCL && !fuse_batch_norm)
+ if (!fuse_batch_norm)
{
top->getScaleShift(scale, shift);
fuse_batch_norm = !scale.empty() || !shift.empty();
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
- OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
for( i = 0; i < splitDim; i++ )
newRows *= inpBlob.size[i];
- if (inpBlob.total() == newRows)
+ Mat inpMat = inpBlob.reshape(1, newRows);
+ Mat outMat = outBlob.reshape(1, newRows);
+
+ if ( inpBlob.total() == newRows )
{
// MVN is applied to single values at an every row.
- outBlob.setTo(0);
+ if (shift.empty())
+ {
+ outBlob.setTo(0);
+ }
+ else
+ {
+ for ( i = 0; i < newRows; i++ )
+ {
+ outMat.row(i).setTo(((float*)shift.data)[i]);
+ }
+ }
return;
}
- Mat inpMat = inpBlob.reshape(1, newRows);
- Mat outMat = outBlob.reshape(1, newRows);
-
Scalar mean, dev;
for ( i = 0; i < newRows; i++)
{
Mat inpRow = inpMat.row(i);
Mat outRow = outMat.row(i);
-
+ float weight = 1.f;
+ float bias = 0.f;
+ if (fuse_batch_norm)
+ {
+ weight = i < scale.cols ? ((float*)scale.data)[i] : weight;
+ bias = i < shift.cols ? ((float*)shift.data)[i] : bias;
+ }
cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
- inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
+ double normalizationScale = 1.0;
+ double normalizationShift = 0.0;
+ if (fuse_batch_norm)
+ {
+ normalizationScale = alpha * weight;
+ normalizationShift = -mean[0] * normalizationScale + bias;
+ }
+ else
+ {
+ normalizationScale = alpha;
+ normalizationShift = -mean[0] * alpha;
+ }
+ inpRow.convertTo(outRow, outRow.type(), normalizationScale, normalizationShift);
}
}
}
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
- OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
Dtype mean_val = mean[x];
vec_type src_vec = load(src, index);
- vec_type dst_vec = native_powr(src_vec - (vec_type)mean_val, 2);
+ vec_type dst_vec = src_vec - (vec_type)mean_val;
+ dst_vec = dst_vec * dst_vec;
store(dst_vec, dst, index);
}
const T4 a2 = vload4(i, src0_read + 2 * A_col_size);
const T4 a3 = vload4(i, src0_read + 3 * A_col_size);
- dot0 = native_powr(convert_float4(a0) - (Dtype4)sum.x, 2);
- dot1 = native_powr(convert_float4(a1) - (Dtype4)sum.y, 2);
- dot2 = native_powr(convert_float4(a2) - (Dtype4)sum.z, 2);
- dot3 = native_powr(convert_float4(a3) - (Dtype4)sum.w, 2);
+ dot0 = convert_float4(a0) - (Dtype4)sum.x;
+ dot1 = convert_float4(a1) - (Dtype4)sum.y;
+ dot2 = convert_float4(a2) - (Dtype4)sum.z;
+ dot3 = convert_float4(a3) - (Dtype4)sum.w;
+ dot0 = dot0 * dot0;
+ dot1 = dot1 * dot1;
+ dot2 = dot2 * dot2;
+ dot3 = dot3 * dot3;
vstore4(dot0, i, dst0_read);
vstore4(dot1, i, dst0_read + A_col_size);
TEST_P(Test_TensorFlow_layers, pooling)
{
int targetId = GetParam();
+ cv::ocl::Device d = cv::ocl::Device::getDefault();
+ bool loosenFlag = targetId == DNN_TARGET_OPENCL && d.isIntel() && d.type() == cv::ocl::Device::TYPE_CPU;
runTensorFlowNet("max_pool_even", targetId);
runTensorFlowNet("max_pool_odd_valid", targetId);
runTensorFlowNet("ave_pool_same", targetId);
- runTensorFlowNet("max_pool_odd_same", targetId);
+ runTensorFlowNet("max_pool_odd_same", targetId, false, loosenFlag ? 3e-5 : 1e-5, loosenFlag ? 3e-4 : 1e-4);
runTensorFlowNet("reduce_mean", targetId); // an average pooling over all spatial dimensions.
}