typedef typename internal::VecTraits<T>::vec128 vec128;
typedef typename internal::VecTraits<T>::vec64 vec64;
+#if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+ static_assert(std::numeric_limits<T>::is_integer, "template implementation is for integer types only");
+#endif
+
if (scale == 0.0f ||
(std::numeric_limits<T>::is_integer &&
(scale * std::numeric_limits<T>::max()) < 1.0f &&
typedef typename internal::VecTraits<T>::vec128 vec128;
typedef typename internal::VecTraits<T>::vec64 vec64;
+#if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+ static_assert(std::numeric_limits<T>::is_integer, "template implementation is for integer types only");
+#endif
+
if (scale == 0.0f ||
(std::numeric_limits<T>::is_integer &&
scale < 1.0f &&
return;
}
- float32x4_t v_zero = vdupq_n_f32(0.0f);
-
size_t roiw128 = size.width >= 3 ? size.width - 3 : 0;
size_t roiw64 = size.width >= 1 ? size.width - 1 : 0;
float32x4_t v_src0 = vld1q_f32(src0 + j);
float32x4_t v_src1 = vld1q_f32(src1 + j);
- uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
- vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
- vreinterpretq_u32_f32(vmulq_f32(v_src0, internal::vrecpq_f32(v_src1))), v_mask)));
+ vst1q_f32(dst + j, vmulq_f32(v_src0, internal::vrecpq_f32(v_src1)));
}
for (; j < roiw64; j += 2)
float32x2_t v_src0 = vld1_f32(src0 + j);
float32x2_t v_src1 = vld1_f32(src1 + j);
- uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
- vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
- vreinterpret_u32_f32(vmul_f32(v_src0, internal::vrecp_f32(v_src1))), v_mask)));
+ vst1_f32(dst + j, vmul_f32(v_src0, internal::vrecp_f32(v_src1)));
}
for (; j < size.width; j++)
{
- dst[j] = src1[j] ? src0[j] / src1[j] : 0.0f;
+ dst[j] = src0[j] / src1[j];
}
}
}
float32x4_t v_src0 = vld1q_f32(src0 + j);
float32x4_t v_src1 = vld1q_f32(src1 + j);
- uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
- vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
- vreinterpretq_u32_f32(vmulq_f32(vmulq_n_f32(v_src0, scale),
- internal::vrecpq_f32(v_src1))), v_mask)));
+ vst1q_f32(dst + j, vmulq_f32(vmulq_n_f32(v_src0, scale),
+ internal::vrecpq_f32(v_src1)));
}
for (; j < roiw64; j += 2)
float32x2_t v_src0 = vld1_f32(src0 + j);
float32x2_t v_src1 = vld1_f32(src1 + j);
- uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
- vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
- vreinterpret_u32_f32(vmul_f32(vmul_n_f32(v_src0, scale),
- internal::vrecp_f32(v_src1))), v_mask)));
+ vst1_f32(dst + j, vmul_f32(vmul_n_f32(v_src0, scale),
+ internal::vrecp_f32(v_src1)));
}
for (; j < size.width; j++)
{
- dst[j] = src1[j] ? src0[j] * scale / src1[j] : 0.0f;
+ dst[j] = src0[j] * scale / src1[j];
}
}
}
return;
}
- float32x4_t v_zero = vdupq_n_f32(0.0f);
-
size_t roiw128 = size.width >= 3 ? size.width - 3 : 0;
size_t roiw64 = size.width >= 1 ? size.width - 1 : 0;
float32x4_t v_src1 = vld1q_f32(src1 + j);
- uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
- vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
- vreinterpretq_u32_f32(internal::vrecpq_f32(v_src1)), v_mask)));
+ vst1q_f32(dst + j, internal::vrecpq_f32(v_src1));
}
for (; j < roiw64; j += 2)
{
float32x2_t v_src1 = vld1_f32(src1 + j);
- uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
- vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
- vreinterpret_u32_f32(internal::vrecp_f32(v_src1)), v_mask)));
+ vst1_f32(dst + j, internal::vrecp_f32(v_src1));
}
for (; j < size.width; j++)
{
- dst[j] = src1[j] ? 1.0f / src1[j] : 0;
+ dst[j] = 1.0f / src1[j];
}
}
}
float32x4_t v_src1 = vld1q_f32(src1 + j);
- uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
- vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
- vreinterpretq_u32_f32(vmulq_n_f32(internal::vrecpq_f32(v_src1),
- scale)),v_mask)));
+ vst1q_f32(dst + j, vmulq_n_f32(internal::vrecpq_f32(v_src1), scale));
}
for (; j < roiw64; j += 2)
{
float32x2_t v_src1 = vld1_f32(src1 + j);
- uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
- vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
- vreinterpret_u32_f32(vmul_n_f32(internal::vrecp_f32(v_src1),
- scale)), v_mask)));
+ vst1_f32(dst + j, vmul_n_f32(internal::vrecp_f32(v_src1), scale));
}
for (; j < size.width; j++)
{
- dst[j] = src1[j] ? scale / src1[j] : 0;
+ dst[j] = scale / src1[j];
}
}
}
return x;
v_float32x4 v_scale = v_setall_f32((float)scale);
- v_float32x4 v_zero = v_setzero_f32();
for ( ; x <= width - 8; x += 8)
{
v_float32x4 res0 = f0 * v_scale / f2;
v_float32x4 res1 = f1 * v_scale / f3;
- res0 = v_select(f2 == v_zero, v_zero, res0);
- res1 = v_select(f3 == v_zero, v_zero, res1);
-
v_store(dst + x, res0);
v_store(dst + x + 4, res1);
}
return x;
v_float32x4 v_scale = v_setall_f32((float)scale);
- v_float32x4 v_zero = v_setzero_f32();
for ( ; x <= width - 8; x += 8)
{
v_float32x4 res0 = v_scale / f0;
v_float32x4 res1 = v_scale / f1;
- res0 = v_select(f0 == v_zero, v_zero, res0);
- res1 = v_select(f1 == v_zero, v_zero, res1);
-
v_store(dst + x, res0);
v_store(dst + x + 4, res1);
}
return x;
v_float64x2 v_scale = v_setall_f64(scale);
- v_float64x2 v_zero = v_setzero_f64();
for ( ; x <= width - 4; x += 4)
{
v_float64x2 res0 = f0 * v_scale / f2;
v_float64x2 res1 = f1 * v_scale / f3;
- res0 = v_select(f2 == v_zero, v_zero, res0);
- res1 = v_select(f3 == v_zero, v_zero, res1);
-
v_store(dst + x, res0);
v_store(dst + x + 2, res1);
}
return x;
v_float64x2 v_scale = v_setall_f64(scale);
- v_float64x2 v_zero = v_setzero_f64();
for ( ; x <= width - 4; x += 4)
{
v_float64x2 res0 = v_scale / f0;
v_float64x2 res1 = v_scale / f1;
- res0 = v_select(f0 == v_zero, v_zero, res0);
- res1 = v_select(f1 == v_zero, v_zero, res1);
-
v_store(dst + x, res0);
v_store(dst + x + 2, res1);
}
// Per-element body for the scaled multiply: the product srcelem1 * scale * srcelem2
// is passed through convertToDT and written with storedst.
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2))
#elif defined OP_DIV
+#ifdef CV_DST_TYPE_IS_INTEGER
// Per-element body for OP_DIV with a zero-divisor guard.
// e2 holds srcelem2 as workT; the stored value is srcelem1 / e2 when e2 is
// non-zero and zero otherwise, converted with convertToDT before storedst.
#define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \
storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero))
+#else
+#define PROCESS_ELEM \
+ workT e2 = srcelem2; \
+ storedst(convertToDT(srcelem1 / e2))
+#endif
#elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS
#else
#define EXTRA_PARAMS , scaleT scale
#endif
+#ifdef CV_DST_TYPE_IS_INTEGER
// Per-element body for OP_DIV_SCALE with a zero-divisor guard.
// e2 holds srcelem2 as workT; a zero e2 stores zero, otherwise the stored value
// is srcelem1 * (workT)(scale) / e2, converted with convertToDT before storedst.
#define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \
storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)))
+#else
+#define PROCESS_ELEM \
+ workT e2 = srcelem2; \
+ storedst(convertToDT(srcelem1 * (workT)(scale) / e2))
+#endif
#elif defined OP_RDIV_SCALE
#undef EXTRA_PARAMS
#else
#define EXTRA_PARAMS , scaleT scale
#endif
+#ifdef CV_DST_TYPE_IS_INTEGER
// Per-element body for OP_RDIV_SCALE (operands reversed: srcelem1 is the divisor).
// e1 holds srcelem1 as workT; a zero e1 stores zero, otherwise the stored value
// is srcelem2 * (workT)(scale) / e1, converted with convertToDT before storedst.
#define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \
storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)))
+#else
+#define PROCESS_ELEM \
+ workT e1 = srcelem1; \
+ storedst(convertToDT(srcelem2 * (workT)(scale) / e1))
+#endif
#elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT scale
+#ifdef CV_DST_TYPE_IS_INTEGER
// Per-element body for OP_RECIP_SCALE with a zero-divisor guard.
// e1 holds srcelem1 as workT; the stored value is scale / e1 when e1 is
// non-zero and zero otherwise, converted with convertToDT before storedst.
// NOTE(review): scale is used here without the (workT) cast that the
// OP_DIV_SCALE / OP_RDIV_SCALE bodies apply - confirm this is intended.
#define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \
storedst(convertToDT(e1 != zero ? scale / e1 : zero))
+#else
+#define PROCESS_ELEM \
+ workT e1 = srcelem1; \
+ storedst(convertToDT(scale / e1))
+#endif
#elif defined OP_ADDW
#undef EXTRA_PARAMS