/wd4701 /wd4703 # potentially uninitialized local/pointer variable 'value' used
/wd4505 # unreferenced local function has been removed
)
- if(MSVC_VERSION LESS 1910) # MSVS 2015
+ if(MSVC_VERSION LESS 1920) # MSVS 2015/2017
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4309) # 'static_cast': truncation of constant value
endif()
else()
/wd4305 /wd4127 /wd4100 /wd4512 /wd4125 /wd4389 /wd4510 /wd4610
/wd4702 /wd4456 /wd4457 /wd4065 /wd4310 /wd4661 /wd4506
)
- if(MSVC_VERSION LESS 1910) # MSVS 2015, .pb.cc generated files
+ if(MSVC_VERSION LESS 1920) # MSVS 2015/2017, .pb.cc generated files
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4309) # 'static_cast': truncation of constant value
endif()
if(MSVC_VERSION LESS 1920) # <MSVS2019, .pb.cc generated files
class AddPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
class AddCPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, cv::GCompileArgs>> {};
class SubPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
- class SubCPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
+ class SubCPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, cv::GCompileArgs>> {};
class SubRCPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
class MulPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, double, cv::GCompileArgs>> {};
class MulDoublePerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
PERF_TEST_P_(SubCPerfTest, TestPerformance)
{
- Size sz = get<0>(GetParam());
- MatType type = get<1>(GetParam());
- int dtype = get<2>(GetParam());
- cv::GCompileArgs compile_args = get<3>(GetParam());
+ compare_f cmpF;
+ cv::Size sz;
+ MatType type = -1;
+ int dtype = -1;
+ cv::GCompileArgs compile_args;
+
+ std::tie(cmpF, sz, type, dtype, compile_args) = GetParam();
initMatsRandU(type, sz, dtype, false);
}
// Comparison ////////////////////////////////////////////////////////////
- // FIXIT unrealiable check: EXPECT_EQ(0, cv::countNonZero(out_mat_gapi != out_mat_ocv));
- EXPECT_EQ(out_mat_gapi.size(), sz);
+ {
+ EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+ }
SANITY_CHECK_NOTHING();
}
Values(cv::compile_args(CORE_CPU))));
INSTANTIATE_TEST_CASE_P(SubCPerfTestCPU, SubCPerfTest,
- Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
+ Combine(Values(AbsExact().to_compare_f()),
+ Values(szSmall128, szVGA, sz720p, sz1080p),
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
Values(-1, CV_8U, CV_16U, CV_32F),
Values(cv::compile_args(CORE_CPU))));
Values(-1, CV_8U, CV_32F),
Values(cv::compile_args(CORE_FLUID))));
-// INSTANTIATE_TEST_CASE_P(SubCPerfTestFluid, SubCPerfTest,
-// Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
-// Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-// Values(-1, CV_8U, CV_16U, CV_32F),
-// Values(cv::compile_args(CORE_FLUID))));
+ INSTANTIATE_TEST_CASE_P(SubCPerfTestFluid, SubCPerfTest,
+ Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
+ Values(szSmall128, szVGA, sz720p, sz1080p),
+ Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+ Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
+ Values(cv::compile_args(CORE_FLUID))));
// INSTANTIATE_TEST_CASE_P(SubRCPerfTestFluid, SubRCPerfTest,
// Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
Values(cv::compile_args(CORE_GPU))));
INSTANTIATE_TEST_CASE_P(SubCPerfTestGPU, SubCPerfTest,
- Combine(Values( szSmall128, szVGA, sz720p, sz1080p ),
+ Combine(Values(AbsExact().to_compare_f()),
+ Values( szSmall128, szVGA, sz720p, sz1080p ),
Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
Values( -1, CV_8U, CV_16U, CV_32F ),
Values(cv::compile_args(CORE_GPU))));
//
//--------------------------------------
-static inline v_uint16x8 v_sub_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return x - y; }
static inline v_uint16x8 v_subr_16u(const v_uint16x8 &x, const v_uint16x8 &y) { return y - x; }
-static inline v_float32x4 v_sub_32f(const v_float32x4 &x, const v_float32x4 &y) { return x - y; }
static inline v_float32x4 v_subr_32f(const v_float32x4 &x, const v_float32x4 &y) { return y - x; }
-static inline int s_sub_8u(uchar x, uchar y) { return x - y; }
static inline int s_subr_8u(uchar x, uchar y) { return y - x; }
-static inline float s_sub_32f(float x, float y) { return x - y; }
static inline float s_subr_32f(float x, float y) { return y - x; }
// manual SIMD if important case 8UC3
}
}
-static void run_arithm_s_sub3(uchar out[], const uchar in[], int width, const uchar scalar[])
-{
- run_arithm_s3(out, in, width, scalar, v_sub_16u, s_sub_8u);
-}
-
static void run_arithm_s_subr3(uchar out[], const uchar in[], int width, const uchar scalar[])
{
run_arithm_s3(out, in, width, scalar, v_subr_16u, s_subr_8u); // reverse: subr
}
-static void run_arithm_s_sub1(uchar out[], const float in[], int width, const float scalar[])
-{
- run_arithm_s1(out, in, width, scalar, v_sub_32f, s_sub_32f);
-}
-
static void run_arithm_s_subr1(uchar out[], const float in[], int width, const float scalar[])
{
run_arithm_s1(out, in, width, scalar, v_subr_32f, s_subr_32f); // reverse: subr
int width = dst.length();
int chan = dst.meta().chan;
+ const int length = width * chan;
switch (arithm)
{
{
int w = 0;
#if CV_SIMD
- w = addc_simd(in, scalar, out, width, chan);
+ w = addc_simd(in, scalar, out, length, chan);
#endif
-
- for (; w < width * chan; ++w)
+ for (; w < length; ++w)
out[w] = add<DST>(in[w], scalar[w % chan]);
break;
}
case ARITHM_SUBTRACT:
{
- // What if we cast the scalar into the SRC type?
- const SRC myscal[4] = { static_cast<SRC>(scalar[0]), static_cast<SRC>(scalar[1]),
- static_cast<SRC>(scalar[2]), static_cast<SRC>(scalar[3]) };
- bool usemyscal = (myscal[0] == scalar[0]) && (myscal[1] == scalar[1]) &&
- (myscal[2] == scalar[2]) && (myscal[3] == scalar[3]);
-
- if (usemyscal)
- {
- if (std::is_same<DST, uchar>::value &&
- std::is_same<SRC, uchar>::value &&
- chan == 3)
- run_arithm_s_sub3((uchar*)out, (const uchar*)in, width, (const uchar*)myscal);
- else if (std::is_same<DST, uchar>::value &&
- std::is_same<SRC, float>::value &&
- chan == 1)
- run_arithm_s_sub1((uchar*)out, (const float*)in, width, (const float*)myscal);
- else
- run_arithm_s(out, in, width, chan, myscal, sub<DST, SRC, SRC>);
- }
- else
- run_arithm_s(out, in, width, chan, scalar, sub<DST, SRC, float>);
+ int w = 0;
+#if CV_SIMD
+ w = subc_simd(in, scalar, out, length, chan);
+#endif
+ for (; w < length; ++w)
+ out[w] = sub<DST>(in[w], scalar[w % chan]);
break;
}
// TODO: optimize multiplication and division
}
};
+CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch)
+{
+#if CV_SIMD
+ // An AVX-512 SIMD vector holds 512 bits / 32 bits = 16 float32 elements.
+ constexpr int maxNlanes = 16;
+
+ // +2 is the extra offset required for the 3-channel case.
+ // The offset is needed to load the coefficients correctly from the scalar array into SIMD vectors.
+ // The scalar array looks like: scalar[] = {C1, C2, C3, C1, C2, C3, ...}
+ // The first scalar SIMD vector should look like:
+ // C1 C2 C3 C1
+ // The second one:
+ // C2 C3 C1 C2
+ // The third one:
+ // C3 C1 C2 C3
+ constexpr int offset = 2;
+ constexpr int buflen = maxNlanes + offset;
+#else
+ constexpr int buflen = 4; // without CV_SIMD only four float slots are reserved
+#endif
+ cv::Size bufsize(buflen, 1); // buflen x 1
+ GMatDesc bufdesc = { CV_32F, 1, bufsize };
+ Buffer buffer(bufdesc);
+ scratch = std::move(buffer); // replace the caller's scratch with the newly built buffer
+}
+
GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, true)
{
static const int Window = 1;
static void initScratch(const GMatDesc&, const GScalarDesc&, int, Buffer& scratch)
{
-#if CV_SIMD
- // 512 bits / 32 bits = 16 elements of float32 can contain a AVX 512 SIMD vector.
- constexpr int maxNlanes = 16;
-
- // +2 is offset for 3-channel case.
- // Offset is need to right load coefficients from scalar array to SIMD vectors for 3-channel case.
- // Scalar array looks like: scalar[] = {C1, C2, C3, C1, C2, C3, ...}
- // The first scalar SIMD vector should looks like:
- // C1 C2 C3 C1
- // The second:
- // C2 C3 C1 C2
- // The third:
- // C3 C1 C2 C3
- constexpr int offset = 2;
- constexpr int buflen = maxNlanes + offset;
-#else
- constexpr int buflen = 4;
-#endif
- cv::Size bufsize(buflen, 1);
- GMatDesc bufdesc = { CV_32F, 1, bufsize };
- Buffer buffer(bufdesc);
- scratch = std::move(buffer);
+ initScratchBuffer(scratch);
}
- static void resetScratch(Buffer& /* scratch */)
+ static void resetScratch(Buffer& /*scratch*/)
{
}
};
-GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, false)
+GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, true)
{
static const int Window = 1;
- static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst)
+ static void run(const View& src, const cv::Scalar& _scalar, int /*dtype*/, Buffer& dst, Buffer& scratch)
{
- const float scalar[4] = {
- static_cast<float>(_scalar[0]),
- static_cast<float>(_scalar[1]),
- static_cast<float>(_scalar[2]),
- static_cast<float>(_scalar[3])
- };
+ GAPI_Assert(src.meta().chan <= 4); // reject inputs with more than 4 channels
+
+ if (dst.y() == 0) // fill the scratch line only when dst.y() is 0 (presumably the first output row -- confirm)
+ {
+ const int chan = src.meta().chan;
+ float* sc = scratch.OutLine<float>();
+
+ for (int i = 0; i < scratch.length(); ++i)
+ sc[i] = static_cast<float>(_scalar[i % chan]); // repeat the chan scalar components cyclically over the whole scratch line
+ }
+
+ const float* scalar = scratch.OutLine<float>(); // the dispatch below takes its coefficients from the scratch line
// DST SRC OP __VA_ARGS__
- UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
- UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
- UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
- UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
- UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
- UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
- UNARY_( float, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(uchar, uchar, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(uchar, ushort, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(uchar, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(uchar, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(ushort, ushort, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(ushort, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(ushort, uchar, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(ushort, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(short, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(short, ushort, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(short, uchar, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(short, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(float, uchar , run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(float, ushort, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(float, short, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
+ UNARY_(float, float, run_arithm_s, dst, src, scalar, ARITHM_SUBTRACT);
CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
}
+
+ static void initScratch(const GMatDesc&, const GScalarDesc&, int, Buffer& scratch)
+ {
+ initScratchBuffer(scratch); // same helper that GFluidAddC::initScratch calls
+ }
+
+ static void resetScratch(Buffer& /*scratch*/)
+ {
+ }
};
GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, false)
CV_CPU_DISPATCH_MODES_ALL); \
}
-
MUL_SIMD(uchar, uchar)
MUL_SIMD(ushort, uchar)
MUL_SIMD(short, uchar)
#define ADDC_SIMD(SRC, DST) \
int addc_simd(const SRC in[], const float scalar[], DST out[], \
- const int width, const int chan) \
+ const int length, const int chan) \
{ \
- CV_CPU_DISPATCH(addc_simd, (in, scalar, out, width, chan), \
+ CV_CPU_DISPATCH(addc_simd, (in, scalar, out, length, chan), \
CV_CPU_DISPATCH_MODES_ALL); \
}
#undef ADDC_SIMD
+#define SUBC_SIMD(SRC, DST) \
+int subc_simd(const SRC in[], const float scalar[], DST out[], \
+ const int length, const int chan) \
+{ \
+ CV_CPU_DISPATCH(subc_simd, (in, scalar, out, length, chan), \
+ CV_CPU_DISPATCH_MODES_ALL); \
+}
+
+SUBC_SIMD(uchar, uchar)
+SUBC_SIMD(ushort, uchar)
+SUBC_SIMD(short, uchar)
+SUBC_SIMD(float, uchar)
+SUBC_SIMD(short, short)
+SUBC_SIMD(ushort, short)
+SUBC_SIMD(uchar, short)
+SUBC_SIMD(float, short)
+SUBC_SIMD(ushort, ushort)
+SUBC_SIMD(uchar, ushort)
+SUBC_SIMD(short, ushort)
+SUBC_SIMD(float, ushort)
+SUBC_SIMD(uchar, float)
+SUBC_SIMD(ushort, float)
+SUBC_SIMD(short, float)
+SUBC_SIMD(float, float)
+
+#undef SUBC_SIMD
+
} // namespace fluid
} // namespace gapi
} // namespace cv
#define ADDC_SIMD(SRC, DST) \
int addc_simd(const SRC in[], const float scalar[], DST out[], \
- const int width, const int chan);
+ const int length, const int chan);
ADDC_SIMD(uchar, uchar)
ADDC_SIMD(ushort, uchar)
#undef ADDC_SIMD
+#define SUBC_SIMD(SRC, DST) \
+int subc_simd(const SRC in[], const float scalar[], DST out[], \
+ const int length, const int chan);
+
+SUBC_SIMD(uchar, uchar)
+SUBC_SIMD(ushort, uchar)
+SUBC_SIMD(short, uchar)
+SUBC_SIMD(float, uchar)
+SUBC_SIMD(short, short)
+SUBC_SIMD(ushort, short)
+SUBC_SIMD(uchar, short)
+SUBC_SIMD(float, short)
+SUBC_SIMD(ushort, ushort)
+SUBC_SIMD(uchar, ushort)
+SUBC_SIMD(short, ushort)
+SUBC_SIMD(float, ushort)
+SUBC_SIMD(uchar, float)
+SUBC_SIMD(ushort, float)
+SUBC_SIMD(short, float)
+SUBC_SIMD(float, float)
+
+#undef SUBC_SIMD
+
} // namespace fluid
} // namespace gapi
} // namespace cv
#define ADDC_SIMD(SRC, DST) \
int addc_simd(const SRC in[], const float scalar[], DST out[], \
- const int width, const int chan);
+ const int length, const int chan);
ADDC_SIMD(uchar, uchar)
ADDC_SIMD(ushort, uchar)
#undef ADDC_SIMD
+#define SUBC_SIMD(SRC, DST) \
+int subc_simd(const SRC in[], const float scalar[], DST out[], \
+ const int length, const int chan);
+
+SUBC_SIMD(uchar, uchar)
+SUBC_SIMD(ushort, uchar)
+SUBC_SIMD(short, uchar)
+SUBC_SIMD(float, uchar)
+SUBC_SIMD(short, short)
+SUBC_SIMD(ushort, short)
+SUBC_SIMD(uchar, short)
+SUBC_SIMD(float, short)
+SUBC_SIMD(ushort, ushort)
+SUBC_SIMD(uchar, ushort)
+SUBC_SIMD(short, ushort)
+SUBC_SIMD(float, ushort)
+SUBC_SIMD(uchar, float)
+SUBC_SIMD(ushort, float)
+SUBC_SIMD(short, float)
+SUBC_SIMD(float, float)
+
+#undef SUBC_SIMD
+
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
struct scale_tag {};
//
//-------------------------
-CV_ALWAYS_INLINE void addc_pack_store_c3(short* outx, const v_int32& c1,
- const v_int32& c2, const v_int32& c3,
- const v_int32& c4, const v_int32& c5,
- const v_int32& c6)
+struct add_tag {};
+struct sub_tag {};
+
+CV_ALWAYS_INLINE void arithmOpScalar_pack_store_c3(short* outx, const v_int32& c1,
+ const v_int32& c2, const v_int32& c3,
+ const v_int32& c4, const v_int32& c5,
+ const v_int32& c6)
{
constexpr int nlanes = v_int16::nlanes;
vx_store(outx, v_pack(c1, c2));
vx_store(&outx[2*nlanes], v_pack(c5, c6));
}
-CV_ALWAYS_INLINE void addc_pack_store_c3(ushort* outx, const v_int32& c1,
- const v_int32& c2, const v_int32& c3,
- const v_int32& c4, const v_int32& c5,
- const v_int32& c6)
+CV_ALWAYS_INLINE void arithmOpScalar_pack_store_c3(ushort* outx, const v_int32& c1,
+ const v_int32& c2, const v_int32& c3,
+ const v_int32& c4, const v_int32& c5,
+ const v_int32& c6)
{
constexpr int nlanes = v_uint16::nlanes;
vx_store(outx, v_pack_u(c1, c2));
vx_store(&outx[2*nlanes], v_pack_u(c5, c6));
}
-template<typename SRC, typename DST>
+CV_ALWAYS_INLINE v_float32 oper(add_tag, const v_float32& a, const v_float32& sc)
+{
+ return a + sc;
+}
+
+CV_ALWAYS_INLINE v_float32 oper(sub_tag, const v_float32& a, const v_float32& sc)
+{
+ return a - sc;
+}
+
+template<typename oper_tag, typename SRC, typename DST>
CV_ALWAYS_INLINE
typename std::enable_if<(std::is_same<DST, ushort>::value ||
std::is_same<DST, short>::value), void>::type
-addc_simd_common_impl(const SRC* inx, DST* outx, const v_float32& sc, const int nlanes)
+arithmOpScalar_simd_common_impl(oper_tag t, const SRC* inx, DST* outx,
+ const v_float32& sc, const int nlanes)
{
v_float32 a1 = vg_load_f32(inx);
v_float32 a2 = vg_load_f32(&inx[nlanes/2]);
- v_store_i16(outx, v_round(a1 + sc), v_round(a2 + sc));
+ v_store_i16(outx, v_round(oper(t, a1, sc)), v_round(oper(t, a2, sc)));
}
//-------------------------------------------------------------------------------------------------
-template<typename SRC>
-CV_ALWAYS_INLINE void addc_simd_common_impl(const SRC* inx, uchar* outx, const v_float32& sc, const int nlanes)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalar_simd_common_impl(oper_tag t, const SRC* inx,
+ uchar* outx, const v_float32& sc,
+ const int nlanes)
{
v_float32 a1 = vg_load_f32(inx);
v_float32 a2 = vg_load_f32(&inx[nlanes/4]);
v_float32 a3 = vg_load_f32(&inx[nlanes/2]);
v_float32 a4 = vg_load_f32(&inx[3 * nlanes/4]);
- vx_store(outx, v_pack_u(v_pack(v_round(a1 + sc),
- v_round(a2 + sc)),
- v_pack(v_round(a3 + sc),
- v_round(a4 + sc))));
+ vx_store(outx, v_pack_u(v_pack(v_round(oper(t, a1, sc)),
+ v_round(oper(t, a2, sc))),
+ v_pack(v_round(oper(t, a3, sc)),
+ v_round(oper(t, a4, sc)))));
}
//-------------------------------------------------------------------------------------------------
-template<typename SRC>
-CV_ALWAYS_INLINE void addc_simd_common_impl(const SRC* inx, float* outx, const v_float32& sc, const int)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalar_simd_common_impl(oper_tag t, const SRC* inx,
+ float* outx, const v_float32& sc, const int)
{
v_float32 a1 = vg_load_f32(inx);
- vx_store(outx, a1 + sc);
+ vx_store(outx, oper(t, a1, sc));
}
//-------------------------------------------------------------------------------------------------
-template<typename SRC, typename DST>
+template<typename oper_tag, typename SRC, typename DST>
CV_ALWAYS_INLINE
typename std::enable_if<std::is_same<DST, short>::value ||
std::is_same<DST, ushort>::value, void>::type
-addc_simd_c3_impl(const SRC* inx, DST* outx, const v_float32& s1, const v_float32& s2,
+arithmOpScalar_simd_c3_impl(oper_tag t, const SRC* inx, DST* outx, const v_float32& s1, const v_float32& s2,
const v_float32& s3, const int nlanes)
{
v_float32 a1 = vg_load_f32(inx);
v_float32 a5 = vg_load_f32(&inx[2 * nlanes]);
v_float32 a6 = vg_load_f32(&inx[5 * nlanes / 2]);
- addc_pack_store_c3(outx, v_round(a1 + s1),
- v_round(a2 + s2),
- v_round(a3 + s3),
- v_round(a4 + s1),
- v_round(a5 + s2),
- v_round(a6 + s3));
+ arithmOpScalar_pack_store_c3(outx, v_round(oper(t, a1, s1)),
+ v_round(oper(t, a2, s2)),
+ v_round(oper(t, a3, s3)),
+ v_round(oper(t, a4, s1)),
+ v_round(oper(t, a5, s2)),
+ v_round(oper(t, a6, s3)));
}
//-------------------------------------------------------------------------------------------------
-template<typename SRC>
-CV_ALWAYS_INLINE void addc_simd_c3_impl(const SRC* inx, uchar* outx,
- const v_float32& s1, const v_float32& s2,
- const v_float32& s3, const int nlanes)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalar_simd_c3_impl(oper_tag t, const SRC* inx, uchar* outx,
+ const v_float32& s1, const v_float32& s2,
+ const v_float32& s3, const int nlanes)
{
vx_store(outx,
- v_pack_u(v_pack(v_round(vg_load_f32(inx) + s1),
- v_round(vg_load_f32(&inx[nlanes/4]) + s2)),
- v_pack(v_round(vg_load_f32(&inx[nlanes/2]) + s3),
- v_round(vg_load_f32(&inx[3*nlanes/4]) + s1))));
+ v_pack_u(v_pack(v_round(oper(t, vg_load_f32(inx), s1)),
+ v_round(oper(t, vg_load_f32(&inx[nlanes/4]), s2))),
+ v_pack(v_round(oper(t, vg_load_f32(&inx[nlanes/2]), s3)),
+ v_round(oper(t, vg_load_f32(&inx[3*nlanes/4]), s1)))));
vx_store(&outx[nlanes],
- v_pack_u(v_pack(v_round(vg_load_f32(&inx[nlanes]) + s2),
- v_round(vg_load_f32(&inx[5*nlanes/4]) + s3)),
- v_pack(v_round(vg_load_f32(&inx[3*nlanes/2]) + s1),
- v_round(vg_load_f32(&inx[7*nlanes/4]) + s2))));
+ v_pack_u(v_pack(v_round(oper(t, vg_load_f32(&inx[nlanes]), s2)),
+ v_round(oper(t, vg_load_f32(&inx[5*nlanes/4]), s3))),
+ v_pack(v_round(oper(t, vg_load_f32(&inx[3*nlanes/2]), s1)),
+ v_round(oper(t, vg_load_f32(&inx[7*nlanes/4]), s2)))));
vx_store(&outx[2 * nlanes],
- v_pack_u(v_pack(v_round(vg_load_f32(&inx[2*nlanes]) + s3),
- v_round(vg_load_f32(&inx[9*nlanes/4]) + s1)),
- v_pack(v_round(vg_load_f32(&inx[5*nlanes/2]) + s2),
- v_round(vg_load_f32(&inx[11*nlanes/4]) + s3))));
+ v_pack_u(v_pack(v_round(oper(t, vg_load_f32(&inx[2*nlanes]), s3)),
+ v_round(oper(t, vg_load_f32(&inx[9*nlanes/4]), s1))),
+ v_pack(v_round(oper(t, vg_load_f32(&inx[5*nlanes/2]), s2)),
+ v_round(oper(t, vg_load_f32(&inx[11*nlanes/4]), s3)))));
}
//-------------------------------------------------------------------------------------------------
-template<typename SRC>
-CV_ALWAYS_INLINE void addc_simd_c3_impl(const SRC* in, float* out,
- const v_float32& s1, const v_float32& s2,
- const v_float32& s3, const int nlanes)
+template<typename oper_tag, typename SRC>
+CV_ALWAYS_INLINE void arithmOpScalar_simd_c3_impl(oper_tag t, const SRC* in, float* out,
+ const v_float32& s1, const v_float32& s2,
+ const v_float32& s3, const int nlanes)
{
v_float32 a1 = vg_load_f32(in);
v_float32 a2 = vg_load_f32(&in[nlanes]);
v_float32 a3 = vg_load_f32(&in[2*nlanes]);
- vx_store(out, a1 + s1);
- vx_store(&out[nlanes], a2 + s2);
- vx_store(&out[2*nlanes], a3 + s3);
+ vx_store(out, oper(t, a1, s1));
+ vx_store(&out[nlanes], oper(t, a2, s2));
+ vx_store(&out[2*nlanes], oper(t, a3, s3));
}
//-------------------------------------------------------------------------------------------------
-template<typename SRC, typename DST>
-CV_ALWAYS_INLINE int addc_simd_c3(const SRC in[], const float scalar[], DST out[], const int length)
+template<typename oper_tag, typename SRC, typename DST>
+CV_ALWAYS_INLINE int arithmOpScalar_simd_c3(oper_tag t, const SRC in[],
+ const float scalar[], DST out[],
+ const int length)
{
constexpr int chan = 3;
constexpr int nlanes = vector_type_of_t<DST>::nlanes;
{
for (; x <= length - lanes; x += lanes)
{
- addc_simd_c3_impl(&in[x], &out[x], s1, s2, s3, nlanes);
+ arithmOpScalar_simd_c3_impl(t, &in[x], &out[x], s1, s2, s3, nlanes);
}
if (x < length)
return x;
}
-template<typename SRC, typename DST>
-CV_ALWAYS_INLINE int addc_simd_common(const SRC in[], const float scalar[], DST out[], const int length)
+//-------------------------------------------------------------------------------------------------
+
+template<typename oper_tag, typename SRC, typename DST>
+CV_ALWAYS_INLINE int arithmOpScalar_simd_common(oper_tag t, const SRC in[],
+ const float scalar[], DST out[],
+ const int length)
{
constexpr int nlanes = vector_type_of_t<DST>::nlanes;
{
for (; x <= length - nlanes; x += nlanes)
{
- addc_simd_common_impl(&in[x], &out[x], sc, nlanes);
+ arithmOpScalar_simd_common_impl(t, &in[x], &out[x], sc, nlanes);
}
if (x < length)
return x;
}
-#define ADDC_SIMD(SRC, DST) \
-int addc_simd(const SRC in[], const float scalar[], DST out[], \
- const int width, const int chan) \
-{ \
- const int length = width * chan; \
- switch (chan) \
- { \
- case 1: \
- case 2: \
- case 4: \
- return addc_simd_common(in, scalar, out, length); \
- case 3: \
- return addc_simd_c3(in, scalar, out, length); \
- default: \
- GAPI_Assert(chan <= 4); \
- break; \
- } \
- return 0; \
+
+
+#define ADDC_SIMD(SRC, DST) \
+int addc_simd(const SRC in[], const float scalar[], DST out[], \
+ const int length, const int chan) \
+{ \
+ switch (chan) \
+ { \
+ case 1: \
+ case 2: \
+ case 4: \
+ return arithmOpScalar_simd_common(add_tag{}, in, scalar, out, length); \
+ case 3: \
+ return arithmOpScalar_simd_c3(add_tag{}, in, scalar, out, length); \
+ default: \
+ GAPI_Assert(chan <= 4); \
+ break; \
+ } \
+ return 0; \
}
ADDC_SIMD(uchar, uchar)
#undef ADDC_SIMD
+#define SUBC_SIMD(SRC, DST) \
+int subc_simd(const SRC in[], const float scalar[], DST out[], \
+ const int length, const int chan) \
+{ \
+ switch (chan) \
+ { \
+ case 1: \
+ case 2: \
+ case 4: \
+ return arithmOpScalar_simd_common(sub_tag{}, in, scalar, out, length); \
+ case 3: \
+ return arithmOpScalar_simd_c3(sub_tag{}, in, scalar, out, length); \
+ default: \
+ GAPI_Assert(chan <= 4); \
+ break; \
+ } \
+ return 0; \
+}
+
+SUBC_SIMD(uchar, uchar)
+SUBC_SIMD(ushort, uchar)
+SUBC_SIMD(short, uchar)
+SUBC_SIMD(float, uchar)
+SUBC_SIMD(short, short)
+SUBC_SIMD(ushort, short)
+SUBC_SIMD(uchar, short)
+SUBC_SIMD(float, short)
+SUBC_SIMD(ushort, ushort)
+SUBC_SIMD(uchar, ushort)
+SUBC_SIMD(short, ushort)
+SUBC_SIMD(float, ushort)
+SUBC_SIMD(uchar, float)
+SUBC_SIMD(ushort, float)
+SUBC_SIMD(short, float)
+SUBC_SIMD(float, float)
+
+#undef SUBC_SIMD
+
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END
LOGV("buffer size: %zu", bufferSize);
LOGV("width (frame): %d", frameWidth);
LOGV("height (frame): %d", frameHeight);
- if (info.flags & AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM) {
+ if (info.flags & AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM)
+ {
LOGV("output EOS");
sawOutputEOS = true;
}
+ if ((size_t)frameWidth * frameHeight * 3 / 2 > bufferSize)
+ {
+ if (bufferSize == 3110400 && frameWidth == 1920 && frameHeight == 1088)
+ {
+ frameHeight = 1080;
+ LOGV("Buffer size is too small, force using height = %d", frameHeight);
+ }
+ else if(bufferSize == 3110400 && frameWidth == 1088 && frameHeight == 1920)
+ {
+ frameWidth = 1080;
+ LOGV("Buffer size is too small, force using width = %d", frameWidth);
+ }
+ else
+ {
+ LOGE("Buffer size is too small. Frame is ignored. Enable verbose logging to see actual values of parameters");
+ return false;
+ }
+ }
AMediaCodec_releaseOutputBuffer(mediaCodec.get(), bufferIndex, info.size != 0);
return true;
} else if (bufferIndex == AMEDIACODEC_INFO_OUTPUT_BUFFERS_CHANGED) {