if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX")
endif()
+ if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1600)
+ set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2")
+ endif()
+ if(ENABLE_FMA3 AND NOT MSVC_VERSION LESS 1600)
+ set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:FMA")
+ endif()
if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
}
MERGE2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
+MERGE3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);
+MERGE4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);
+
MERGE2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
-MERGE2_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);
-MERGE3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);
+#if CV_SSE4_1
MERGE3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128);
-MERGE3_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps);
-
-MERGE4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128);
MERGE4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128);
+#endif
+
+MERGE2_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);
+MERGE3_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps);
MERGE4_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps);
#endif
{
__m256 scale256 = _mm256_set1_ps(scale);
__m256 shift256 = _mm256_set1_ps(shift);
- int shuffle = 0xD8;
+ const int shuffle = 0xD8;
for ( ; x <= size.width - 16; x += 16)
{
float32x4_t v_cb, v_cg, v_cr;
};
-#elif CV_SSE2
+#elif CV_SSE4_1
template <>
struct RGB2Gray<ushort>
int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
};
-#elif CV_SSE2
+#elif CV_SSE4_1
template <>
struct RGB2YCrCb_i<uchar>
__m128i v_zero;
};
-#if CV_SSE4_1
-
template <>
struct RGB2YCrCb_i<ushort>
{
#endif // CV_SSE4_1
-
-#endif
-
template<typename _Tp> struct YCrCb2RGB_f
{
typedef _Tp channel_type;
if (cn == 1)
{
- int shuffle_lo = _MM_SHUFFLE(2, 0, 2, 0), shuffle_hi = _MM_SHUFFLE(3, 1, 3, 1);
+ const int shuffle_lo = _MM_SHUFFLE(2, 0, 2, 0), shuffle_hi = _MM_SHUFFLE(3, 1, 3, 1);
for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
{
__m128 v_row00 = _mm_loadu_ps(S0), v_row01 = _mm_loadu_ps(S0 + 4),