From: Anna Khakimova Date: Mon, 20 Jul 2020 17:09:23 +0000 (+0300) Subject: Fix for issue (#1335) X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=898f0626adbd308d23d04d89771fe2e4e3caa977;p=platform%2Fupstream%2Fdldt.git Fix for issue (#1335) --- diff --git a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp index d790517..a181561 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp @@ -234,14 +234,6 @@ static inline void verticalPass_lpi4_8U(const uint8_t* src0[], const uint8_t* sr } } -static inline void insert64(v_uint8& val, const short mapsx[], - uint8_t tmp[], const int& x, const int& shift) { - val = v_insert64<0>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 0]])); - val = v_insert64<1>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 1]])); - val = v_insert64<2>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 2]])); - val = v_insert64<3>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 3]])); -} - static inline v_uint8 setHorizontalShufMask1() { return v_setr_s8(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7, 11, 15, @@ -262,7 +254,8 @@ static inline void horizontalPass_lpi4_8UC1(const short clone[], const short map v_uint8 val_0, val_1, val_2, val_3, res1, res2; constexpr int shift = 4; v_uint8 shuf_mask1 = setHorizontalShufMask1(); - v_uint8 shuf_mask2 = setHorizontalShufMask2();; + v_uint8 shuf_mask2 = setHorizontalShufMask2(); + v_uint32 idxs = v_setr_s32(0, 2, 4, 6, 1, 3, 5, 7); for (int x = 0; x < length; ) { @@ -272,11 +265,7 @@ static inline void horizontalPass_lpi4_8UC1(const short clone[], const short map v_int16 a54 = vx_load(&clone[4 * (x + 8)]); v_int16 a76 = vx_load(&clone[4 * (x + 12)]); - insert64(val_0, mapsx, tmp, x, 0); - insert64(val_1, mapsx, tmp, x, shift); - 
insert64(val_2, mapsx, tmp, x, shift*2); - insert64(val_3, mapsx, tmp, x, shift*3); - + v_setr64(val_0, val_1, val_2, val_3, mapsx, tmp, x, shift); val_0 = v_permutevar8x32(val_0, idxs); val_1 = v_permutevar8x32(val_1, idxs); val_2 = v_permutevar8x32(val_2, idxs); diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp index 4d2f854..ab9db8a 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp @@ -805,7 +805,7 @@ static void calcRowLinear(const cv::gapi::fluid::View & in, src1[l] = in.InLine(index1); dst[l] = out.OutLine(l); } -#if 1 + #ifdef HAVE_AVX2 if (with_cpu_x86_avx2()) { if (std::is_same<T, uint8_t>::value) { @@ -825,7 +825,7 @@ static void calcRowLinear(const cv::gapi::fluid::View & in, } } #endif -#endif + #ifdef HAVE_SSE if (with_cpu_x86_sse42()) { if (std::is_same<T, uint8_t>::value) { diff --git a/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp b/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp index c057e3a..640d589 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp +++ b/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp @@ -3142,10 +3142,35 @@ static inline v_uint8x32 v_shuffle_s8(const v_uint8x32& a, const v_uint8x32& mas return v_uint8x32(_mm256_shuffle_epi8(a.val, mask.val)); } +#if !defined(__GNUC__) || defined(__GNUC__) && defined(__x86_64) template<int index> -static inline v_uint8x32 v_insert64(v_uint8x32& a, const int64_t& i) +static inline __m256i v_insert64(v_uint8x32& a, const int64_t& i) { - return v_uint8x32(_mm256_insert_epi64(a.val, i, index)); + return _mm256_insert_epi64(a.val, i, index); +} +#endif + +static inline void v_setr64(v_uint8x32& val_0, v_uint8x32& val_1,v_uint8x32& val_2, v_uint8x32& val_3, const short mapsx[], + uint8_t tmp[], const int& x, const int& shift) { + val_0.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 0]]), + 
*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 1]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3]])); + + val_1.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 0]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 1]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 2]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 3]])); + + val_2.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 0]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 1]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 2]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 3]])); + + val_3.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 0]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 1]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 2]]), + *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 3]])); } static inline v_uint8x32 v_permutevar8x32(v_uint8x32& a, v_uint32x8& idxs)