}
}
-static inline void insert64(v_uint8& val, const short mapsx[],
- uint8_t tmp[], const int& x, const int& shift) {
- val = v_insert64<0>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 0]]));
- val = v_insert64<1>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 1]]));
- val = v_insert64<2>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 2]]));
- val = v_insert64<3>(val, *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 3]]));
-}
-
static inline v_uint8 setHorizontalShufMask1() {
return v_setr_s8(0, 4, 8, 12, 2, 6, 10, 14,
1, 5, 9, 13, 3, 7, 11, 15,
v_uint8 val_0, val_1, val_2, val_3, res1, res2;
constexpr int shift = 4;
v_uint8 shuf_mask1 = setHorizontalShufMask1();
- v_uint8 shuf_mask2 = setHorizontalShufMask2();;
+ v_uint8 shuf_mask2 = setHorizontalShufMask2();
+
v_uint32 idxs = v_setr_s32(0, 2, 4, 6, 1, 3, 5, 7);
for (int x = 0; x < length; ) {
v_int16 a54 = vx_load(&clone[4 * (x + 8)]);
v_int16 a76 = vx_load(&clone[4 * (x + 12)]);
- insert64(val_0, mapsx, tmp, x, 0);
- insert64(val_1, mapsx, tmp, x, shift);
- insert64(val_2, mapsx, tmp, x, shift*2);
- insert64(val_3, mapsx, tmp, x, shift*3);
-
+ v_setr64(val_0, val_1, val_2, val_3, mapsx, tmp, x, shift);
val_0 = v_permutevar8x32(val_0, idxs);
val_1 = v_permutevar8x32(val_1, idxs);
val_2 = v_permutevar8x32(val_2, idxs);
return v_uint8x32(_mm256_shuffle_epi8(a.val, mask.val));
}
+#if !defined(__GNUC__) || defined(__GNUC__) && defined(__x86_64)
+// Insert a 64-bit value into lane `index` (compile-time constant, 0..3) of
+// the AVX2 register held by `a`. Returns the raw __m256i rather than a
+// v_uint8x32 wrapper. Guarded because _mm256_insert_epi64 is only available
+// on 64-bit x86 targets under GCC (hence !__GNUC__ || __x86_64).
template<int index>
-static inline v_uint8x32 v_insert64(v_uint8x32& a, const int64_t& i)
+static inline __m256i v_insert64(v_uint8x32& a, const int64_t& i)
{
-    return v_uint8x32(_mm256_insert_epi64(a.val, i, index));
+    return _mm256_insert_epi64(a.val, i, index);
+}
+#endif
+
+// Fill val_0..val_3 with gathered pixel data: each register is built from four
+// 64-bit loads taken at byte offsets 4*mapsx[...] into tmp[], using the index
+// windows x+0..3, x+shift..shift+3, x+2*shift.., x+3*shift.. respectively.
+// Replaces the deleted per-lane insert64() helper with single
+// _mm256_setr_epi64x calls per register.
+// NOTE(review): the reinterpret_cast<int64_t*> reads from the uint8_t tmp[]
+// buffer are unaligned and violate strict aliasing; a memcpy into a local
+// int64_t would be the portable form — TODO confirm the supported compilers
+// tolerate this (the pattern is common in this intrinsics code).
+static inline void v_setr64(v_uint8x32& val_0, v_uint8x32& val_1,v_uint8x32& val_2, v_uint8x32& val_3, const short mapsx[],
+                            uint8_t tmp[], const int& x, const int& shift) {
+    val_0.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 0]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 1]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3]]));
+
+    val_1.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 0]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 1]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 2]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + shift + 3]]));
+
+    val_2.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 0]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 1]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 2]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 2*shift + 3]]));
+
+    val_3.val = _mm256_setr_epi64x(*reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 0]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 1]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 2]]),
+                                   *reinterpret_cast<int64_t*>(&tmp[4 * mapsx[x + 3 * shift + 3]]));
}
static inline v_uint8x32 v_permutevar8x32(v_uint8x32& a, v_uint32x8& idxs)