From b465c826963f9ea01d466f0281b568196e6ef933 Mon Sep 17 00:00:00 2001
From: "Paul E. Murphy" <pmur@users.noreply.github.com>
Date: Thu, 12 Sep 2019 09:45:56 -0500
Subject: [PATCH] core: workaround old gcc vec_mul{e,o} (Issue #15506)

ISA 2.07 (aka POWER8) effectively extended the expanding multiply
operation to word types. The altivec intrinsics prior to gcc 8 did
not get the update.

Work around this deficiency in the same way as other fixes.

This was exposed by commit 33fb253a66275abaa5060ef318c9a5cc87c5fd6e
which leverages the int -> dword expanding multiply.

This fixes Issue #15506
---
 modules/core/include/opencv2/core/vsx_utils.hpp | 27 +++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp
index c60da3c..0f1029d 100644
--- a/modules/core/include/opencv2/core/vsx_utils.hpp
+++ b/modules/core/include/opencv2/core/vsx_utils.hpp
@@ -124,6 +124,33 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b)  \
 
 #define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm)
 
+#if __GNUG__ < 8
+
+    // GCC < 8 lacks the int4 -> dword2 vec_mule/vec_mulo overloads: drop any macro forms, then redefine both names below.
+    #ifdef vec_mule
+        #undef vec_mule
+    #endif
+    #ifdef vec_mulo
+        #undef vec_mulo
+    #endif
+
+    // Re-declare the char/short element overloads on top of __builtin_vec_mule / __builtin_vec_mulo.
+    VSX_REDIRECT_2RG(vec_ushort8,  vec_uchar16,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_short8,  vec_char16,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_int4,  vec_short8,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_uint4,  vec_ushort8,  vec_mule, __builtin_vec_mule)
+    VSX_REDIRECT_2RG(vec_ushort8,  vec_uchar16,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_short8,  vec_char16,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_int4,  vec_short8,  vec_mulo, __builtin_vec_mulo)
+    VSX_REDIRECT_2RG(vec_uint4,  vec_ushort8,  vec_mulo, __builtin_vec_mulo)
+
+    // int4 -> dword2 forms (ISA 2.07; native in GCC 8+), built from the vmul{e,o}{s,u}w opcode names via VSX_IMPL_2VRG (presumably inline asm - confirm).
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulesw, vec_mule)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mule)
+    VSX_IMPL_2VRG(vec_dword2,  vec_int4,  vmulosw, vec_mulo)
+    VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mulo)
+
+#endif
+
 #if __GNUG__ < 7
 // up to GCC 6 vec_mul only supports precisions and llong
 #   ifdef vec_mul
-- 
2.7.4