From 8d31eb8f17fa17f63d46651af1c69fb8eca2d04a Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Mon, 19 Oct 2020 08:59:46 -0400
Subject: [PATCH] rs6000: correct BE vextract_fp_from_short[hl] vperm masks

The xvcvhpsp instruction converts a vector of float16 half precision values
to single precision.  The intrinsics vextract_fp_from_shorth and
vextract_fp_from_shortl select the high or low four elements of a
half precision vector to convert.  The intrinsics use vperm to select
the appropriate portion of the half precision vector and redistribute
the values for the xvcvhpsp instruction.  The big endian versions of the
masks for the intrinsics were initialized incorrectly.  This patch replaces
the masks with the correct values.  This corrects the failure of the
builtins-3-p9-runnable.c testcase on big endian systems.

Bootstrapped powerpc-ibm-aix7.2.3.0 Power9.

gcc/ChangeLog:

	* config/rs6000/vsx.md (vextract_fp_from_shorth): Fix vals_be.
	(vextract_fp_from_shortl): Same.
---
 gcc/config/rs6000/vsx.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4ff5245..c023bc0 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5659,7 +5659,7 @@
 {
   int i;
   int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
-  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
+  int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
 
   rtx rvals[16];
   rtx mask = gen_reg_rtx (V16QImode);
@@ -5693,7 +5693,7 @@
   "TARGET_P9_VECTOR"
 {
   int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
-  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
+  int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
 
   int i;
   rtx rvals[16];
-- 
2.7.4