* config/i386/i386.c (expand_vec_perm_palignr): Handle 256-bit vectors for TARGET_AVX2.

author jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>

Wed, 1 Oct 2014 20:41:37 +0000 (20:41 +0000)

committer jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>

Wed, 1 Oct 2014 20:41:37 +0000 (20:41 +0000)
author jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 1 Oct 2014 20:41:37 +0000 (20:41 +0000)
committer jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 1 Oct 2014 20:41:37 +0000 (20:41 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 54b6b0b..b87279f 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,8 @@
  2014-10-01  Jakub Jelinek  <jakub@redhat.com>
  
+       * config/i386/i386.c (expand_vec_perm_palignr): Handle
+       256-bit vectors for TARGET_AVX2.
+
         * config/i386/i386.c (expand_vec_perm_vperm2f128): Canonicalize
         dfirst permutation.
  
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index b40852d..8b70f6f 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -43297,44 +43297,75 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
    rtx shift, target;
    struct expand_vec_perm_d dcopy;
  
-  /* Even with AVX, palignr only operates on 128-bit vectors.  */
-  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+  /* Even with AVX, palignr only operates on 128-bit vectors,
+     in AVX2 palignr operates on both 128-bit lanes.  */
+  if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+      && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
      return false;
  
-  min = nelt, max = 0;
+  min = 2 * nelt, max = 0;
    for (i = 0; i < nelt; ++i)
      {
        unsigned e = d->perm[i];
+      if (GET_MODE_SIZE (d->vmode) == 32)
+       e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
        if (e < min)
         min = e;
        if (e > max)
         max = e;
      }
-  if (min == 0 || max - min >= nelt)
+  if (min == 0
+      || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
      return false;
  
    /* Given that we have SSSE3, we know we'll be able to implement the
-     single operand permutation after the palignr with pshufb.  */
-  if (d->testing_p)
+     single operand permutation after the palignr with pshufb for
+     128-bit vectors.  */
+  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16)
      return true;
  
    dcopy = *d;
-  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
-  target = gen_reg_rtx (TImode);
-  emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
-                                 gen_lowpart (TImode, d->op0), shift));
-
-  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
-  dcopy.one_operand_p = true;
  
    in_order = true;
    for (i = 0; i < nelt; ++i)
      {
-      unsigned e = dcopy.perm[i] - min;
+      unsigned e = dcopy.perm[i];
+      if (GET_MODE_SIZE (d->vmode) == 32
+         && e >= nelt
+         && (e & (nelt / 2 - 1)) < min)
+       e = e - min - (nelt / 2);
+      else
+       e = e - min;
        if (e != i)
         in_order = false;
        dcopy.perm[i] = e;
      }
+  dcopy.one_operand_p = true;
+
+  /* For AVX2, test whether we can permute the result in one instruction.  */
+  if (d->testing_p)
+    {
+      if (in_order)
+       return true;
+      dcopy.op1 = dcopy.op0;
+      return expand_vec_perm_1 (&dcopy);
+    }
+
+  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
+  if (GET_MODE_SIZE (d->vmode) == 16)
+    {
+      target = gen_reg_rtx (TImode);
+      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
+                                     gen_lowpart (TImode, d->op0), shift));
+    }
+  else
+    {
+      target = gen_reg_rtx (V2TImode);
+      emit_insn (gen_avx2_palignrv2ti (target, gen_lowpart (V2TImode, d->op1),
+                                      gen_lowpart (V2TImode, d->op0), shift));
+    }
+
+  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
  
    /* Test for the degenerate case where the alignment by itself
       produces the desired permutation.  */
@@ -43345,7 +43376,7 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
      }
  
    ok = expand_vec_perm_1 (&dcopy);
-  gcc_assert (ok);
+  gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
  
    return ok;
  }
author	jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>
	Wed, 1 Oct 2014 20:41:37 +0000 (20:41 +0000)
committer	jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>
	Wed, 1 Oct 2014 20:41:37 +0000 (20:41 +0000)
gcc/ChangeLog		patch | blob | history
gcc/config/i386/i386.c		patch | blob | history