[AVX-512] Add support for changing the element size of PALIGNR/VALIGND/VALIGNQ shuffl...

author Craig Topper <craig.topper@gmail.com>

Tue, 22 Nov 2016 03:51:53 +0000 (03:51 +0000)

committer Craig Topper <craig.topper@gmail.com>

Tue, 22 Nov 2016 03:51:53 +0000 (03:51 +0000)
author Craig Topper <craig.topper@gmail.com>
Tue, 22 Nov 2016 03:51:53 +0000 (03:51 +0000)
committer Craig Topper <craig.topper@gmail.com>
Tue, 22 Nov 2016 03:51:53 +0000 (03:51 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index d98b3c3..4ef26f3 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27772,6 +27772,58 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
    return SDValue();
  }
  
+// If this is a bitcasted op that can be represented as another type, push the
+// the bitcast to the inputs. This allows more opportunities for pattern
+// matching masked instructions. This is called when we know that the operation
+// is used as one of the inputs of a vselect.
+static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
+                                      TargetLowering::DAGCombinerInfo &DCI) {
+  // Make sure we have a bitcast.
+  if (OrigOp.getOpcode() != ISD::BITCAST)
+    return false;
+
+  SDValue Op = OrigOp.getOperand(0);
+
+  // If the operation is used by anything other than the bitcast, we shouldn't
+  // do this combine as that would replicate the operation.
+  if (!Op.hasOneUse())
+    return false;
+
+  MVT VT = OrigOp.getSimpleValueType();
+  MVT EltVT = VT.getVectorElementType();
+  SDLoc DL(Op.getNode());
+
+  switch (Op.getOpcode()) {
+  case X86ISD::PALIGNR:
+    // PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
+    if (!VT.is128BitVector())
+      return false;
+    LLVM_FALLTHROUGH;
+  case X86ISD::VALIGN: {
+    if (EltVT != MVT::i32 && EltVT != MVT::i64)
+      return false;
+    uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+    MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+    unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
+    unsigned EltSize = EltVT.getSizeInBits();
+    // Make sure we can represent the same shift with the new VT.
+    if ((ShiftAmt % EltSize) != 0)
+      return false;
+    Imm = ShiftAmt / EltSize;
+    SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
+    DCI.AddToWorklist(Op0.getNode());
+    SDValue Op1 = DAG.getBitcast(VT, Op.getOperand(1));
+    DCI.AddToWorklist(Op1.getNode());
+    DCI.CombineTo(OrigOp.getNode(),
+                  DAG.getNode(X86ISD::VALIGN, DL, VT, Op0, Op1,
+                              DAG.getConstant(Imm, DL, MVT::i8)));
+    return true;
+  }
+  }
+
+  return false;
+}
+
  /// Do target-specific dag combines on SELECT and VSELECT nodes.
  static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
@@ -28133,6 +28185,17 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
      }
    }
  
+  // Look for vselects with LHS/RHS being bitcasted from an operation that
+  // can be executed on another type. Push the bitcast to the inputs of
+  // the operation. This exposes opportunities for using masking instructions.
+  if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalizeOps() &&
+      CondVT.getVectorElementType() == MVT::i1) {
+    if (combineBitcastForMaskedOp(LHS, DAG, DCI))
+      return SDValue(N, 0);
+    if (combineBitcastForMaskedOp(RHS, DAG, DCI))
+      return SDValue(N, 0);
+  }
+
    return SDValue();
  }
  
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll

index 02c4974..250cf9f 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -360,9 +360,9 @@ define <16 x i32> @shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_0
  define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, <16 x i32> %passthru, i16 %mask) {
  ; ALL-LABEL: mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
  ; ALL:       # BB#0:
-; ALL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,0]
  ; ALL-NEXT:    kmovw %edi, %k1
-; ALL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; ALL-NEXT:    valignd {{.*#+}} zmm1 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
+; ALL-NEXT:    vmovdqa64 %zmm1, %zmm0
  ; ALL-NEXT:    retq
    %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
    %mask.cast = bitcast i16 %mask to <16 x i1>
@@ -373,9 +373,9 @@ define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15
  define <16 x i32> @mask_shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
  ; ALL-LABEL: mask_shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
  ; ALL:       # BB#0:
-; ALL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7],zmm1[0]
  ; ALL-NEXT:    kmovw %edi, %k1
-; ALL-NEXT:    vpblendmd %zmm0, %zmm2, %zmm0 {%k1}
+; ALL-NEXT:    valignd {{.*#+}} zmm2 {%k1} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
+; ALL-NEXT:    vmovdqa64 %zmm2, %zmm0
  ; ALL-NEXT:    retq
    %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
    %mask.cast = bitcast i16 %mask to <16 x i1>
@@ -386,9 +386,8 @@ define <16 x i32> @mask_shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14
  define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, i16 %mask) {
  ; ALL-LABEL: maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01:
  ; ALL:       # BB#0:
-; ALL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7,0]
  ; ALL-NEXT:    kmovw %edi, %k1
-; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; ALL-NEXT:    valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
  ; ALL-NEXT:    retq
    %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
    %mask.cast = bitcast i16 %mask to <16 x i1>
@@ -399,9 +398,8 @@ define <16 x i32> @maskz_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_1
  define <16 x i32> @maskz_shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
  ; ALL-LABEL: maskz_shuffle_v16i32_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
  ; ALL:       # BB#0:
-; ALL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7],zmm1[0]
  ; ALL-NEXT:    kmovw %edi, %k1
-; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; ALL-NEXT:    valignd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm1[0,1]
  ; ALL-NEXT:    retq
    %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
    %mask.cast = bitcast i16 %mask to <16 x i1>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-masked.ll b/llvm/test/CodeGen/X86/vector-shuffle-masked.ll

index 24cf6f9..b1d2523 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-masked.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-masked.ll
@@ -4,9 +4,9 @@
  define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
  ; CHECK-LABEL: mask_shuffle_v4i32_1234:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT:    valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0]
+; CHECK-NEXT:    vmovdqa64 %xmm2, %xmm0
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -18,9 +18,8 @@ define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32>
  define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
  ; CHECK-LABEL: maskz_shuffle_v4i32_1234:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0]
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -32,9 +31,9 @@ define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
  ; CHECK-LABEL: mask_shuffle_v4i32_2345:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT:    valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1]
+; CHECK-NEXT:    vmovdqa64 %xmm2, %xmm0
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -46,9 +45,8 @@ define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32>
  define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
  ; CHECK-LABEL: maskz_shuffle_v4i32_2345:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1]
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -60,9 +58,9 @@ define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) {
  ; CHECK-LABEL: mask_shuffle_v2i64_12:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vpblendmq %xmm0, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT:    valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
+; CHECK-NEXT:    vmovdqa64 %xmm2, %xmm0
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -74,9 +72,8 @@ define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %p
  define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
  ; CHECK-LABEL: maskz_shuffle_v2i64_12:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -167,9 +164,9 @@ define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %m
  define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
  ; CHECK-LABEL: mask_shuffle_v8i32_23456789:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    valignq {{.*#+}} ymm0 = ymm0[1,2,3],ymm1[0]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT:    valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1]
+; CHECK-NEXT:    vmovdqa64 %ymm2, %ymm0
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
    %mask.cast = bitcast i8 %mask to <8 x i1>
@@ -180,9 +177,8 @@ define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i
  define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
  ; CHECK-LABEL: maskz_shuffle_v8i32_23456789:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    valignq {{.*#+}} ymm0 = ymm0[1,2,3],ymm1[0]
  ; CHECK-NEXT:    kmovb %edi, %k1
-; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1]
  ; CHECK-NEXT:    retq
    %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
    %mask.cast = bitcast i8 %mask to <8 x i1>
author	Craig Topper <craig.topper@gmail.com>
	Tue, 22 Nov 2016 03:51:53 +0000 (03:51 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Tue, 22 Nov 2016 03:51:53 +0000 (03:51 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-shuffle-masked.ll		patch \| blob \| history