   // If this is a split VSELECT that was previously handled, do nothing.
-  if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+  EVT CondVT = Cond->getValueType(0);
+  if (CondVT.getScalarSizeInBits() != 1)
     return SDValue();
   EVT VSelVT = N->getValueType(0);
     EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
     if (SetCCResVT.getScalarSizeInBits() == 1)
       return SDValue();
+  } else if (CondVT.getScalarType() == MVT::i1) {
+    // If there is support for an i1 vector mask (or only scalar i1
+    // conditions), don't touch.
+    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+    if (CondVT.getScalarType() == MVT::i1)
+      return SDValue();
   }
   // Get the VT and operands for VSELECT, and widen if needed.
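
In effect, the new guard asks: after type legalization, will this condition still be an i1-per-lane mask? If so, the target either supports i1 vector masks directly or ends up with plain scalar i1 conditions, and widening the mask to the element width would only create nodes the target cannot select. A minimal standalone sketch of that check (the helper name is hypothetical, not from the patch; `TLI` and `Ctx` are the TargetLowering and LLVMContext already in scope in the surrounding function):

// Illustrative sketch only -- targetKeepsI1Mask is a hypothetical helper,
// not part of the patch. Assumes the usual LLVM CodeGen includes, e.g.
//   #include "llvm/CodeGen/TargetLowering.h"
// and `using namespace llvm;`.
static bool targetKeepsI1Mask(const TargetLowering &TLI, LLVMContext &Ctx,
                              EVT CondVT) {
  // Follow the same chain the type legalizer will follow: each step maps
  // an illegal type to the type it is transformed into.
  while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
    CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
  // If the fixed point still has i1 elements, the target keeps an
  // i1-per-lane mask and the widening should back off.
  return CondVT.getScalarType() == MVT::i1;
}

Walking getTypeToTransformTo to a fixed point mirrors what the type legalizer itself does, so the answer matches the mask type that actually reaches instruction selection.
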
--- /dev/null
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; Check that DAGTypeLegalizer::WidenVSELECTAndMask doesn't try to
+; create vselects with i64 condition masks.
+
+; FIXME: Should be able to avoid intermediate vselect
+; GCN-LABEL: {{^}}widen_vselect_and_mask_v4f64:
+; GCN: v_cmp_u_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
+; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
+; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
+bb:
+ %tmp = extractelement <4 x double> %arg, i64 0
+ %tmp1 = fcmp uno double %tmp, 0.000000e+00
+ %tmp2 = sext i1 %tmp1 to i64
+ %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 0
+ %tmp4 = insertelement <4 x i64> %tmp3, i64 undef, i32 1
+ %tmp5 = insertelement <4 x i64> %tmp4, i64 undef, i32 2
+ %tmp6 = insertelement <4 x i64> %tmp5, i64 undef, i32 3
+ %tmp7 = fcmp une <4 x double> %arg, zeroinitializer
+ %tmp8 = icmp sgt <4 x i64> %tmp6, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %tmp9 = and <4 x i1> %tmp8, %tmp7
+ %tmp10 = select <4 x i1> %tmp9, <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, <4 x double> zeroinitializer
+ store <4 x double> %tmp10, <4 x double> addrspace(1)* null, align 32
+ ret void
+}
+
+; GCN-LABEL: {{^}}widen_vselect_and_mask_v4i64:
+; GCN: v_cmp_eq_u64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
+; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
+; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
+bb:
+ %tmp = extractelement <4 x i64> %arg, i64 0
+ %tmp1 = icmp eq i64 %tmp, 0
+ %tmp2 = sext i1 %tmp1 to i64
+ %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 0
+ %tmp4 = insertelement <4 x i64> %tmp3, i64 undef, i32 1
+ %tmp5 = insertelement <4 x i64> %tmp4, i64 undef, i32 2
+ %tmp6 = insertelement <4 x i64> %tmp5, i64 undef, i32 3
+ %tmp7 = icmp ne <4 x i64> %arg, zeroinitializer
+ %tmp8 = icmp sgt <4 x i64> %tmp6, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %tmp9 = and <4 x i1> %tmp8, %tmp7
+ %tmp10 = select <4 x i1> %tmp9, <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i64> zeroinitializer
+ store <4 x i64> %tmp10, <4 x i64> addrspace(1)* null, align 32
+ ret void
+}
+
+attributes #0 = { nounwind }
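
The two kernels above pin down the failure mode named in the test header: the `and <4 x i1>` of the two compares feeds the select, and the old widening turned that mask into an integer vector as wide as the selected elements, producing a VSELECT with a v4i64 condition that AMDGPU cannot use as a per-lane mask. For illustration, a hypothetical SelectionDAG sketch of one way such a node shape could be built (`buildWideCondVSelect` is not from the patch; assumes `#include "llvm/CodeGen/SelectionDAG.h"` and `using namespace llvm;`):

// Illustrative only: the VSELECT-with-wide-condition shape that the new
// guard refuses to create when the legal mask type is still i1-per-lane.
static SDValue buildWideCondVSelect(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue Mask /*v4i1*/, SDValue LHS,
                                    SDValue RHS /*v4i64*/) {
  // Sign-extension is just one way to produce the wide mask; what matters
  // is the resulting VSELECT condition type.
  SDValue WideMask = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v4i64, Mask);
  // The condition operand is now v4i64 rather than an i1 vector mask.
  return DAG.getNode(ISD::VSELECT, DL, MVT::v4i64, WideMask, LHS, RHS);
}

The X86 diff that follows shows the payoff of leaving the i1 mask alone: the korw result feeds the vblendmpd instructions directly through k1, where the old output first expanded the mask to a vector (vpmovm2q on SKX, vpternlogq on KNL) and immediately tested it back into a mask with vptestmq.
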
; both formulations of vselect. All of this trickery is because we can't
; directly form an SDAG input to the lowering.
define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
-; CHECK-SKX-LABEL: test2:
-; CHECK-SKX: # BB#0: # %entry
-; CHECK-SKX-NEXT: vxorps %xmm6, %xmm6, %xmm6
-; CHECK-SKX-NEXT: vcmpltps %zmm0, %zmm6, %k0
-; CHECK-SKX-NEXT: vcmpltps %zmm6, %zmm1, %k1
-; CHECK-SKX-NEXT: korw %k1, %k0, %k0
-; CHECK-SKX-NEXT: kshiftrw $8, %k0, %k1
-; CHECK-SKX-NEXT: vpmovm2q %k1, %zmm1
-; CHECK-SKX-NEXT: vpmovm2q %k0, %zmm0
-; CHECK-SKX-NEXT: vptestmq %zmm0, %zmm0, %k1
-; CHECK-SKX-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
-; CHECK-SKX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; CHECK-SKX-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
-; CHECK-SKX-NEXT: retq
-;
-; CHECK-KNL-LABEL: test2:
-; CHECK-KNL: # BB#0: # %entry
-; CHECK-KNL-NEXT: vxorps %xmm6, %xmm6, %xmm6
-; CHECK-KNL-NEXT: vcmpltps %zmm0, %zmm6, %k0
-; CHECK-KNL-NEXT: vcmpltps %zmm6, %zmm1, %k1
-; CHECK-KNL-NEXT: korw %k1, %k0, %k1
-; CHECK-KNL-NEXT: kshiftrw $8, %k1, %k2
-; CHECK-KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; CHECK-KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; CHECK-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; CHECK-KNL-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
-; CHECK-KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
-; CHECK-KNL-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
-; CHECK-KNL-NEXT: retq
+; CHECK-LABEL: test2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
+; CHECK-NEXT: vcmpltps %zmm0, %zmm6, %k0
+; CHECK-NEXT: vcmpltps %zmm6, %zmm1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k1
+; CHECK-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
+; CHECK-NEXT: kshiftrw $8, %k1, %k1
+; CHECK-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
+; CHECK-NEXT: retq
entry:
%gt.m = fcmp ogt <16 x float> %x, zeroinitializer
%lt.m = fcmp olt <16 x float> %y, zeroinitializer