[X86][SSE] combineX86ShufflesConstants - early out for zeroable vectors (PR45443)

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 7 Apr 2020 13:45:16 +0000 (14:45 +0100)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 7 Apr 2020 13:45:29 +0000 (14:45 +0100)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 7 Apr 2020 13:45:16 +0000 (14:45 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 7 Apr 2020 13:45:29 +0000 (14:45 +0100)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 777e376..a9db423 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34749,6 +34749,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
      return SDValue();
  
    // Shuffle the constant bits according to the mask.
+  SDLoc DL(Root);
    APInt UndefElts(NumMaskElts, 0);
    APInt ZeroElts(NumMaskElts, 0);
    APInt ConstantElts(NumMaskElts, 0);
@@ -34786,6 +34787,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
    }
    assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
  
+  // Attempt to create a zero vector.
+  if ((UndefElts | ZeroElts).isAllOnesValue())
+    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);
+
    // Create the constant data.
    MVT MaskSVT;
    if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
@@ -34794,8 +34799,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
      MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
  
    MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
+    return SDValue();
  
-  SDLoc DL(Root);
    SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
    return DAG.getBitcast(VT, CstOp);
  }
diff --git a/llvm/test/CodeGen/X86/pr45443.ll b/llvm/test/CodeGen/X86/pr45443.ll

new file mode 100644 (file)

index 0000000..1e40ab9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr45443.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64
+
+define <16 x float> @PR45443() {
+; CHECK-LABEL: PR45443:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+bb:
+  %tmp = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> <i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040>, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>)
+  %tmp4 = tail call fast <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> <float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000>, <16 x float> undef)
+  %tmp5 = icmp ult <16 x i32> %tmp, <i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216>
+  %tmp6 = and <16 x i32> %tmp, <i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215>
+  %tmp7 = icmp ne <16 x i32> %tmp6, zeroinitializer
+  %tmp8 = and <16 x i1> %tmp7, %tmp5
+  %tmp9 = select fast <16 x i1> %tmp8, <16 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, <16 x float> %tmp4
+  ret <16 x float> %tmp9
+}
+declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
+declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>)
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 7 Apr 2020 13:45:16 +0000 (14:45 +0100)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 7 Apr 2020 13:45:29 +0000 (14:45 +0100)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/pr45443.ll	[new file with mode: 0644]	patch | blob