[X86] SimplifyDemandedBits - only narrow a broadcast source if the source has only one use.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 19 Sep 2021 21:53:13 +0000 (22:53 +0100)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 19 Sep 2021 21:53:30 +0000 (22:53 +0100)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 19 Sep 2021 21:53:13 +0000 (22:53 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 19 Sep 2021 21:53:30 +0000 (22:53 +0100)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 3187c41..3520bbf 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40381,7 +40381,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
      // Don't attempt this on AVX512 as it might affect broadcast folding.
      // TODO: Should we attempt this for i32/i16 splats? They tend to be slower.
      if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() &&
-        OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) {
+        OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2) &&
+        Src->hasOneUse()) {
        MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2);
        SDValue NewSrc =
            TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src);
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll

index 0a3ddb8..b7f7321 100644 (file)
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -2261,13 +2261,10 @@ define <16 x i32> @splat_v3i32(<3 x i32>* %ptr) {
  ;
  ; AVX1-LABEL: splat_v3i32:
  ; AVX1:       # %bb.0:
-; AVX1-NEXT:    movq (%rdi), %rax
-; AVX1-NEXT:    vmovq %rax, %xmm0
-; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3,4,5,6,7]
-; AVX1-NEXT:    vmovd %eax, %xmm2
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7]
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7]
  ; AVX1-NEXT:    retq
  ;
  ; AVX2-SLOW-LABEL: splat_v3i32:
@@ -2289,13 +2286,10 @@ define <16 x i32> @splat_v3i32(<3 x i32>* %ptr) {
  ;
  ; XOP-LABEL: splat_v3i32:
  ; XOP:       # %bb.0:
-; XOP-NEXT:    movq (%rdi), %rax
-; XOP-NEXT:    vmovq %rax, %xmm0
-; XOP-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; XOP-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3,4,5,6,7]
-; XOP-NEXT:    vmovd %eax, %xmm2
-; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; XOP-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7]
+; XOP-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
+; XOP-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; XOP-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7]
+; XOP-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7]
  ; XOP-NEXT:    retq
    %1 = load <3 x i32>, <3 x i32>* %ptr, align 1
    %2 = shufflevector <3 x i32> %1, <3 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 19 Sep 2021 21:53:13 +0000 (22:53 +0100)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 19 Sep 2021 21:53:30 +0000 (22:53 +0100)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/oddshuffles.ll		patch | blob | history