From 6c95bea072a8999818c26da9fec9332cdf6d9bee Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 19 Dec 2018 13:37:59 +0000
Subject: [PATCH] [TargetLowering] Fix propagation of undefs in zero extension
 ops (PR40091)

As described on PR40091, we have several places where zext (and
zext_vector_inreg) fold an undef input into an undef output. For zero
extensions this is incorrect, as the output is still guaranteed to have the
new upper bits set to zero.

SimplifyDemandedVectorElts is the worst offender (and it's the most likely to
cause new undefs to appear), but DAGCombiner's tryToFoldExtendOfConstant has a
similar issue.

Thanks to @dmgreen for catching this.

Differential Revision: https://reviews.llvm.org/D55883

llvm-svn: 349625
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 +++++++++----
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 14 ++++++++++++++
 llvm/test/CodeGen/AArch64/pr40091.ll          |  2 +-
 .../test/CodeGen/X86/fold-vector-sext-zext.ll | 20 ++++++++++----------
 4 files changed, 34 insertions(+), 15 deletions(-)
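To make the rule concrete, here is a minimal standalone C++ sketch of the
element-wise fold the DAGCombiner change performs. It is illustrative only,
not LLVM code: the Lane typedefs and the foldZExtBuildVector helper are
invented for this example. The point is that an undef source lane must fold
to the constant 0 rather than to undef, because 0 is a legal refinement of
"low bits undef, upper bits zero", while a fully undef lane is not.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

// An empty optional models an UNDEF lane; a value models a defined constant.
using Lane8 = std::optional<uint8_t>;
using Lane16 = std::optional<uint16_t>;

// Element-wise constant fold of zext <N x i8> to <N x i16>: an undef source
// lane folds to 0, not undef, since the widened lane's upper bits are known
// zero after any zero extension.
std::vector<Lane16> foldZExtBuildVector(const std::vector<Lane8> &Src) {
  std::vector<Lane16> Out;
  Out.reserve(Src.size());
  for (const Lane8 &L : Src)
    Out.push_back(L ? Lane16(uint16_t(*L)) : Lane16(uint16_t(0)));
  return Out;
}

int main() {
  // zext <4 x i8> <undef, 255, undef, 253> to <4 x i16>.
  std::vector<Lane8> Src = {std::nullopt, uint8_t(255), std::nullopt,
                            uint8_t(253)};
  std::vector<Lane16> Ext = foldZExtBuildVector(Src);
  // Matches the updated FileCheck expectations below: [0,255,0,253], with
  // no 'u' lanes surviving the zero extension.
  assert(Ext[0] == uint16_t(0) && Ext[1] == uint16_t(255));
  assert(Ext[2] == uint16_t(0) && Ext[3] == uint16_t(253));
  return 0;
}

The SimplifyDemandedVectorElts changes apply the same reasoning at the node
level: when every demanded source element is known undef, the whole zero
extension folds to an all-zeros constant, and KnownUndef is then cleared
because no lane of a zero-extension result may be reported as undef.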
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7a22caf9c8b1..10cce7813dc9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8064,10 +8064,15 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   unsigned NumElts = VT.getVectorNumElements();
   SDLoc DL(N);
 
-  for (unsigned i=0; i != NumElts; ++i) {
-    SDValue Op = N0->getOperand(i);
-    if (Op->isUndef()) {
-      Elts.push_back(DAG.getUNDEF(SVT));
+  // For zero-extensions, UNDEF elements still guarantee to have the upper
+  // bits set to zero.
+  bool IsZext =
+      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue Op = N0.getOperand(i);
+    if (Op.isUndef()) {
+      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
       continue;
     }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4c551d5b231f..7053e596237d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1848,6 +1848,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
       return true;
     KnownZero = SrcZero.zextOrTrunc(NumElts);
     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+      // zext(undef) upper bits are guaranteed to be zero.
+      if (DemandedElts.isSubsetOf(KnownUndef))
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+      KnownUndef.clearAllBits();
+    }
     break;
   }
   case ISD::OR:
@@ -1892,6 +1899,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                    KnownZero, TLO, Depth + 1))
       return true;
+
+    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+      // zext(undef) upper bits are guaranteed to be zero.
+      if (DemandedElts.isSubsetOf(KnownUndef))
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+      KnownUndef.clearAllBits();
+    }
     break;
   default: {
     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
diff --git a/llvm/test/CodeGen/AArch64/pr40091.ll b/llvm/test/CodeGen/AArch64/pr40091.ll
index 8cf51f4beb5f..b70ae8a39b7e 100644
--- a/llvm/test/CodeGen/AArch64/pr40091.ll
+++ b/llvm/test/CodeGen/AArch64/pr40091.ll
@@ -4,7 +4,7 @@
 define i64 @test(i64 %aa) {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT:    movi v0.8b, #137
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll b/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
index a8e78cc5565f..465c7cebf540 100644
--- a/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
+++ b/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
@@ -261,12 +261,12 @@ define <4 x i64> @test_zext_4i8_4i64() {
 define <4 x i16> @test_zext_4i8_4i16_undef() {
 ; X32-LABEL: test_zext_4i8_4i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -279,12 +279,12 @@ define <4 x i16> @test_zext_4i8_4i16_undef() {
 define <4 x i32> @test_zext_4i8_4i32_undef() {
 ; X32-LABEL: test_zext_4i8_4i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 0, i32 0
   %2 = insertelement <4 x i8> %1, i8 undef, i32 1
@@ -297,12 +297,12 @@ define <4 x i32> @test_zext_4i8_4i32_undef() {
 define <4 x i64> @test_zext_4i8_4i64_undef() {
 ; X32-LABEL: test_zext_4i8_4i64_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,255,0,2,0,u,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i64_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,255,2,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,0]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -359,12 +359,12 @@ define <8 x i32> @test_zext_8i8_8i32() {
 define <8 x i16> @test_zext_8i8_8i16_undef() {
 ; X32-LABEL: test_zext_8i8_8i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_8i8_8i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 undef, i32 0
   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
@@ -381,12 +381,12 @@ define <8 x i16> @test_zext_8i8_8i16_undef() {
 define <8 x i32> @test_zext_8i8_8i32_undef() {
 ; X32-LABEL: test_zext_8i8_8i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_8i8_8i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 0, i32 0
   %2 = insertelement <8 x i8> %1, i8 undef, i32 1
-- 
2.34.1