From 846d06c7078585c337937838ff468efba3591621 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Wed, 4 Jan 2023 22:31:33 +0300
Subject: [PATCH] [DAG] `tryToFoldExtendOfConstant()`: `sext undef` is not
 `undef`

https://alive2.llvm.org/ce/z/cLGpWV, but https://alive2.llvm.org/ce/z/TGNH4P
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  | 10 ++++------
 llvm/test/CodeGen/X86/fold-vector-sext-zext.ll | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7673bf0..d23a150 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11740,15 +11740,13 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   SmallVector<SDValue, 8> Elts;
   unsigned NumElts = VT.getVectorNumElements();
 
-  // For zero-extensions, UNDEF elements still guarantee to have the upper
-  // bits set to zero.
-  bool IsZext =
-      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
-
   for (unsigned i = 0; i != NumElts; ++i) {
     SDValue Op = N0.getOperand(i);
     if (Op.isUndef()) {
-      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
+      if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
+        Elts.push_back(DAG.getUNDEF(SVT));
+      else
+        Elts.push_back(DAG.getConstant(0, DL, SVT));
       continue;
     }
 
diff --git a/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll b/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
index 982b29c..3ff68ac 100644
--- a/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
+++ b/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
@@ -29,12 +29,12 @@ define <4 x i16> @test_sext_4i8_4i16() {
 define <4 x i16> @test_sext_4i8_4i16_undef() {
 ; X32-LABEL: test_sext_4i8_4i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,0,65533,u,u,u,u>
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,0,65533,u,u,u,u>
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -65,12 +65,12 @@ define <4 x i32> @test_sext_4i8_4i32() {
 define <4 x i32> @test_sext_4i8_4i32_undef() {
 ; X32-LABEL: test_sext_4i8_4i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,0,4294967293]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,0,4294967293]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -101,12 +101,12 @@ define <4 x i64> @test_sext_4i8_4i64() {
 define <4 x i64> @test_sext_4i8_4i64_undef() {
 ; X32-LABEL: test_sext_4i8_4i64_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,4294967295,4294967295,u,u,4294967293,4294967295>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,0,0,4294967293,4294967295]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i64_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,18446744073709551615,u,18446744073709551613>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,0,18446744073709551613]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -163,12 +163,12 @@ define <8 x i32> @test_sext_8i8_8i32() {
 define <8 x i16> @test_sext_8i8_8i16_undef() {
 ; X32-LABEL: test_sext_8i8_8i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,65531,u,65529>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,0,65533,0,65531,0,65529]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_8i8_8i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,65531,u,65529>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,0,65533,0,65531,0,65529]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 undef, i32 0
   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
@@ -185,12 +185,12 @@ define <8 x i16> @test_sext_8i8_8i16_undef() {
 define <8 x i32> @test_sext_8i8_8i32_undef() {
 ; X32-LABEL: test_sext_8i8_8i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,4,u,6,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,0,4,0,6,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_8i8_8i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,4,u,6,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,0,4,0,6,0]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 0, i32 0
   %2 = insertelement <8 x i8> %1, i8 undef, i32 1
-- 
2.7.4