From d54bae65258a3c7fdd1e9b680a5dc91dba3d9453 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 7 Apr 2016 07:52:45 +0000
Subject: [PATCH] [X86][SSE] Add support for VZEXT constant folding

llvm-svn: 265646
---
 llvm/lib/Target/X86/X86ISelLowering.cpp        | 18 ++++++++++++++++++
 llvm/test/CodeGen/X86/vector-rotate-128.ll     |  3 +--
 llvm/test/CodeGen/X86/vector-shift-ashr-128.ll |  3 +--
 llvm/test/CodeGen/X86/vector-shift-lshr-128.ll |  3 +--
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a93eaa5..836caa9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29609,11 +29609,29 @@ static SDValue combineVZext(SDNode *N, SelectionDAG &DAG,
                             const X86Subtarget &Subtarget) {
   SDLoc DL(N);
   MVT VT = N->getSimpleValueType(0);
+  MVT SVT = VT.getVectorElementType();
   SDValue Op = N->getOperand(0);
   MVT OpVT = Op.getSimpleValueType();
   MVT OpEltVT = OpVT.getVectorElementType();
   unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
 
+  // Perform any constant folding.
+  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+    SmallVector<SDValue, 4> Vals;
+    for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+      SDValue OpElt = Op.getOperand(i);
+      if (OpElt.getOpcode() == ISD::UNDEF) {
+        Vals.push_back(DAG.getUNDEF(SVT));
+        continue;
+      }
+      APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue();
+      assert(Cst.getBitWidth() == OpEltVT.getSizeInBits());
+      Cst = Cst.zextOrTrunc(SVT.getSizeInBits());
+      Vals.push_back(DAG.getConstant(Cst, DL, SVT));
+    }
+    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Vals);
+  }
+
   // (vzext (bitcast (vzext (x)))) -> (vzext x)
   SDValue V = peekThroughBitcasts(Op);
   if (V != Op && V.getOpcode() == X86ISD::VZEXT) {
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 9fbc829..e3e7168 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -990,8 +990,7 @@ define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind {
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX2-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 4d9471f..3920dd9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1215,8 +1215,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX2-LABEL: constant_shift_v8i16:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 1106a7e..bbe7887 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -949,8 +949,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX2-LABEL: constant_shift_v8i16:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vzeroupper
-- 
2.7.4
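
For illustration only (not part of the patch): the new combine replaces a VZEXT whose operand is a build vector of constants with the zero-extended constants themselves, so the extension happens at compile time and the vpmovzxwd of the constant shift amounts disappears from the AVX2 test output. Below is a minimal standalone C++ sketch of the per-lane arithmetic, specialized to the i16 -> i32 case the tests exercise; the function name foldVZextLanes is hypothetical, and plain integers stand in for LLVM's APInt (undef lanes are ignored in this sketch):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Per-lane model of the fold: each 16-bit constant lane is
    // zero-extended to 32 bits, mirroring the patch's
    // Cst.zextOrTrunc(SVT.getSizeInBits()) with SVT = i32.
    static std::vector<uint32_t>
    foldVZextLanes(const std::vector<uint16_t> &Lanes) {
      std::vector<uint32_t> Out;
      Out.reserve(Lanes.size());
      for (uint16_t Lane : Lanes)
        Out.push_back(static_cast<uint32_t>(Lane)); // zero, not sign, extend
      return Out;
    }

    int main() {
      // The constant shift amounts <0,1,2,3,4,5,6,7> from
      // constant_shift_v8i16: after the fold they become a v8i32
      // constant directly, with no runtime vpmovzxwd needed.
      std::vector<uint16_t> ShiftAmounts = {0, 1, 2, 3, 4, 5, 6, 7};
      std::vector<uint32_t> Folded = foldVZextLanes(ShiftAmounts);
      assert(Folded.size() == 8 && Folded[3] == 3u);
      return 0;
    }

Since vpsrlvd/vpsravd accept a memory operand for the shift counts, the folded v8i32 constant is consumed straight from the constant pool ({{.*}}(%rip)), which is why each updated test loses one instruction.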