From f2c53b5d6c5516ed5f270e3e8e90281ea6acbea9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 16 Mar 2019 15:02:00 +0000 Subject: [PATCH] [X86][SSE] Constant fold PEXTRB/PEXTRW/EXTRACT_VECTOR_ELT nodes. Replaces existing i1-only fold. llvm-svn: 356325 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 43 ++++++++++++++++++++------------- llvm/test/CodeGen/X86/pr34177.ll | 20 ++++++--------- llvm/test/CodeGen/X86/widen_load-2.ll | 12 +++------ 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d6ee4a3..8fa0309 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34552,23 +34552,41 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget)) return NewOp; + SDValue InputVector = N->getOperand(0); + SDValue EltIdx = N->getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx); + + EVT SrcVT = InputVector.getValueType(); + EVT VT = N->getValueType(0); + SDLoc dl(InputVector); + bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT; + + // Integer Constant Folding. + if (VT.isInteger() && CIdx && + CIdx->getAPIntValue().ult(SrcVT.getVectorNumElements())) { + APInt UndefVecElts; + SmallVector<APInt, 16> EltBits; + unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits(); + if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts, + EltBits, true, false)) { + uint64_t Idx = CIdx->getZExtValue(); + if (UndefVecElts[Idx]) + return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT); + return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()), + dl, VT); + } + } + + // TODO - Remove this once we can handle the implicit zero-extension of // X86ISD::PEXTRW/X86ISD::PEXTRB in: // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and // combineBasicSADPattern.
- if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + if (IsPextr) return SDValue(); if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) return NewOp; - SDValue InputVector = N->getOperand(0); - SDValue EltIdx = N->getOperand(1); - - EVT SrcVT = InputVector.getValueType(); - EVT VT = N->getValueType(0); - SDLoc dl(InputVector); - // Detect mmx extraction of all bits as a i64. It works better as a bitcast. if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) { @@ -34589,15 +34607,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc); } - if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST && - isa<ConstantSDNode>(EltIdx) && - isa<ConstantSDNode>(InputVector.getOperand(0))) { - uint64_t ExtractedElt = N->getConstantOperandVal(1); - const APInt &InputValue = InputVector.getConstantOperandAPInt(0); - uint64_t Res = InputValue[ExtractedElt]; - return DAG.getConstant(Res, dl, MVT::i1); - } - // Check whether this extract is the root of a sum of absolute differences // pattern.
This has to be done here because we really want it to happen // pre-legalization, diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll index 6926e30..056682b 100644 --- a/llvm/test/CodeGen/X86/pr34177.ll +++ b/llvm/test/CodeGen/X86/pr34177.ll @@ -8,29 +8,23 @@ target triple = "x86_64-unknown-linux-gnu" define void @test(<4 x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unnamed_addr { ; CHECK-LABEL: test: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0] -; CHECK-NEXT: vmovq %xmm1, %r8 -; CHECK-NEXT: vmovq %xmm0, %r9 -; CHECK-NEXT: vpextrq $1, %xmm1, %r10 -; CHECK-NEXT: vpextrq $1, %xmm0, %r11 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,3] -; CHECK-NEXT: vmovq %xmm1, %rax +; CHECK-NEXT: vmovq %xmm0, %rax +; CHECK-NEXT: vpextrq $1, %xmm0, %rcx ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vmovq %xmm0, %rcx -; CHECK-NEXT: vpextrq $1, %xmm1, %rdx +; CHECK-NEXT: vmovq %xmm0, %rdx ; CHECK-NEXT: vpextrq $1, %xmm0, %rsi -; CHECK-NEXT: cmpq %rsi, %rdx +; CHECK-NEXT: cmpq $3, %rsi ; CHECK-NEXT: fld1 ; CHECK-NEXT: fldz ; CHECK-NEXT: fld %st(0) ; CHECK-NEXT: fcmove %st(2), %st -; CHECK-NEXT: cmpq %rcx, %rax +; CHECK-NEXT: cmpq $2, %rdx ; CHECK-NEXT: fld %st(1) ; CHECK-NEXT: fcmove %st(3), %st -; CHECK-NEXT: cmpq %r11, %r10 +; CHECK-NEXT: cmpq $1, %rcx ; CHECK-NEXT: fld %st(2) ; CHECK-NEXT: fcmove %st(4), %st -; CHECK-NEXT: cmpq %r9, %r8 +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: fxch %st(3) ; CHECK-NEXT: fcmove %st(4), %st ; CHECK-NEXT: fstp %st(4) diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll index 1fc7aee..23b68b2 100644 --- a/llvm/test/CodeGen/X86/widen_load-2.ll +++ b/llvm/test/CodeGen/X86/widen_load-2.ll @@ -368,12 +368,10 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: 
movdqa {{.*#+}} xmm0 = [10395294,10395294,10395294,10395294] -; X86-NEXT: pextrw $0, %xmm0, (%edx) ; X86-NEXT: movb $-98, 2(%edx) -; X86-NEXT: movdqa {{.*#+}} xmm0 = [65793,65793,65793,65793] -; X86-NEXT: pextrw $0, %xmm0, (%ecx) +; X86-NEXT: movw $-24930, (%edx) # imm = 0x9E9E ; X86-NEXT: movb $1, 2(%ecx) +; X86-NEXT: movw $257, (%ecx) # imm = 0x101 ; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X86-NEXT: movdqa %xmm0, %xmm1 ; X86-NEXT: psrld $1, %xmm1 @@ -387,12 +385,10 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa ; X64-LABEL: rot: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movdqa {{.*#+}} xmm0 = [10395294,10395294,10395294,10395294] -; X64-NEXT: pextrw $0, %xmm0, (%rsi) ; X64-NEXT: movb $-98, 2(%rsi) -; X64-NEXT: movdqa {{.*#+}} xmm0 = [65793,65793,65793,65793] -; X64-NEXT: pextrw $0, %xmm0, (%rdx) +; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E ; X64-NEXT: movb $1, 2(%rdx) +; X64-NEXT: movw $257, (%rdx) # imm = 0x101 ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; X64-NEXT: movdqa %xmm0, %xmm1 ; X64-NEXT: psrld $1, %xmm1 -- 2.7.4