From 0bfeede669f0d9c051344977805005c98e8cb41d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 20 Sep 2020 18:38:54 +0100
Subject: [PATCH] [X86][SSE] Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0)) -> EXTEND_VECTOR_INREG(X)

---
 llvm/lib/Target/X86/X86ISelLowering.cpp         | 28 ++++++++++++++++++++++++-
 llvm/test/CodeGen/X86/min-legal-vector-width.ll | 18 ++++++++--------
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f032758..ca14924 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6184,6 +6184,22 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
   return DAG.getBitcast(VT, Vec);
 }
 
+// Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
+static unsigned getOpcode_EXTEND(unsigned Opcode) {
+  switch (Opcode) {
+  case ISD::ANY_EXTEND:
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+    return ISD::ANY_EXTEND;
+  case ISD::ZERO_EXTEND:
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    return ISD::ZERO_EXTEND;
+  case ISD::SIGN_EXTEND:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+    return ISD::SIGN_EXTEND;
+  }
+  llvm_unreachable("Unknown opcode");
+}
+
 // Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
 static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
   switch (Opcode) {
@@ -49258,6 +49274,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   SDValue In = N->getOperand(0);
   unsigned Opcode = N->getOpcode();
+  unsigned InOpcode = In.getOpcode();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // Try to merge vector loads and extend_inreg to an extload.
@@ -49283,9 +49300,18 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
   }
 
   // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
-  if (Opcode == In.getOpcode())
+  if (Opcode == InOpcode)
     return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
 
+  // Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
+  // -> EXTEND_VECTOR_INREG(X).
+  // TODO: Handle non-zero subvector indices.
+  if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
+      In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
+      In.getOperand(0).getOperand(0).getValueSizeInBits() ==
+          In.getValueSizeInBits())
+    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
+
   // Attempt to combine as a shuffle.
   // TODO: General ZERO_EXTEND_VECTOR_INREG support.
   if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index fe445d7..e5240d5 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -976,18 +976,18 @@ define void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal
 define void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: sext_v16i8_v16i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
-; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; CHECK-NEXT:    vpmovsxwq %xmm1, %ymm1
-; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; CHECK-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
-; CHECK-NEXT:    vpmovsxwq %xmm3, %ymm3
-; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm1
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
 ; CHECK-NEXT:    vpmovsxwq %xmm2, %ymm2
-; CHECK-NEXT:    vmovdqa %ymm2, 64(%rdi)
+; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm1
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
+; CHECK-NEXT:    vpmovsxwq %xmm3, %ymm3
+; CHECK-NEXT:    vpmovsxwq %xmm1, %ymm1
+; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0
 ; CHECK-NEXT:    vmovdqa %ymm0, (%rdi)
+; CHECK-NEXT:    vmovdqa %ymm1, 64(%rdi)
 ; CHECK-NEXT:    vmovdqa %ymm3, 96(%rdi)
-; CHECK-NEXT:    vmovdqa %ymm1, 32(%rdi)
+; CHECK-NEXT:    vmovdqa %ymm2, 32(%rdi)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %a = sext <16 x i8> %x to <16 x i64>
-- 
2.7.4