From 37cf7275cdba85c105352b11e6e427f6a8275518 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 12 Feb 2022 14:04:55 +0000
Subject: [PATCH] [X86] Enable vector splitting of ISD::AVGCEILU nodes on AVX1
 and non-BWI targets

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 28 ++++++++++++++++++++--------
 llvm/test/CodeGen/X86/avg.ll            | 32 ++++++++++++++++----------------
 2 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8399465..60351b7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1355,10 +1355,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
     setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
     setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
-    if (HasInt256) {
-      setOperationAction(ISD::AVGCEILU, MVT::v16i16, Legal);
-      setOperationAction(ISD::AVGCEILU, MVT::v32i8, Legal);
-    }
+    setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
+    setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
 
     setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
     setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
@@ -1658,10 +1656,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
     setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
     setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
-    if (HasBWI) {
-      setOperationAction(ISD::AVGCEILU, MVT::v32i16, Legal);
-      setOperationAction(ISD::AVGCEILU, MVT::v64i8, Legal);
-    }
+    setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
+    setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
 
     setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
     setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
@@ -28404,6 +28400,21 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
   return SDValue();
 }
 
+static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
+                        SelectionDAG &DAG) {
+  MVT VT = Op.getSimpleValueType();
+
+  // For AVX1 cases, split to use legal ops (everything but v4i64).
+  if (VT.is256BitVector() && !Subtarget.hasInt256())
+    return splitVectorIntBinary(Op, DAG);
+
+  if (VT == MVT::v32i16 || VT == MVT::v64i8)
+    return splitVectorIntBinary(Op, DAG);
+
+  // Default to expand.
+  return SDValue();
+}
+
 static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
 
@@ -31722,6 +31733,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UMAX:
   case ISD::UMIN:               return LowerMINMAX(Op, DAG);
   case ISD::ABS:                return LowerABS(Op, Subtarget, DAG);
+  case ISD::AVGCEILU:           return LowerAVG(Op, Subtarget, DAG);
   case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
   case ISD::MLOAD:              return LowerMLOAD(Op, Subtarget, DAG);
   case ISD::MSTORE:             return LowerMSTORE(Op, Subtarget, DAG);
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index db2ba38..22171ec 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -166,10 +166,10 @@ define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ;
 ; AVX1-LABEL: avg_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
-; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
@@ -316,10 +316,10 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ;
 ; AVX512F-LABEL: avg_v64i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
-; AVX512F-NEXT:    vmovdqa 32(%rsi), %ymm1
-; AVX512F-NEXT:    vpavgb (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT:    vpavgb 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX512F-NEXT:    vmovdqa 32(%rdi), %ymm1
+; AVX512F-NEXT:    vpavgb (%rsi), %ymm0, %ymm0
+; AVX512F-NEXT:    vpavgb 32(%rsi), %ymm1, %ymm1
 ; AVX512F-NEXT:    vmovdqu %ymm1, (%rax)
 ; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
@@ -411,10 +411,10 @@ define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ;
 ; AVX1-LABEL: avg_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
-; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
@@ -492,10 +492,10 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ;
 ; AVX512F-LABEL: avg_v32i16:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
-; AVX512F-NEXT:    vmovdqa 32(%rsi), %ymm1
-; AVX512F-NEXT:    vpavgw (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT:    vpavgw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX512F-NEXT:    vmovdqa 32(%rdi), %ymm1
+; AVX512F-NEXT:    vpavgw (%rsi), %ymm0, %ymm0
+; AVX512F-NEXT:    vpavgw 32(%rsi), %ymm1, %ymm1
 ; AVX512F-NEXT:    vmovdqu %ymm1, (%rax)
 ; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
-- 
2.7.4
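
Reviewer context (not part of the commit): ISD::AVGCEILU is the unsigned
rounding average, i.e. (a + b + 1) >> 1 evaluated in a wider type so the
+1 cannot overflow. The avg.ll tests build it from the usual
zext/add/lshr/trunc idiom; a minimal standalone sketch of the kind of
input the new Custom lowering handles (hypothetical function name, written
in the same style as avg.ll rather than copied from it):

  define <16 x i16> @avg_ceil_v16i16(<16 x i16> %a, <16 x i16> %b) {
    ; Widen to i32 so the sum and the +1 cannot wrap.
    %za  = zext <16 x i16> %a to <16 x i32>
    %zb  = zext <16 x i16> %b to <16 x i32>
    %sum = add <16 x i32> %za, %zb
    ; Round up before halving: (a + b + 1) >> 1.
    %inc = add <16 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    %shr = lshr <16 x i32> %inc, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    ; Narrow back; every lane fits in i16 by construction.
    %avg = trunc <16 x i32> %shr to <16 x i16>
    ret <16 x i16> %avg
  }

On an AVX1-only target (e.g. llc -mtriple=x86_64-unknown-unknown
-mattr=+avx, as in the avg.ll RUN lines), the v16i16 AVGCEILU node is now
Custom-lowered by LowerAVG: splitVectorIntBinary produces two v8i16
AVGCEILU nodes, which select as the paired vpavgw instructions in the AVX1
checks above. The v32i16/v64i8 path does the same for AVX512F targets
without BWI.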