From 83288f8063377f03cbcf3e89c940d2a62c855a96 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 26 Dec 2022 21:46:20 +0300
Subject: [PATCH] [AArch64] Custom lower `ISD::ZERO_EXTEND_VECTOR_INREG`

The baseline legalization for `ISD::ZERO_EXTEND_VECTOR_INREG`
(`VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG`) blends in the zeros,
but as mentioned e.g. in b4bd0a404fe26071dab0854dfd9767974909c7c4,
AArch64 has no such blend instruction. So some shuffles that would have
been nicely lowered by `LowerVECTOR_SHUFFLE()`, e.g. into `ZIP1`, become
unrecognizable after round-tripping through
`ISD::ZERO_EXTEND_VECTOR_INREG` recognition & legalization.

The most obvious fix is to custom-lower `ISD::ZERO_EXTEND_VECTOR_INREG`
as the `ZIP1`-with-zeros it would originally have been lowered to in
that test case.
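For illustration, a hand-written sketch (not copied from the test file
or its CHECK lines; `%t`/`%r` are placeholder names): a shuffle that
zero-extends the low lanes, such as

  %r = shufflevector <8 x i8> %t, <8 x i8> zeroinitializer,
       <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>

is recognized by DAGCombine as `ISD::ZERO_EXTEND_VECTOR_INREG`, and with
this patch it should again lower to an interleave with a zeroed
register, roughly:

  movi v1.2d, #0000000000000000 // materialize the zero vector
  zip1 v0.8b, v0.8b, v1.8b      // interleave source lanes with zeros

instead of going through the generic blend expansion. Extension factors
other than 2 still fall back to that expansion, see the FIXME below.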
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 24 ++++++++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   |  1 +
 llvm/test/CodeGen/AArch64/aarch64-vuzp.ll       |  7 ++-----
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2627e44..d65693f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1554,6 +1554,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
   setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+  setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
   setOperationAction(ISD::SRA, VT, Custom);
@@ -5919,6 +5920,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::BUILD_VECTOR:
     return LowerBUILD_VECTOR(Op, DAG);
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SPLAT_VECTOR:
@@ -11443,6 +11446,27 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
                      Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
 }
 
+// Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend in zeros,
+// but we don't have an appropriate instruction,
+// so custom-lower it as ZIP1-with-zeros.
+SDValue
+AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  SDValue SrcOp = Op.getOperand(0);
+  EVT SrcVT = SrcOp.getValueType();
+  assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
+         "Unexpected extension factor.");
+  unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+  // FIXME: support multi-step zipping?
+  if (Scale != 2)
+    return SDValue();
+  SDValue Zeros = DAG.getConstant(0, dl, SrcVT);
+  return DAG.getBitcast(VT,
+                        DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros));
+}
+
 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
   SDLoc dl(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1eac0af..e62a1bf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1039,6 +1039,7 @@ private:
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll b/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
index 1032699..c1d9cae 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
@@ -59,10 +59,8 @@ entry:
 
 ; Check that this pattern is recognized as a VZIP and
 ; that the vector blend transform does not scramble the pattern.
-; FIXME: we can not recognize generic ZERO_EXTEND_VECTOR_INREG legalization
-; as a zip1.
 ; CHECK-LABEL: vzipNoBlend:
-; CHECK-NOT: zip1
+; CHECK: zip1
 define <8 x i8> @vzipNoBlend(ptr %A, ptr %B) nounwind {
   %t = load <8 x i8>, ptr %A
   %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -78,9 +76,8 @@ define <8 x i8> @vzipNoBlendCommutted(ptr %A, ptr %B) nounwind {
   ret <8 x i8> %vzip
 }
 
-; FIXME: this is identical to @vzipNoBlend
 ; CHECK-LABEL: vzipStillZExt:
-; CHECK-NOT: zip1
+; CHECK: zip1
 define <8 x i8> @vzipStillZExt(ptr %A, ptr %B) nounwind {
   %t = load <8 x i8>, ptr %A
   %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
-- 
2.7.4