From 3c817574c2db9dbb003914dc40d58fe1dcfda855 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Thu, 8 Sep 2022 07:06:05 -0700
Subject: [PATCH] [Hexagon] Handle shifts of short vectors of i8

---
 llvm/lib/Target/Hexagon/HexagonISelLowering.cpp |  44 ++++-
 llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll   | 239 ++++++++++++++++++++++++
 2 files changed, 282 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 94411b2..f2076a6 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2345,7 +2345,49 @@ HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
 
 SDValue
 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
-  return getVectorShiftByInt(Op, DAG);
+  const SDLoc &dl(Op);
+
+  // First try to convert the shift (by vector) to a shift by a scalar.
+  // If we first split the shift, the shift amount will become 'extract
+  // subvector', and will no longer be recognized as scalar.
+  SDValue Res = Op;
+  if (SDValue S = getVectorShiftByInt(Op, DAG))
+    Res = S;
+
+  MVT ResTy = ty(Res);
+  if (ResTy.getVectorElementType() != MVT::i8)
+    return Res;
+
+  // For shifts of i8, extend the inputs to i16, then truncate back to i8.
+  assert(ResTy.getVectorElementType() == MVT::i8);
+  unsigned Opc = Res.getOpcode();
+  switch (Opc) {
+  case HexagonISD::VASR:
+  case HexagonISD::VLSR:
+  case HexagonISD::VASL:
+    break;
+  default:
+    // No instructions for shifts by non-scalars.
+    return SDValue();
+  }
+
+  SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);
+
+  auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
+    MVT Ty = ty(V);
+    MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
+    SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
+                                           : DAG.getZExtOrTrunc(V, dl, ExtTy);
+    SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
+    return DAG.getZExtOrTrunc(ExtS, dl, Ty);
+  };
+
+  if (ResTy.getSizeInBits() == 32)
+    return ShiftPartI8(Opc, Val, Amt);
+
+  auto [LoV, HiV] = opSplit(Val, dl, DAG);
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
+                     {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
 }
 
 SDValue
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll b/llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll
new file mode 100644
index 0000000..db1c454
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll
@@ -0,0 +1,239 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define <4 x i8> @f0(<4 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f0:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vsxtbh(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vasrh(r1:0,#1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = vtrunehb(r1:0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = ashr <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1>
+  ret <4 x i8> %v0
+}
+
+define <4 x i8> @f1(<4 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f1:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vzxtbh(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vlsrh(r1:0,#1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = vtrunehb(r1:0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = lshr <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1>
+  ret <4 x i8> %v0
+}
+
+define <4 x i8> @f2(<4 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f2:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vzxtbh(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vaslh(r1:0,#1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = vtrunehb(r1:0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = shl <4 x i8> %a0, <i8 1, i8 1, i8 1, i8 1>
+  ret <4 x i8> %v0
+}
+
+
+define <8 x i8> @f3(<8 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f3:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = vsxtbh(r1)
+; CHECK-NEXT:     r5:4 = vsxtbh(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vasrh(r5:4,#1)
+; CHECK-NEXT:     r3:2 = vasrh(r3:2,#1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = vtrunehb(r1:0)
+; CHECK-NEXT:     r1 = vtrunehb(r3:2)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = ashr <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <8 x i8> %v0
+}
+
+define <8 x i8> @f4(<8 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f4:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = vzxtbh(r1)
+; CHECK-NEXT:     r5:4 = vzxtbh(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vlsrh(r5:4,#1)
+; CHECK-NEXT:     r3:2 = vlsrh(r3:2,#1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = vtrunehb(r1:0)
+; CHECK-NEXT:     r1 = vtrunehb(r3:2)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = lshr <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <8 x i8> %v0
+}
+
+define <8 x i8> @f5(<8 x i8> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f5:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = vzxtbh(r1)
+; CHECK-NEXT:     r5:4 = vzxtbh(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vaslh(r5:4,#1)
+; CHECK-NEXT:     r3:2 = vaslh(r3:2,#1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = vtrunehb(r1:0)
+; CHECK-NEXT:     r1 = vtrunehb(r3:2)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = shl <8 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <8 x i8> %v0
+}
+
+
+define <2 x i16> @f6(<2 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f6:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vasrh(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = ashr <2 x i16> %a0, <i16 1, i16 1>
+  ret <2 x i16> %v0
+}
+
+define <2 x i16> @f7(<2 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f7:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vlsrh(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = lshr <2 x i16> %a0, <i16 1, i16 1>
+  ret <2 x i16> %v0
+}
+
+define <2 x i16> @f8(<2 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f8:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vaslh(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = shl <2 x i16> %a0, <i16 1, i16 1>
+  ret <2 x i16> %v0
+}
+
+
+define <4 x i16> @f9(<4 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f9:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vasrh(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = ashr <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %v0
+}
+
+define <4 x i16> @f10(<4 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f10:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vlsrh(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = lshr <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %v0
+}
+
+define <4 x i16> @f11(<4 x i16> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f11:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vaslh(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = shl <4 x i16> %a0, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %v0
+}
+
+
+define <2 x i32> @f12(<2 x i32> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f12:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vasrw(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = ashr <2 x i32> %a0, <i32 1, i32 1>
+  ret <2 x i32> %v0
+}
+
+define <2 x i32> @f13(<2 x i32> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f13:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vlsrw(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = lshr <2 x i32> %a0, <i32 1, i32 1>
+  ret <2 x i32> %v0
+}
+
+define <2 x i32> @f14(<2 x i32> %a0) unnamed_addr #0 {
+; CHECK-LABEL: f14:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = vaslw(r1:0,#1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+b0:
+  %v0 = shl <2 x i32> %a0, <i32 1, i32 1>
+  ret <2 x i32> %v0
+}
+
+attributes #0 = { nounwind }
-- 
2.7.4
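
A note on the lowering, not part of the patch itself: Hexagon has halfword vector shifts (vasrh/vlsrh/vaslh) but no byte-lane variants, so the patch widens each i8 lane to i16, shifts, and truncates back, sign-extending the input only for arithmetic right shifts and zero-extending it otherwise. The standalone C++ sketch below checks exhaustively that this widen-shift-truncate scheme matches direct i8 shift semantics; the helper names (shift8_direct, shift8_widened) are illustrative only and are not LLVM APIs.

// Standalone sanity check for the widen-shift-truncate lowering.
// Helper names are illustrative; this is not LLVM code.
#include <cassert>
#include <cstdint>
#include <initializer_list>

enum class Shift { Asr, Lsr, Asl };

// Reference semantics: the shift performed directly on an 8-bit lane.
static uint8_t shift8_direct(uint8_t v, unsigned amt, Shift s) {
  switch (s) {
  case Shift::Asr:
    // Arithmetic right shift of a negative value: guaranteed since C++20,
    // and arithmetic on all mainstream compilers before that.
    return static_cast<uint8_t>(static_cast<int8_t>(v) >> amt);
  case Shift::Lsr:
    return static_cast<uint8_t>(v >> amt);
  case Shift::Asl:
    return static_cast<uint8_t>(v << amt);
  }
  return 0;
}

// Lowered semantics: widen the lane to 16 bits (sign-extend only for ASR,
// mirroring the VASR case in ShiftPartI8), shift, then keep the low byte.
static uint8_t shift8_widened(uint8_t v, unsigned amt, Shift s) {
  uint16_t ext = s == Shift::Asr
                     ? static_cast<uint16_t>(static_cast<int8_t>(v)) // vsxtbh
                     : static_cast<uint16_t>(v);                     // vzxtbh
  uint16_t res = 0;
  switch (s) {
  case Shift::Asr:
    res = static_cast<uint16_t>(static_cast<int16_t>(ext) >> amt);   // vasrh
    break;
  case Shift::Lsr:
    res = static_cast<uint16_t>(ext >> amt);                         // vlsrh
    break;
  case Shift::Asl:
    res = static_cast<uint16_t>(ext << amt);                         // vaslh
    break;
  }
  return static_cast<uint8_t>(res); // vtrunehb keeps the low byte of each lane
}

int main() {
  for (unsigned v = 0; v < 256; ++v)
    for (unsigned amt = 0; amt < 8; ++amt)
      for (Shift s : {Shift::Asr, Shift::Lsr, Shift::Asl})
        assert(shift8_direct(static_cast<uint8_t>(v), amt, s) ==
               shift8_widened(static_cast<uint8_t>(v), amt, s));
  return 0;
}

This also explains the shape of the tests: each shifts by a constant splat of 1, which getVectorShiftByInt converts to the scalar immediate seen as #1 in the CHECK lines. That conversion is attempted before any splitting because, as the comment in LowerVECTOR_SHIFT notes, splitting first would turn the splat amount into an 'extract subvector' that is no longer recognized as a scalar.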