From a8a4f99afb7c1f527ae9d1b274a67d0d3f2c3c4c Mon Sep 17 00:00:00 2001 From: Ikhlas Ajbar Date: Thu, 9 Jan 2020 13:23:28 -0600 Subject: [PATCH] [Hexagon] Lower bitcast of a vector predicate This patch lowers bitcast of vector predicate of type v32i1/v64i1 to i32/i64 type. --- .../Target/Hexagon/HexagonISelLowering.cpp | 15 +- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 +- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 132 +++++++++++++++++- .../Hexagon/autohvx/isel-hvx-pred-bitcast.ll | 26 ++++ .../test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll | 51 +++++++ 5 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll create mode 100644 llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 77924e8a74a9..ee50ff7ef3dd 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1694,6 +1694,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Custom-lower bitcasts from i8 to v8i1. setOperationAction(ISD::BITCAST, MVT::i8, Custom); + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::SETCC, MVT::v2i16, Custom); setOperationAction(ISD::VSELECT, MVT::v4i8, Custom); setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); @@ -2266,13 +2268,16 @@ HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); // Handle conversion from i8 to v8i1. 
- if (ResTy == MVT::v8i1) { - SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV); - SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32); - return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG); + if (InpTy == MVT::i8) { + if (ResTy == MVT::v8i1) { + SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV); + SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32); + return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG); + } + return SDValue(); } - return SDValue(); + return Op; } bool diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 5d8d25604b82..b411a03bd502 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -445,7 +445,7 @@ namespace HexagonISD { SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 211f2bf84296..7f99c1588b8f 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1437,6 +1437,136 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { return T7; }
+// Lower a bitcast from an HVX vector predicate (v32i1/v64i1) to a scalar
+// (i32/i64). The predicate is ANDed into a vector of words, the bytes of
+// each word are ORed into a 4-bit pack, the packs are shifted and ORed
+// across words, and the result is read back with vextract. Any other type
+// combination, or a subtarget with no HVX mode enabled, returns Op as is.
+SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  auto *N = Op.getNode();
+  EVT VT = N->getValueType(0);
+  const SDLoc &dl(Op);
+  SDValue Q0 = N->getOperand(0);
+  EVT VTOp = Q0.getNode()->getValueType(0);
+  if (!(VT == MVT::i64 || VT == MVT::i32) ||
+      !(VTOp == MVT::v64i1 || VTOp == MVT::v32i1)) {
+    return Op;
+  }
+  // Choose the word vector for the active HVX mode. Bail out rather than
+  // build nodes from unset values when neither mode is enabled.
+  const bool Is128B = Subtarget.useHVX128BOps();
+  if (!Is128B && !Subtarget.useHVX64BOps())
+    return Op;
+  const unsigned NumWords = Is128B ? 32 : 16;
+  const MVT VecTy = MVT::getVectorVT(MVT::i32, NumWords);
+
+  // r0 = ##0x08040201 // Pre-rotated bits per 4 consecutive bytes.
+  SDValue C8421 = DAG.getTargetConstant(0x08040201, dl, MVT::i32);
+  SDValue InstrC8421 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8421, DAG);
+  // v0 = vand(q0,r0)
+  SDValue Vand =
+      getInstr(Hexagon::V6_vandqrt, dl, VecTy, {Q0, InstrC8421}, DAG);
+
+  // Or the bytes in each word into a single byte: that will form packs
+  // of 4 bits of the output.
+  // v1 = valign(v0,v0,#2)
+  SDValue C2 = DAG.getTargetConstant(2, dl, MVT::i32);
+  SDValue Valign =
+      getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vand, Vand, C2}, DAG);
+  // v0 = vor(v0,v1)
+  SDValue Vor = getInstr(Hexagon::V6_vor, dl, VecTy, {Vand, Valign}, DAG);
+  // v1 = valign(v0,v0,#1)
+  SDValue C1 = DAG.getTargetConstant(1, dl, MVT::i32);
+  SDValue Valign1 =
+      getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vor, Vor, C1}, DAG);
+  // v0 = vor(v0,v1)
+  SDValue Vor1 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor, Valign1}, DAG);
+
+  // Clear all the bytes per word except the lowest one.
+  // r0 = #0xff
+  SDValue Cff = DAG.getTargetConstant(0xff, dl, MVT::i32);
+  SDValue InstrCff = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, Cff, DAG);
+  // v1 = vsplat(r0)
+  SDValue Vsplat = getInstr(Hexagon::V6_lvsplatw, dl, VecTy, InstrCff, DAG);
+  // v0 = vand(v0,v1)
+  SDValue Vand1 = getInstr(Hexagon::V6_vand, dl, VecTy, {Vor1, Vsplat}, DAG);
+
+  // Shift each word left by 4 * (index % 8) to position the 4-bit packs for
+  // oring. After the shift, words 0..7 hold the pieces of the low 32 bits
+  // and words 8..15 the pieces of the high 32 bits of the 64-bit output
+  // (see the v.w[8]:v.w[0] extraction below).
+  // r0 = ##.Lshifts
+  // .Lshifts:
+  //   .word 0
+  //   .word 4
+  //   .word 8
+  //   .word 12
+  //   .word 16
+  //   .word 20
+  //   .word 24
+  //   .word 28
+  //   .word 0
+  //   .word 4
+  //   .word 8
+  //   .word 12
+  //   .word 16
+  //   .word 20
+  //   .word 24
+  //   .word 28
+  // v1 = vmem(r0+#0)
+  SmallVector<SDValue, 32> Elems;
+  for (unsigned W = 0; W != NumWords; ++W)
+    Elems.push_back(DAG.getConstant(4 * (W % 8), dl, MVT::i32));
+
+  SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
+  // v0.w = vasl(v0.w,v1.w)
+  SDValue Vasl = getInstr(Hexagon::V6_vaslwv, dl, VecTy, {Vand1, BV}, DAG);
+
+  // 3 rounds of oring.
+  // NOTE(review): the rotate amounts below are hard-coded to 16/8/4, which
+  // equal HwLen/4, HwLen/8 and HwLen/16 only for HwLen = 64 -- confirm the
+  // sequence against the 128-byte HVX mode.
+  // r0 = #16 // HwLen/4
+  SDValue C16 = DAG.getTargetConstant(16, dl, MVT::i32);
+  SDValue InstrC16 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C16, DAG);
+  // v1 = vror(v0,r0)
+  SDValue Vror = getInstr(Hexagon::V6_vror, dl, VecTy, {Vasl, InstrC16}, DAG);
+  // v0 = vor(v0,v1)
+  SDValue Vor2 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vasl, Vror}, DAG);
+  // r0 = #8 // HwLen/8
+  SDValue C8 = DAG.getTargetConstant(8, dl, MVT::i32);
+  SDValue InstrC8 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8, DAG);
+  // v1 = vror(v0,r0)
+  SDValue Vror1 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor2, InstrC8}, DAG);
+  // v0 = vor(v0,v1)
+  SDValue Vor3 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor2, Vror1}, DAG);
+  // r0 = #4 // HwLen/16
+  SDValue C4 = DAG.getTargetConstant(4, dl, MVT::i32);
+  SDValue InstrC4 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C4, DAG);
+  // v1 = vror(v0,r0)
+  SDValue Vror2 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor3, InstrC4}, DAG);
+  // v0 = vor(v0,v1)
+  SDValue Vor4 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor3, Vror2}, DAG);
+  // The output is v.w[8]:v.w[0]
+  // r3 = #0
+  SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32);
+  SDValue InstrC0 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C0, DAG);
+  // r0 = vextract(v0,r3)
+  SDValue Res =
+      getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC0}, DAG);
+  if (VT == MVT::i64) {
+    // r3 = #32
+    SDValue C32 = DAG.getTargetConstant(32, dl, MVT::i32);
+    SDValue InstrC32 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C32, DAG);
+    // r1 = vextract(v0,r3)
+    SDValue Vextract =
+        getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC32}, DAG);
+    Res = getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract, Res}, DAG);
+  }
+  return Res;
+}
+ SDValue HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { // Sign- and zero-extends are legal. @@ -1595,7 +1725,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); - + case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll new file mode 100644 index 000000000000..c07251035240 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: danny: +; CHECK: vand +define i64 @danny(<64 x i8> %a0, <64 x i8> %a1) #0 { + %v0 = icmp eq <64 x i8> %a0, %a1 + %v1 = bitcast <64 x i1> %v0 to i64 + ret i64 %v1 +} + +; CHECK-LABEL: sammy: +; CHECK: vand +define i32 @sammy(<32 x i16> %a0, <32 x i16> %a1) #0 { + %v0 = icmp eq <32 x i16> %a0, %a1 + %v1 = bitcast <32 x
i1> %v0 to i32 + ret i32 %v1 +} + +; This one still doesn't work. +; define i16 @kirby(<16 x i32> %a0, <16 x i32> %a1) #0 { +; %v0 = icmp eq <16 x i32> %a0, %a1 +; %v1 = bitcast <16 x i1> %v0 to i16 +; ret i16 %v1 +; } + +attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,+hvx,+hvxv66,+hvx-length64b" } diff --git a/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll b/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll new file mode 100644 index 000000000000..6aae095440d6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Test that LLVM does not assert and bitcast v64i1 to i64 is lowered. + +; CHECK: v[[REG1:[0-9]+]] = valign(v{{[0-9]+}},v{{[0-9]+}},#2) +; CHECK: v[[REG2:[0-9]+]] = vor(v{{[0-9]+}},v[[REG1]]) +; CHECK: v[[REG3:[0-9]+]] = valign(v[[REG2]],v[[REG2]],#1) +; CHECK: v[[REG4:[0-9]+]] = vor(v{{[0-9]+}},v[[REG3]]) +; CHECK: v[[REG5:[0-9]+]] = vand(v[[REG4]],v{{[0-9]+}}) +; CHECK: v{{[0-9]+}}.w = vasl(v[[REG5]].w,v{{[0-9]+}}.w) + +target triple = "hexagon" + +define dso_local void @fun() local_unnamed_addr #0 { +entry: + br i1 undef, label %cleanup, label %if.end + +if.end: + %0 = load i8, i8* undef, align 1 + %conv13.i = zext i8 %0 to i32 + %trip.count.minus.1216 = add nsw i32 %conv13.i, -1 + %broadcast.splatinsert221 = insertelement <64 x i32> undef, i32 %trip.count.minus.1216, i32 0 + %broadcast.splat222 = shufflevector <64 x i32> %broadcast.splatinsert221, <64 x i32> undef, <64 x i32> zeroinitializer + %1 = icmp ule <64 x i32> undef, %broadcast.splat222 + %wide.masked.load223 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* nonnull undef, i32 1, <64 x i1> %1, <64 x i8> undef) + %2 = lshr <64 x i8> %wide.masked.load223, + %3 = and <64 x i8> %2, + %4 = zext <64 x i8> %3 to <64 x i32> + %5 = add nsw <64 x i32> undef, %4 + %6 = select <64 x i1> %1, <64 x i32> %5, <64 x i32> undef + %bin.rdx225 = add <64 x i32> %6, undef + %bin.rdx227 = add <64 x 
i32> %bin.rdx225, undef + %bin.rdx229 = add <64 x i32> %bin.rdx227, undef + %bin.rdx231 = add <64 x i32> %bin.rdx229, undef + %bin.rdx233 = add <64 x i32> %bin.rdx231, undef + %bin.rdx235 = add <64 x i32> %bin.rdx233, undef + %bin.rdx237 = add <64 x i32> %bin.rdx235, undef + %7 = extractelement <64 x i32> %bin.rdx237, i32 0 + %nChans = getelementptr inbounds i8, i8* null, i32 2160 + %8 = bitcast i8* %nChans to i32* + store i32 %7, i32* %8, align 4 + br label %cleanup + +cleanup: + ret void +} + +; Function Attrs: argmemonly nounwind readonly willreturn +declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>) + +attributes #0 = { "target-features"="+hvx-length64b,+hvxv67,+v67,-long-calls" } -- 2.34.1