From 1b5baa42bc93432e6ae33e0a0fdce4d3e7c98dcb Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 23 Oct 2020 18:05:06 -0500 Subject: [PATCH] [Hexagon] Handle selection between HVX vector predicates Make sure that (select i1 q0 q1) is handled properly. --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 + llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 55 ++++- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 23 +- .../Hexagon/autohvx/isel-q-legalization-loop.ll | 40 ++++ llvm/test/CodeGen/Hexagon/autohvx/isel-select-q.ll | 237 +++++++++++++++++++++ 5 files changed, 337 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-q-legalization-loop.ll create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-select-q.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index b7715cc..e174e52 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -477,6 +477,7 @@ private: SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 1582443..0b06e6e 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -94,6 +94,7 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::MUL, T, Legal); setOperationAction(ISD::CTPOP, T, Legal); setOperationAction(ISD::CTLZ, T, Legal); + setOperationAction(ISD::SELECT, T, Legal); 
setOperationAction(ISD::SPLAT_VECTOR, T, Legal); if (T != ByteV) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); @@ -211,6 +212,7 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom); + setOperationAction(ISD::SELECT, BoolV, Custom); setOperationAction(ISD::AND, BoolV, Legal); setOperationAction(ISD::OR, BoolV, Legal); setOperationAction(ISD::XOR, BoolV, Legal); @@ -1620,6 +1622,26 @@ HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { } SDValue +HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const { /* Custom lowering for ISD::SELECT: when the result is a vector of i1, both value operands are converted with Q2V, the SELECT is performed on the resulting VecTy vectors under the original condition operand, and the outcome is converted back to ResTy with V2Q. */ + MVT ResTy = ty(Op); + if (ResTy.getVectorElementType() != MVT::i1) + return Op; /* result element type is not i1: the node is returned unchanged */ + + const SDLoc &dl(Op); + unsigned HwLen = Subtarget.getVectorLength(); + unsigned VecLen = ResTy.getVectorNumElements(); + assert(HwLen % VecLen == 0); /* the division below must be exact */ + unsigned ElemSize = HwLen / VecLen; /* NOTE(review): presumably bytes per element, since it is scaled by 8 to form a bit width below - confirm against getVectorLength() */ + + MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen); /* VecLen integer elements, each ElemSize * 8 bits wide */ + SDValue S = + DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0), + DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)), + DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2))); + return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S); +} + +SDValue HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { if (SDValue S = getVectorShiftByInt(Op, DAG)) return S; @@ -2031,6 +2053,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); case ISD::CTTZ: return LowerHvxCttz(Op, DAG); + case ISD::SELECT: return LowerHvxSelect(Op, DAG); case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerHvxShift(Op, DAG); @@ -2143,27 +2166,41 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) if
(DCI.isBeforeLegalizeOps()) return SDValue(); + SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end()); + switch (Opc) { case ISD::VSELECT: { // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) - SDValue Cond = Op.getOperand(0); + SDValue Cond = Ops[0]; if (Cond->getOpcode() == ISD::XOR) { SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); if (C1->getOpcode() == HexagonISD::QTRUE) - return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, - Op.getOperand(2), Op.getOperand(1)); + return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]); } break; } + case HexagonISD::V2Q: + if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { + if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0))) + return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op)) + : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op)); + } + break; + case HexagonISD::Q2V: + if (Ops[0].getOpcode() == HexagonISD::QTRUE) + return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op), + DAG.getConstant(-1, dl, MVT::i32)); + if (Ops[0].getOpcode() == HexagonISD::QFALSE) + return getZero(dl, ty(Op), DAG); + break; case HexagonISD::VINSERTW0: - if (isUndef(Op.getOperand(1))) - return Op.getOperand(0); + if (isUndef(Ops[1])) + return Ops[0]; break; case HexagonISD::VROR: { - SDValue Op0 = Op.getOperand(0); - if (Op0.getOpcode() == HexagonISD::VROR) { - SDValue Vec = Op0.getOperand(0); - SDValue Rot0 = Op.getOperand(1), Rot1 = Op0.getOperand(1); + if (Ops[0].getOpcode() == HexagonISD::VROR) { + SDValue Vec = Ops[0].getOperand(0); + SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1); SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1}); return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot}); } diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 2d85d2f..e37cfe3 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -172,16 +172,19 @@ let Predicates = [UseHVX] in { } let Predicates =
[UseHVX] in { - def: Pat<(VecI8 vzero), (V6_vd0)>; - def: Pat<(VecI16 vzero), (V6_vd0)>; - def: Pat<(VecI32 vzero), (V6_vd0)>; - def: Pat<(VecPI8 vzero), (PS_vdd0)>; - def: Pat<(VecPI16 vzero), (PS_vdd0)>; - def: Pat<(VecPI32 vzero), (PS_vdd0)>; - - def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>; - def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>; - def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>; + let AddedComplexity = 100 in { + // These should be preferred over a vsplat of 0. + def: Pat<(VecI8 vzero), (V6_vd0)>; + def: Pat<(VecI16 vzero), (V6_vd0)>; + def: Pat<(VecI32 vzero), (V6_vd0)>; + def: Pat<(VecPI8 vzero), (PS_vdd0)>; + def: Pat<(VecPI16 vzero), (PS_vdd0)>; + def: Pat<(VecPI32 vzero), (PS_vdd0)>; + + def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>; + def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>; + def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>; + } def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-q-legalization-loop.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-q-legalization-loop.ll new file mode 100644 index 0000000..949f86c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-q-legalization-loop.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; REQUIRES: asserts + +; Check that this doesn't crash. 
+; CHECK: vand + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +%s.0 = type { [4 x <32 x i32>] } + +declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #0 +declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #0 +declare <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32>, <32 x i32>, i32) #0 +declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #0 + +; Function Attrs: nounwind +define void @f0() local_unnamed_addr #1 { +b0: + %v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> undef, i32 16843009) + %v1 = getelementptr inbounds %s.0, %s.0* null, i32 0, i32 0, i32 3 + br label %b1 + +b1: ; preds = %b1, %b0 + %v2 = phi i32 [ 0, %b0 ], [ %v11, %b1 ] + %v3 = and i32 %v2, 1 + %v4 = icmp eq i32 %v3, 0 + %v5 = select i1 %v4, <128 x i1> zeroinitializer, <128 x i1> %v0 + %v6 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v5, <32 x i32> undef, <32 x i32> undef) + %v7 = tail call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> undef, <32 x i32> %v6, i32 -32) + %v8 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v7) + %v9 = tail call <64 x i32> @llvm.hexagon.V6.vdealvdd.128B(<32 x i32> undef, <32 x i32> %v8, i32 -32) + %v10 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v9) + store <32 x i32> %v10, <32 x i32>* %v1, align 128 + %v11 = add nuw nsw i32 %v2, 1 + br label %b1 +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-select-q.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-select-q.ll new file mode 100644 index 0000000..f189775 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-select-q.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that selection (based on i1) between vector predicates works. +define <128 x i8> @f0(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, i32 %a4) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.b,v1.b) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q1 = vcmp.gt(v1.b,v2.b) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = #-1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gt(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vand(q1,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vand(q0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q3 = vand(v0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q3,v1,v3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = icmp sgt <128 x i8> %a0, %a1 + %v1 = icmp sgt <128 x i8> %a1, %a2 + %v2 = icmp sgt i32 %a4, 0 + %v3 = select i1 %v2, <128 x i1> %v0, <128 x i1> %v1 + %v4 = select <128 x i1> %v3, <128 x i8> %a1, <128 x i8> %a3 + ret <128 x i8> %v4 +} + +define <64 x i16> @f1(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2, <64 x i16> %a3, i32 %a4) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q1 = vcmp.gt(v1.h,v2.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = #-1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gt(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vand(q1,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vand(q0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q3 = vand(v0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q3,v1,v3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; 
CHECK-NEXT: } + %v0 = icmp sgt <64 x i16> %a0, %a1 + %v1 = icmp sgt <64 x i16> %a1, %a2 + %v2 = icmp sgt i32 %a4, 0 + %v3 = select i1 %v2, <64 x i1> %v0, <64 x i1> %v1 + %v4 = select <64 x i1> %v3, <64 x i16> %a1, <64 x i16> %a3 + ret <64 x i16> %v4 +} + +define <32 x i32> @f2(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2, <32 x i32> %a3, i32 %a4) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q1 = vcmp.gt(v1.w,v2.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = #-1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gt(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vand(q1,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vand(q0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q3 = vand(v0,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q3,v1,v3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = icmp sgt <32 x i32> %a0, %a1 + %v1 = icmp sgt <32 x i32> %a1, %a2 + %v2 = icmp sgt i32 %a4, 0 + %v3 = select i1 %v2, <32 x i1> %v0, <32 x i1> %v1 + %v4 = select <32 x i1> %v3, <32 x i32> %a1, <32 x i32> %a3 + ret <32 x i32> %v4 +} + +; Selection of vector predicates first converts them into regular vectors. +; Check that all-true and all-false bool vectors are optimized into splat(-1) +; and vxor(v,v). 
+define <128 x i8> @f3(<128 x i8> %a0, <128 x i8> %a1, i32 %a2) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r2 = #-1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gt(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vxor(v2,v2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v3 = vsplat(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v2 = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x i1> undef, i1 true, i32 0 + %v1 = shufflevector <128 x i1> %v0, <128 x i1> undef, <128 x i32> zeroinitializer + %v2 = icmp sgt i32 %a2, 0 + %v3 = select i1 %v2, <128 x i1> %v1, <128 x i1> zeroinitializer + %v4 = select <128 x i1> %v3, <128 x i8> %a0, <128 x i8> %a1 + ret <128 x i8> %v4 +} + +define <64 x i16> @f4(<64 x i16> %a0, <64 x i16> %a1, i32 %a2) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r2 = #-1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gt(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vxor(v2,v2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v3 = vsplat(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v2 = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x i1> undef, i1 true, i32 0 + %v1 = shufflevector <64 x i1> %v0, <64 x i1> undef, <64 x i32> zeroinitializer + %v2 = icmp sgt i32 %a2, 0 + %v3 = select i1 %v2, <64 x i1> %v1, <64 x i1> zeroinitializer + %v4 = select <64 x i1> %v3, <64 x i16> %a0, <64 x i16> %a1 + ret <64 x i16> %v4 +} + +define <32 x i32> @f5(<32 x i32> %a0, <32 x i32> %a1, i32 %a2) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: +; 
CHECK-NEXT: { +; CHECK-NEXT: r2 = #-1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gt(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v2 = vxor(v2,v2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v3 = vsplat(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v2 = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <32 x i1> undef, i1 true, i32 0 + %v1 = shufflevector <32 x i1> %v0, <32 x i1> undef, <32 x i32> zeroinitializer + %v2 = icmp sgt i32 %a2, 0 + %v3 = select i1 %v2, <32 x i1> %v1, <32 x i1> zeroinitializer + %v4 = select <32 x i1> %v3, <32 x i32> %a0, <32 x i32> %a1 + ret <32 x i32> %v4 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b,-packets" } + -- 2.7.4