From db83e3e5071afeb161ce3cdc246668ace9e1bb8e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Thu, 30 Dec 2021 12:38:36 -0800
Subject: [PATCH] [Hexagon] Generate HVX/FP arithmetic instructions

Co-authored-by: Anirudh Sundar Subramaniam
Co-authored-by: Sumanth Gundapaneni
Co-authored-by: Joshua Herrera
---
 llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp |  12 ++
 llvm/lib/Target/Hexagon/HexagonPatternsHVX.td      |  46 ++++++
 llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll   | 167 +++++++++++++++++++++
 3 files changed, 225 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 8028751..a65cecc 100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -93,6 +93,12 @@ HexagonTargetLowering::initializeHVXLowering() {
       Subtarget.useHVXFloatingPoint()) {
     setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
+    setOperationAction(ISD::FADD, MVT::v64f16, Legal);
+    setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
+    setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
+    setOperationAction(ISD::FADD, MVT::v32f32, Legal);
+    setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
+    setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
     setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
     setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
@@ -126,6 +132,9 @@ HexagonTargetLowering::initializeHVXLowering() {
 
     setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
     setOperationAction(ISD::STORE, MVT::v64f32, Custom);
+    setOperationAction(ISD::FADD, MVT::v64f32, Custom);
+    setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
+    setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
     setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
     setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
     setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
@@ -2291,6 +2300,9 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
       case ISD::CTLZ:
       case ISD::CTTZ:
       case ISD::MUL:
+      case ISD::FADD:
+      case ISD::FSUB:
+      case ISD::FMUL:
       case ISD::FMINNUM:
       case ISD::FMAXNUM:
       case ISD::MULHS:
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index f72f02e..33bf8ed7 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -453,6 +453,52 @@ let Predicates = [UseHVX] in {
            (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
 }
 
+// For now, we always deal with vector floating point in SF mode.
+class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
+                      PatFrag RsPred, PatFrag RtPred = RsPred>
+  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+        (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>;
+
+class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType,
+                         PatFrag RsPred, PatFrag RtPred = RsPred>
+  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+        (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>;
+
+let Predicates = [UseHVXV68, UseHVXQFloat] in {
+  def: OpR_RR_pat_conv_hf<V6_vadd_hf, fadd, VecF16, HVF16>;
+  def: OpR_RR_pat_conv_hf<V6_vsub_hf, fsub, VecF16, HVF16>;
+  def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf, fmul, VecF16, HVF16>;
+  def: OpR_RR_pat_conv<V6_vadd_sf, fadd, VecF32, HVF32>;
+  def: OpR_RR_pat_conv<V6_vsub_sf, fsub, VecF32, HVF32>;
+  def: OpR_RR_pat_conv<V6_vmpy_qf32_sf, fmul, VecF32, HVF32>;
+
+  // For now we assume that the fp32 register is always coming in as IEEE float
+  // since the qfloat arithmetic instructions above always generate the
+  // accompanying conversions as part of their pattern
+  def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
+           (V6_vdealh (V6_vconv_hf_qf32
+              (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)),
+                                 (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0))
+              ))))>;
+
+}
+
+// HVX IEEE arithmetic Instructions
+let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
+  def: Pat<(fadd HVF16:$Rs, HVF16:$Rt),
+           (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>;
+  def: Pat<(fadd HVF32:$Rs, HVF32:$Rt),
+           (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>;
+  def: Pat<(fsub HVF16:$Rs, HVF16:$Rt),
+           (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>;
+  def: Pat<(fsub HVF32:$Rs, HVF32:$Rt),
+           (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>;
+  def: Pat<(fmul HVF16:$Rs, HVF16:$Rt),
+           (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>;
+  def: Pat<(fmul HVF32:$Rs, HVF32:$Rt),
+           (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>;
+}
+
 let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
   def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
            (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll
new file mode 100644
index 0000000..0ba7f2c
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/arith-float.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define <64 x half> @f0(<64 x half> %a0, <64 x half> %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.qf16 = vadd(v0.hf,v1.hf)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.hf = v0.qf16
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fadd <64 x half> %a0, %a1
+  ret <64 x half> %v0
+}
+
+define <32 x float> @f1(<32 x float> %a0, <32 x float> %a1) #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v1.sf)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.sf = v0.qf32
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fadd <32 x float> %a0, %a1
+  ret <32 x float> %v0
+}
+
+define <64 x half> @f2(<64 x half> %a0, <64 x half> %a1) #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.qf16 = vsub(v0.hf,v1.hf)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.hf = v0.qf16
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fsub <64 x half> %a0, %a1
+  ret <64 x half> %v0
+}
+
+define <32 x float> @f3(<32 x float> %a0, <32 x float> %a1) #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.qf32 = vsub(v0.sf,v1.sf)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.sf = v0.qf32
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fsub <32 x float> %a0, %a1
+  ret <32 x float> %v0
+}
+
+define <64 x half> @f4(<64 x half> %a0, <64 x half> %a1) #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.qf16 = vmpy(v0.hf,v1.hf)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.hf = v0.qf16
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fmul <64 x half> %a0, %a1
+  ret <64 x half> %v0
+}
+
+define <32 x float> @f5(<32 x float> %a0, <32 x float> %a1) #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.qf32 = vmpy(v0.sf,v1.sf)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.sf = v0.qf32
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fmul <32 x float> %a0, %a1
+  ret <32 x float> %v0
+}
+
+define <64 x half> @f6(<64 x half> %a0, <64 x half> %a1) #1 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.hf = vadd(v0.hf,v1.hf)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fadd <64 x half> %a0, %a1
+  ret <64 x half> %v0
+}
+
+define <32 x float> @f7(<32 x float> %a0, <32 x float> %a1) #1 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.sf = vadd(v0.sf,v1.sf)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fadd <32 x float> %a0, %a1
+  ret <32 x float> %v0
+}
+
+define <64 x half> @f8(<64 x half> %a0, <64 x half> %a1) #1 {
+; CHECK-LABEL: f8:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.hf = vsub(v0.hf,v1.hf)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fsub <64 x half> %a0, %a1
+  ret <64 x half> %v0
+}
+
+define <32 x float> @f9(<32 x float> %a0, <32 x float> %a1) #1 {
+; CHECK-LABEL: f9:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.sf = vsub(v0.sf,v1.sf)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fsub <32 x float> %a0, %a1
+  ret <32 x float> %v0
+}
+
+define <64 x half> @f10(<64 x half> %a0, <64 x half> %a1) #1 {
+; CHECK-LABEL: f10:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.hf = vmpy(v0.hf,v1.hf)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fmul <64 x half> %a0, %a1
+  ret <64 x half> %v0
+}
+
+define <32 x float> @f11(<32 x float> %a0, <32 x float> %a1) #1 {
+; CHECK-LABEL: f11:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: v0.sf = vmpy(v0.sf,v1.sf)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = fmul <32 x float> %a0, %a1
+  ret <32 x float> %v0
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
+attributes #1 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-ieee-fp" }
-- 
2.7.4