void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
bool IsTupleInput, unsigned Opc);
+ void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
CurDAG->RemoveDeadNode(N);
}
+void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
+ unsigned Opcode) {
+ if (N->getValueType(0) != MVT::nxv4f32)
+ return;
+ SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
+}
+
void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
unsigned Op) {
SDLoc DL(N);
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
AArch64::UZP_VG4_4Z4Z_Q);
return;
+ case Intrinsic::aarch64_sve_frinta_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frinta_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintm_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintm_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintn_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintn_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintp_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintp_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
+ return;
}
break;
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+; FRINTA
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frinta_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frinta { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frinta_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frinta { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTM
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintm_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frintm { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintm_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frintm { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTN
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintn_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frintn { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintn_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frintn { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTP
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintp_x2_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: frintp { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintp_x4_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z7.d, z4.d
+; CHECK-NEXT: mov z6.d, z3.d
+; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov z4.d, z1.d
+; CHECK-NEXT: frintp { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT: ret
+ %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+ ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)