VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a, PredOf<f32>:$pred),
(IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;
} // params = [f32], pnt = PNT_None
+
+ let params = [f16], pnt = PNT_None in { // widening conversions from f16 vectors; instantiated once per `half` of the enclosing loop
+ def vcvt#half#q_f32: Intrinsic<VecOf<f32>, (args Vector:$a), // unpredicated form: one vector argument, f32 vector result
+ (IRInt<"vcvt_widen"> $a, halfconst)>;
+ defm vcvt#half#q: IntrinsicMX< // predicated forms; the tests in this change exercise the _m and _x variants
+ VecOf<f32>, (args Vector:$a, PredOf<f32>:$pred),
+ (IRInt<"vcvt_widen_predicated"> $inactive, $a, halfconst, $pred), // $inactive is not in the args list above; presumably supplied by IntrinsicMX - confirm
+ 1, "_f32">;
+ } // params = [f16], pnt = PNT_None
} // loop over half = "b", "t"
multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
{
return vcvtq_x_n_u32_f32(a, 32, p);
}
+
+// CHECK-LABEL: @test_vcvtbq_f32_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 0)
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtbq_f32_f16(float16x8_t a) // unpredicated "b" form: expects a single vcvt.widen call with selector i32 0
+{
+ return vcvtbq_f32_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvttq_f32_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 1)
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvttq_f32_f16(float16x8_t a) // unpredicated "t" form: expects a single vcvt.widen call with selector i32 1
+{
+ return vcvttq_f32_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtbq_m_f32_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtbq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p) // "_m" form of "b": p is zero-extended and converted to <4 x i1>; inactive is forwarded as the first operand, selector is i32 0
+{
+ return vcvtbq_m_f32_f16(inactive, a, p);
+}
+
+// CHECK-LABEL: @test_vcvttq_m_f32_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p) // "_m" form of "t": p is zero-extended and converted to <4 x i1>; inactive is forwarded as the first operand, selector is i32 1
+{
+ return vcvttq_m_f32_f16(inactive, a, p);
+}
+
+// CHECK-LABEL: @test_vcvtbq_x_f32_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtbq_x_f32_f16(float16x8_t a, mve_pred16_t p) // "_x" form of "b": same predicated call with selector i32 0, but the inactive operand is undef
+{
+ return vcvtbq_x_f32_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvttq_x_f32_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> undef, <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvttq_x_f32_f16(float16x8_t a, mve_pred16_t p) // "_x" form of "t": same predicated call with selector i32 1, but the inactive operand is undef
+{
+ return vcvttq_x_f32_f16(a, p);
+}
LLVMMatchType<0>, rets[0])], props>;
}
+// Intrinsic with a predicated and a non-predicated case. The predicated case
+// has two additional parameters: inactive (the value for inactive lanes, can
+// be undef) and predicate. `flags` are leading parameters shared by both cases.
+multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
+ list<LLVMType> params, LLVMType inactive,
+ LLVMType predicate,
+ list<IntrinsicProperty> props = [IntrNoMem]> {
+ def "": Intrinsic<rets, flags # params, props>; // non-predicated: flags first, then params
+ def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate], // inactive sits between flags and params; predicate comes last
+ props>;
+}
+
defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
[llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>;
+defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [], // v4f32 result; empty flags list
+ [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>; // params are (v8f16, i32); inactive is v4f32; predicate is v4i1
defm int_arm_mve_vldr_gather_base: MVEPredicated<
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
-// Intrinsic with a predicated and a non-predicated case. The predicated case
-// has two additional parameters: inactive (the value for inactive lanes, can
-// be undef) and predicate.
-multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
- list<LLVMType> params, LLVMType inactive,
- LLVMType predicate,
- list<IntrinsicProperty> props = [IntrNoMem]> {
- def "": Intrinsic<rets, flags # params, props>;
- def _predicated: Intrinsic<rets, flags # [inactive] # params # [predicate],
- props>;
-}
-
// The first two parameters are compile-time constants:
// * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq)
// instruction. Note: the flag is inverted to match the corresponding
// Defines an "f32.f16" MVE_VCVT_ff instruction for the given `half`, plus
// selection patterns that map the vcvt_widen intrinsics onto it.
multiclass MVE_VCVT_h2f_m<string iname, int half> {
def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half, (ins), vpred_r, "">;
+ defvar Inst = !cast<Instruction>(NAME); // the instruction record defined on the previous line
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (int_arm_mve_vcvt_widen (v8f16 MQPR:$Qm), (i32 half))), // unpredicated: the i32 operand must equal this instantiation's `half`
+ (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
+ def : Pat<(v4f32 (int_arm_mve_vcvt_widen_predicated
+ (v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
+ (v4i1 VCCR:$mask))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen, // predicated: mask and inactive value follow ARMVCCThen as trailing operands
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
+ }
}
defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
declare <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half>, <4 x float>, i32)
declare <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half>, <4 x float>, i32, <4 x i1>)
+declare <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half>, i32) ; operands: source vector, i32 selector (0 or 1 in the tests in this file)
+declare <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float>, <8 x half>, i32, <4 x i1>) ; operands: inactive, source, i32 selector, predicate
declare <8 x half> @llvm.arm.mve.vcvt.fix.v8f16.v8i16(i32, <8 x i16>, i32)
declare <4 x float> @llvm.arm.mve.vcvt.fix.v4f32.v4i32(i32, <4 x i32>, i32)
%2 = call <4 x i32> @llvm.arm.mve.vcvt.fix.predicated.v4i32.v4f32.v4i1(i32 1, <4 x i32> undef, <4 x float> %a, i32 32, <4 x i1> %1)
ret <4 x i32> %2
}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_f32_f16(<8 x half> %a) { ; unpredicated widen with selector i32 0 must select vcvtb.f32.f16
+; CHECK-LABEL: test_vcvtbq_f32_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvtb.f32.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 0)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvttq_f32_f16(<8 x half> %a) { ; unpredicated widen with selector i32 1 must select vcvtt.f32.f16
+; CHECK-LABEL: test_vcvttq_f32_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvtt.f32.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> %a, i32 1)
+ ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtbq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) { ; predicated widen with selector i32 0: expects vmsr + vpst, then vcvtbt.f32.f16 writing q0
+; CHECK-LABEL: test_vcvtbq_m_f32_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtbt.f32.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 0, <4 x i1> %1)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvttq_m_f32_f16(<4 x float> %inactive, <8 x half> %a, i16 zeroext %p) { ; predicated widen with selector i32 1: expects vmsr + vpst, then vcvttt.f32.f16 writing q0
+; CHECK-LABEL: test_vcvttq_m_f32_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvttt.f32.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> %inactive, <8 x half> %a, i32 1, <4 x i1> %1)
+ ret <4 x float> %2
+}