From: Eli Friedman Date: Mon, 3 Aug 2020 19:29:40 +0000 (-0700) Subject: [AArch64] Add missing isel patterns for fcvtzs/u intrinsic on v1f64. X-Git-Tag: llvmorg-13-init~15956 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dca23ed8952383701a62b778104f4db6f5d4b799;p=platform%2Fupstream%2Fllvm.git [AArch64] Add missing isel patterns for fcvtzs/u intrinsic on v1f64. Fixes test-suite compile failure caused by 8dfb5d7. While I'm in the area, add some more test coverage to related operations, to make sure we aren't missing any other patterns. --- diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 61a43ed..39e1ee3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4483,6 +4483,10 @@ def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), + (FCVTZSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), + (FCVTZUv1i64 FPR64:$Rn)>; def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), (FRECPEv1f16 FPR16:$Rn)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll index d236aea..9ab7247 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll @@ -30,9 +30,19 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtas_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtas_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtas d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone 
declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtau_2s: @@ -61,9 +71,19 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtau_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtau_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtau d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtms_2s: @@ -92,9 +112,19 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtms_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtms_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtms d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtmu_2s: @@ -123,9 +153,19 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtmu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtmu_1d: 
+;CHECK-NOT: ld1 +;CHECK: fcvtmu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtps_2s: @@ -154,9 +194,19 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtps_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtps_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtps d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtpu_2s: @@ -185,9 +235,19 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtpu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtpu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtpu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x 
i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtns_2s: @@ -216,9 +276,19 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtns_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtns_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtns d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtnu_2s: @@ -247,9 +317,19 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtnu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtnu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtnu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzs_2s: @@ -278,6 +358,57 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +; FIXME: Generate "fcvtzs d0, d0"? 
+define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzs_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtzs x8, d0 +;CHECK-NEXT: mov d0, x8 +;CHECK-NEXT: ret + %tmp3 = fptosi <1 x double> %A to <1 x i64> + ret <1 x i64> %tmp3 +} + +define <2 x i32> @fcvtzs_2s_intrinsic(<2 x float> %A) nounwind { +;CHECK-LABEL: fcvtzs_2s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs.2s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> %A) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @fcvtzs_4s_intrinsic(<4 x float> %A) nounwind { +;CHECK-LABEL: fcvtzs_4s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs.4s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %A) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @fcvtzs_2d_intrinsic(<2 x double> %A) nounwind { +;CHECK-LABEL: fcvtzs_2d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs.2d v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> %A) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @fcvtzs_1d_intrinsic(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzs_1d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + +declare <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzu_2s: @@ -306,6 +437,58 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +; FIXME: Generate "fcvtzu d0, d0"? 
+define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtzu x8, d0 +;CHECK-NEXT: mov d0, x8 +;CHECK-NEXT: ret + %tmp3 = fptoui <1 x double> %A to <1 x i64> + ret <1 x i64> %tmp3 +} + +define <2 x i32> @fcvtzu_2s_intrinsic(<2 x float> %A) nounwind { +;CHECK-LABEL: fcvtzu_2s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu.2s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> %A) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @fcvtzu_4s_intrinsic(<4 x float> %A) nounwind { +;CHECK-LABEL: fcvtzu_4s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu.4s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> %A) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @fcvtzu_2d_intrinsic(<2 x double> %A) nounwind { +;CHECK-LABEL: fcvtzu_2d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu.2d v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> %A) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @fcvtzu_1d_intrinsic(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzu_1d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + +declare <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>) nounwind readnone + define <2 x float> @frinta_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: frinta_2s: ;CHECK-NOT: ld1 diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll index c8333b2..ff19e6a 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll +++ 
b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
@@ -16,6 +16,10 @@ declare i64 @llvm.aarch64.neon.fcvtau.i64.f16(half)
 declare i32 @llvm.aarch64.neon.fcvtau.i32.f16(half)
 declare i64 @llvm.aarch64.neon.fcvtas.i64.f16(half)
 declare i32 @llvm.aarch64.neon.fcvtas.i32.f16(half)
+declare i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half)
+declare i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half)
+declare i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half)
+declare i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half)
 declare half @llvm.aarch64.neon.frsqrte.f16(half)
 declare half @llvm.aarch64.neon.frecpx.f16(half)
 declare half @llvm.aarch64.neon.frecpe.f16(half)
@@ -138,6 +142,42 @@ entry:
   ret i64 %0
 }
 
+define i32 @fcvtzu_intrinsic_i32(half %a) {
+; CHECK-LABEL: fcvtzu_intrinsic_i32:
+; CHECK: fcvtzu w0, h0
+; CHECK-NEXT: ret
+entry:
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+  ret i32 %fcvt
+}
+
+define i64 @fcvtzu_intrinsic_i64(half %a) {
+; CHECK-LABEL: fcvtzu_intrinsic_i64:
+; CHECK: fcvtzu x0, h0
+; CHECK-NEXT: ret
+entry:
+  %fcvt = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+  ret i64 %fcvt
+}
+
+define i32 @fcvtzs_intrinsic_i32(half %a) {
+; CHECK-LABEL: fcvtzs_intrinsic_i32:
+; CHECK: fcvtzs w0, h0
+; CHECK-NEXT: ret
+entry:
+  %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+  ret i32 %fcvt
+}
+
+define i64 @fcvtzs_intrinsic_i64(half %a) {
+; CHECK-LABEL: fcvtzs_intrinsic_i64:
+; CHECK: fcvtzs x0, h0
+; CHECK-NEXT: ret
+entry:
+  %fcvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+  ret i64 %fcvt
+}
+
 define dso_local i16 @t19(half %a) {
 ; CHECK-LABEL: t19:
 ; CHECK: fcvtas w0, h0