From b1e1a26e8e7e61c924d39ae53e2922dc4364e6bb Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Mon, 16 Sep 2019 07:32:13 +0000 Subject: [PATCH] [AArch64] Some more FP16 FMA pattern matching After our previous machinecombiner exercises (rL371321, rL371818, rL371833), we were still missing a few FP16 FMA patterns. Differential Revision: https://reviews.llvm.org/D67576 llvm-svn: 371960 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 21 +++++++- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll | 8 ++- .../CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll | 61 ++++++++++++++++++---- 3 files changed, 72 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 541b7ce..df23422 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3315,20 +3315,37 @@ defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", // N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike // the NEON variant. + +// Here we handle first "a + (-b)*c" for FMSUB: + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), + (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and -// "(-a) + b*(-c)". 
+// Now it's time for "(-a) + (-b)*c" + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), + (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +// And here "(-a) + b*(-c)" + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))), + (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll index 9e276cd..856b00a 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll @@ -175,8 +175,7 @@ entry: define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmsh_lane_f16: -; CHECK: fneg h1, h1 -; CHECK: fmadd h0, h1, h2, h0 +; CHECK: fmsub h0, h1, h2, h0 ; CHECK-NEXT: ret entry: %0 = fsub half 0xH8000, %b @@ -187,9 +186,8 @@ entry: define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) { ; CHECK-LABEL: t_vfmsh_laneq_f16: -; CHECK: fneg h1, h1 -; CHECK-NEXT: fmadd h0, h1, h2, h0 -; CHECK-NEXT: ret +; CHECK: fmsub h0, h1, h2, h0 +; CHECK-NEXT: ret entry: %0 = fsub half 0xH8000, %b %extract = extractelement <8 x half> %c, i32 0 diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll index 9a5a0a5..f3ad71b 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll @@ -10,44 +10,83 @@ entry: } define half @fnma16(half %a, half %b, half %c) 
nounwind readnone ssp { -entry: ; CHECK-LABEL: fnma16: ; CHECK: fnmadd h0, h0, h1, h2 +entry: %0 = tail call half @llvm.fma.f16(half %a, half %b, half %c) %mul = fmul half %0, -1.000000e+00 ret half %mul } define half @fms16(half %a, half %b, half %c) nounwind readnone ssp { -entry: ; CHECK-LABEL: fms16: ; CHECK: fmsub h0, h0, h1, h2 +entry: %mul = fmul half %b, -1.000000e+00 %0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c) ret half %0 } define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp { -entry: ; CHECK-LABEL: fms16_com: - -; FIXME: This should be a fmsub. - -; CHECK: fneg h1, h1 -; CHECK-NEXT: fmadd h0, h1, h0, h2 +; CHECK: fmsub h0, h1, h0, h2 +; CHECK-NEXT: ret +entry: %mul = fmul half %b, -1.000000e+00 %0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c) ret half %0 } define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp { -entry: ; CHECK-LABEL: fnms16: -; CHECK: fnmsub h0, h0, h1, h2 +; CHECK: fnmsub h0, h0, h1, h2 +; CHECK-NEXT: ret +entry: %mul = fmul half %c, -1.000000e+00 %0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul) ret half %0 } -declare half @llvm.fma.f16(half, half, half) +define half @test_fmsub(half %a, half %b, half %c) { +; CHECK-LABEL: test_fmsub: +; CHECK: fmsub h0, h0, h1, h2 +; CHECK-NEXT: ret +entry: + %nega = fsub half -0.0, %a + %val = call half @llvm.fma.f16(half %nega, half %b, half %c) + ret half %val +} + +define half @test_fnmadd(half %a, half %b, half %c) { +; CHECK-LABEL: test_fnmadd: +; CHECK: fnmadd h0, h0, h1, h2 +; CHECK-NEXT: ret +entry: + %nega = fsub half -0.0, %a + %negc = fsub half -0.0, %c + %val = call half @llvm.fma.f16(half %nega, half %b, half %negc) + ret half %val +} +define half @test_fmadd(half %a, half %b, half %c) { +; CHECK-LABEL: test_fmadd: +; CHECK: fmadd h0, h0, h1, h2 +; CHECK-NEXT: ret +entry: + %nega = fsub half -0.0, %a + %negb = fsub half -0.0, %b + %val = call half @llvm.fma.f16(half %nega, half %negb, half %c) + ret half %val 
+} + +define half @test_fnmsub(half %a, half %b, half %c) { +; CHECK-LABEL: test_fnmsub: +; CHECK: fnmsub h0, h0, h1, h2 +; CHECK-NEXT: ret +entry: + %negc = fsub half -0.0, %c + %val = call half @llvm.fma.f16(half %a, half %b, half %negc) + ret half %val +} + +declare half @llvm.fma.f16(half, half, half) -- 2.7.4