[AArch64] Some more FP16 FMA pattern matching

author Sjoerd Meijer <sjoerd.meijer@arm.com>

Mon, 16 Sep 2019 07:32:13 +0000 (07:32 +0000)

committer Sjoerd Meijer <sjoerd.meijer@arm.com>

Mon, 16 Sep 2019 07:32:13 +0000 (07:32 +0000)
author Sjoerd Meijer <sjoerd.meijer@arm.com>
Mon, 16 Sep 2019 07:32:13 +0000 (07:32 +0000)
committer Sjoerd Meijer <sjoerd.meijer@arm.com>
Mon, 16 Sep 2019 07:32:13 +0000 (07:32 +0000)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td

index 541b7ce..df23422 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3315,20 +3315,37 @@ defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
  
  // N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
  // the NEON variant.
+
+// Here we handle first "a + (-b)*c" for FMSUB:
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
+          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
  def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
            (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  
  def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
            (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  
-// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
-// "(-a) + b*(-c)".
+// Now it's time for "(-a) + (-b)*c"
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
+          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
  def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
            (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  
  def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
            (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  
+// And here "(-a) + b*(-c)"
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
+          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
  def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
            (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
  
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll

index 9e276cd..856b00a 100644 (file)
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -175,8 +175,7 @@ entry:
  
  define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
  ; CHECK-LABEL: t_vfmsh_lane_f16:
-; CHECK:         fneg h1, h1
-; CHECK:         fmadd h0, h1, h2, h0
+; CHECK:         fmsub h0, h1, h2, h0
  ; CHECK-NEXT:    ret
  entry:
    %0 = fsub half 0xH8000, %b
@@ -187,9 +186,8 @@ entry:
  
  define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
  ; CHECK-LABEL: t_vfmsh_laneq_f16:
-; CHECK:         fneg h1, h1
-; CHECK-NEXT:    fmadd h0, h1, h2, h0
-; CHECK-NEXT:    ret
+; CHECK:       fmsub h0, h1, h2, h0
+; CHECK-NEXT:  ret
  entry:
    %0 = fsub half 0xH8000, %b
    %extract = extractelement <8 x half> %c, i32 0
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll

index 9a5a0a5..f3ad71b 100644 (file)
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
@@ -10,44 +10,83 @@ entry:
  }
  
  define half @fnma16(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
  ; CHECK-LABEL: fnma16:
  ; CHECK: fnmadd h0, h0, h1, h2
+entry:
    %0 = tail call half @llvm.fma.f16(half %a, half %b, half %c)
    %mul = fmul half %0, -1.000000e+00
    ret half %mul
  }
  
  define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
  ; CHECK-LABEL: fms16:
  ; CHECK: fmsub h0, h0, h1, h2
+entry:
    %mul = fmul half %b, -1.000000e+00
    %0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
    ret half %0
  }
  
  define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
  ; CHECK-LABEL: fms16_com:
-
-; FIXME:       This should be a fmsub.
-
-; CHECK:       fneg  h1, h1
-; CHECK-NEXT:  fmadd h0, h1, h0, h2
+; CHECK:       fmsub h0, h1, h0, h2
+; CHECK-NEXT:  ret
+entry:
    %mul = fmul half %b, -1.000000e+00
    %0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)
    ret half %0
  }
  
  define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
  ; CHECK-LABEL: fnms16:
-; CHECK: fnmsub h0, h0, h1, h2
+; CHECK:       fnmsub h0, h0, h1, h2
+; CHECK-NEXT:  ret
+entry:
    %mul = fmul half %c, -1.000000e+00
    %0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)
    ret half %0
  }
  
-declare half @llvm.fma.f16(half, half, half)
+define half @test_fmsub(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fmsub:
+; CHECK:       fmsub h0, h0, h1, h2
+; CHECK-NEXT:  ret
+entry:
+  %nega = fsub half -0.0, %a
+  %val = call half @llvm.fma.f16(half %nega, half %b, half %c)
+  ret half %val
+}
+
+define half @test_fnmadd(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fnmadd:
+; CHECK:       fnmadd h0, h0, h1, h2
+; CHECK-NEXT:  ret
+entry:
+  %nega = fsub half -0.0, %a
+  %negc = fsub half -0.0, %c
+  %val = call half @llvm.fma.f16(half %nega, half %b, half %negc)
+  ret half %val
+}
  
+define half @test_fmadd(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fmadd:
+; CHECK:       fmadd h0, h0, h1, h2
+; CHECK-NEXT:  ret
+entry:
+  %nega = fsub half -0.0, %a
+  %negb = fsub half -0.0, %b
+  %val = call half @llvm.fma.f16(half %nega, half %negb, half %c)
+  ret half %val
+}
+
+define half @test_fnmsub(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fnmsub:
+; CHECK:       fnmsub h0, h0, h1, h2
+; CHECK-NEXT:  ret
+entry:
+  %negc = fsub half -0.0, %c
+  %val = call half @llvm.fma.f16(half %a, half %b, half %negc)
+  ret half %val
+}
+
+declare half @llvm.fma.f16(half, half, half)
author	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Mon, 16 Sep 2019 07:32:13 +0000 (07:32 +0000)
committer	Sjoerd Meijer <sjoerd.meijer@arm.com>
	Mon, 16 Sep 2019 07:32:13 +0000 (07:32 +0000)
llvm/lib/Target/AArch64/AArch64InstrInfo.td		patch | blob | history
llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll		patch | blob | history
llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll		patch | blob | history