[SVE] Fix the FP arithmetic instruction costs for SVE

author David Sherwood <david.sherwood@arm.com>

Tue, 31 Aug 2021 13:07:50 +0000 (14:07 +0100)

committer David Sherwood <david.sherwood@arm.com>

Thu, 2 Sep 2021 08:55:13 +0000 (09:55 +0100)
author David Sherwood <david.sherwood@arm.com>
Tue, 31 Aug 2021 13:07:50 +0000 (14:07 +0100)
committer David Sherwood <david.sherwood@arm.com>
Thu, 2 Sep 2021 08:55:13 +0000 (09:55 +0100)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

index 8692fe3..6abc8ca 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1393,9 +1393,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
      return (Cost + 1) * LT.first;
  
    case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FNEG:
      // These nodes are marked as 'custom' just to lower them to SVE.
      // We know said lowering will incur no additional cost.
-    if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
+    if (!Ty->getScalarType()->isFP128Ty())
        return (Cost + 2) * LT.first;
  
      return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll

new file mode 100644 (file)

index 0000000..60f1f02
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=aarch64 -mattr=+fullfp16 -mattr=+sve | FileCheck %s
+
+define void @fadd() {
+; CHECK-LABEL: 'fadd'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V4F16 = fadd <vscale x 4 x half> undef, undef
+  %V8F16 = fadd <vscale x 8 x half> undef, undef
+  %V16F16 = fadd <vscale x 16 x half> undef, undef
+
+  %V2F32 = fadd <vscale x 2 x float> undef, undef
+  %V4F32 = fadd <vscale x 4 x float> undef, undef
+  %V8F32 = fadd <vscale x 8 x float> undef, undef
+
+  %V2F64 = fadd <vscale x 2 x double> undef, undef
+  %V4F64 = fadd <vscale x 4 x double> undef, undef
+
+  ret void
+}
+
+define void @fsub() {
+; CHECK-LABEL: 'fsub'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V4F16 = fsub <vscale x 4 x half> undef, undef
+  %V8F16 = fsub <vscale x 8 x half> undef, undef
+  %V16F16 = fsub <vscale x 16 x half> undef, undef
+
+  %V2F32 = fsub <vscale x 2 x float> undef, undef
+  %V4F32 = fsub <vscale x 4 x float> undef, undef
+  %V8F32 = fsub <vscale x 8 x float> undef, undef
+
+  %V2F64 = fsub <vscale x 2 x double> undef, undef
+  %V4F64 = fsub <vscale x 4 x double> undef, undef
+
+  ret void
+}
+
+define void @fneg() {
+; CHECK-LABEL: 'fneg'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <vscale x 2 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <vscale x 4 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <vscale x 8 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <vscale x 16 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <vscale x 2 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <vscale x 4 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <vscale x 8 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <vscale x 2 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <vscale x 4 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V2F16 = fneg <vscale x 2 x half> undef
+  %V4F16 = fneg <vscale x 4 x half> undef
+  %V8F16 = fneg <vscale x 8 x half> undef
+  %V16F16 = fneg <vscale x 16 x half> undef
+
+  %V2F32 = fneg <vscale x 2 x float> undef
+  %V4F32 = fneg <vscale x 4 x float> undef
+  %V8F32 = fneg <vscale x 8 x float> undef
+
+  %V2F64 = fneg <vscale x 2 x double> undef
+  %V4F64 = fneg <vscale x 4 x double> undef
+
+  ret void
+}
+
+define void @fmul() {
+; CHECK-LABEL: 'fmul'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V4F16 = fmul <vscale x 4 x half> undef, undef
+  %V8F16 = fmul <vscale x 8 x half> undef, undef
+  %V16F16 = fmul <vscale x 16 x half> undef, undef
+
+  %V2F32 = fmul <vscale x 2 x float> undef, undef
+  %V4F32 = fmul <vscale x 4 x float> undef, undef
+  %V8F32 = fmul <vscale x 8 x float> undef, undef
+
+  %V2F64 = fmul <vscale x 2 x double> undef, undef
+  %V4F64 = fmul <vscale x 4 x double> undef, undef
+
+  ret void
+}
+
+define void @fdiv() {
+; CHECK-LABEL: 'fdiv'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V4F16 = fdiv <vscale x 4 x half> undef, undef
+  %V8F16 = fdiv <vscale x 8 x half> undef, undef
+  %V16F16 = fdiv <vscale x 16 x half> undef, undef
+
+  %V2F32 = fdiv <vscale x 2 x float> undef, undef
+  %V4F32 = fdiv <vscale x 4 x float> undef, undef
+  %V8F32 = fdiv <vscale x 8 x float> undef, undef
+
+  %V2F64 = fdiv <vscale x 2 x double> undef, undef
+  %V4F64 = fdiv <vscale x 4 x double> undef, undef
+
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

index e6247ac..19c2539 100644 (file)
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -42,7 +42,7 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll

index 09a0591..d733356 100644 (file)
--- a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll
@@ -8,7 +8,7 @@ declare <vscale x 2 x double> @llvm.sqrt.v2f64(<vscale x 2 x double>)
  
  define <vscale x 2 x double> @fadd_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
  ; THRU-LABEL: 'fadd_v2f64'
-; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %r = fadd <vscale x 2 x double> %a, %b
+; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r = fadd <vscale x 2 x double> %a, %b
  ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 2 x double> %r
  ;
  ; LATE-LABEL: 'fadd_v2f64'
author	David Sherwood <david.sherwood@arm.com>
	Tue, 31 Aug 2021 13:07:50 +0000 (14:07 +0100)
committer	David Sherwood <david.sherwood@arm.com>
	Thu, 2 Sep 2021 08:55:13 +0000 (09:55 +0100)
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp		patch | blob | history
llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll	[new file with mode: 0644]	patch | blob
llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll		patch | blob | history
llvm/test/Analysis/CostModel/AArch64/sve-math.ll		patch | blob | history