From d581d943858195f14863ee8f2a5050aafed883a2 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 31 Aug 2021 14:07:50 +0100 Subject: [PATCH] [SVE] Fix the FP arithmetic instruction costs for SVE Several FP instructions (fadd, fsub, etc.) were incorrectly assigned a higher cost for SVE because they have custom lowering; however, we know they are legal. This patch explicitly assigns a cost of 2 to these opcodes. Tests added here: Analysis/CostModel/AArch64/arith-fp-sve.ll Differential Revision: https://reviews.llvm.org/D108993 --- .../Target/AArch64/AArch64TargetTransformInfo.cpp | 6 +- .../Analysis/CostModel/AArch64/arith-fp-sve.ll | 134 +++++++++++++++++++++ .../Analysis/CostModel/AArch64/sve-intrinsics.ll | 2 +- llvm/test/Analysis/CostModel/AArch64/sve-math.ll | 2 +- 4 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 8692fe3..6abc8ca 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1393,9 +1393,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( return (Cost + 1) * LT.first; case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FNEG: // These nodes are marked as 'custom' just to lower them to SVE. // We know said lowering will incur no additional cost. 
- if (isa(Ty) && !Ty->getScalarType()->isFP128Ty()) + if (!Ty->getScalarType()->isFP128Ty()) return (Cost + 2) * LT.first; return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll new file mode 100644 index 0000000..60f1f02 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=aarch64 -mattr=+fullfp16 -mattr=+sve | FileCheck %s + +define void @fadd() { +; CHECK-LABEL: 'fadd' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V4F16 = fadd undef, undef + %V8F16 = fadd undef, undef + %V16F16 = fadd undef, undef + + %V2F32 = fadd undef, undef + %V4F32 = fadd undef, undef + %V8F32 = fadd undef, undef + + %V2F64 = fadd undef, undef + %V4F64 = fadd undef, undef + + ret void +} + +define void @fsub() { +; CHECK-LABEL: 'fsub' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub undef, undef +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V4F16 = fsub undef, undef + %V8F16 = fsub undef, undef + %V16F16 = fsub undef, undef + + %V2F32 = fsub undef, undef + %V4F32 = fsub undef, undef + %V8F32 = fsub undef, undef + + %V2F64 = fsub undef, undef + %V4F64 = fsub undef, undef + + ret void +} + +define void @fneg() { +; CHECK-LABEL: 'fneg' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
void +; + %V2F16 = fneg undef + %V4F16 = fneg undef + %V8F16 = fneg undef + %V16F16 = fneg undef + + %V2F32 = fneg undef + %V4F32 = fneg undef + %V8F32 = fneg undef + + %V2F64 = fneg undef + %V4F64 = fneg undef + + ret void +} + +define void @fmul() { +; CHECK-LABEL: 'fmul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V4F16 = fmul undef, undef + %V8F16 = fmul undef, undef + %V16F16 = fmul undef, undef + + %V2F32 = fmul undef, undef + %V4F32 = fmul undef, undef + %V8F32 = fmul undef, undef + + %V2F64 = fmul undef, undef + %V4F64 = fmul undef, undef + + ret void +} + +define void @fdiv() { +; CHECK-LABEL: 'fdiv' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V8F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V4F16 = fdiv undef, undef + %V8F16 = fdiv undef, undef + %V16F16 = fdiv undef, undef + + %V2F32 = fdiv undef, undef + %V4F32 = fdiv undef, undef + %V8F32 = fdiv undef, undef + + %V2F64 = fdiv undef, undef + %V4F64 = fdiv undef, undef + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index e6247ac..19c2539 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -42,7 +42,7 @@ define void @reductions( %v0, %v1, %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64( %v1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, %v2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v3) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, %v3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32( %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64( %v3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32( %v2) diff 
--git a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll index 09a0591..d733356 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll @@ -8,7 +8,7 @@ declare @llvm.sqrt.v2f64() define @fadd_v2f64( %a, %b) { ; THRU-LABEL: 'fadd_v2f64' -; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = fadd %a, %b +; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = fadd %a, %b ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %r ; ; LATE-LABEL: 'fadd_v2f64' -- 2.7.4