[ARM] Add a very basic active_lane_mask cost

author David Green <david.green@arm.com>

Sat, 17 Oct 2020 09:09:42 +0000 (10:09 +0100)

committer David Green <david.green@arm.com>

Sat, 17 Oct 2020 09:09:42 +0000 (10:09 +0100)
author David Green <david.green@arm.com>
Sat, 17 Oct 2020 09:09:42 +0000 (10:09 +0100)
committer David Green <david.green@arm.com>
Sat, 17 Oct 2020 09:09:42 +0000 (10:09 +0100)
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

index 82156af..1cb9e72 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1409,6 +1409,21 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
    return ScalarCost;
  }
  
+int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                      TTI::TargetCostKind CostKind) {
+  // Currently we make a somewhat optimistic assumption that active_lane_masks
+  // are always free. In reality one may be freely folded into a tail-predicated
+  // loop, expanded into a VCTP or expanded into a lot of add/icmp code. We
+  // may need to improve this in the future, but being able to detect whether
+  // it is free involves looking at a lot of other code. We currently assume
+  // that the vectorizer inserted these, and knew what it was doing in adding
+  // one.
+  if (ST->hasMVEIntegerOps() && ICA.getID() == Intrinsic::get_active_lane_mask)
+    return 0;
+
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
  bool ARMTTIImpl::isLoweredToCall(const Function *F) {
    if (!F->isIntrinsic())
      BaseT::isLoweredToCall(F);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h

index 689b484..ee80be5 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -247,6 +247,9 @@ public:
                                    Align Alignment, TTI::TargetCostKind CostKind,
                                    const Instruction *I = nullptr);
  
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
+
    bool maybeLoweredToCall(Instruction &I);
    bool isLoweredToCall(const Function *F);
    bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll

new file mode 100644 (file)

index 0000000..ae23b3e
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s
+
+; Note that instructions like these (not in a loop that could be tail
+; predicated) should not really be free. We currently assume that all active
+; lane masks are free.
+
+define void @v4i32(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+define void @v8i16(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v8i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+define void @v16i8(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v16i8'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
author	David Green <david.green@arm.com>
	Sat, 17 Oct 2020 09:09:42 +0000 (10:09 +0100)
committer	David Green <david.green@arm.com>
	Sat, 17 Oct 2020 09:09:42 +0000 (10:09 +0100)
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/ARM/ARMTargetTransformInfo.h		patch \| blob \| history
llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll	[new file with mode: 0644]	patch \| blob