[AArch64] Use type-legalization cost for code size memop cost.

author Florian Hahn <flo@fhahn.com>

Thu, 15 Apr 2021 08:22:32 +0000 (09:22 +0100)

committer Florian Hahn <flo@fhahn.com>

Thu, 15 Apr 2021 09:11:05 +0000 (10:11 +0100)
author Florian Hahn <flo@fhahn.com>
Thu, 15 Apr 2021 08:22:32 +0000 (09:22 +0100)
committer Florian Hahn <flo@fhahn.com>
Thu, 15 Apr 2021 09:11:05 +0000 (10:11 +0100)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

index cb516c3..754e4cf 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -944,10 +944,6 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                                  unsigned AddressSpace,
                                                  TTI::TargetCostKind CostKind,
                                                  const Instruction *I) {
-  // TODO: Handle other cost kinds.
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return 1;
-
    // Type legalization can't handle structs
    if (TLI->getValueType(DL, Ty,  true) == MVT::Other)
      return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
@@ -955,6 +951,13 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
  
    auto LT = TLI->getTypeLegalizationCost(DL, Ty);
  
+  // TODO: consider latency as well for TCK_SizeAndLatency.
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+    return LT.first;
+
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
    if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
        LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
      // Unaligned stores are extremely inefficient. We don't split all
diff --git a/llvm/test/Analysis/CostModel/AArch64/store.ll b/llvm/test/Analysis/CostModel/AArch64/store.ll

index 6374175..3f8e2a4 100644 (file)
--- a/llvm/test/Analysis/CostModel/AArch64/store.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/store.ll
@@ -30,13 +30,13 @@ define void @getMemoryOpCost() {
  ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  ;
  ; SIZE-LABEL: 'getMemoryOpCost'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
  ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
  ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
  ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll b/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll

index 0ef3625..650567c 100644 (file)
--- a/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll
@@ -6,70 +6,22 @@ target triple = "arm64-apple-ios5.0.0"
  
  ; The loop in the function only contains a few instructions, but they will get
  ; lowered to a very large amount of target instructions.
-; FIXME: Currently the cost-model assigns a cost of 1 to those large vector ops.
  define void @loop_with_large_vector_ops(i32 %i, <225 x double>* %A, <225 x double>* %B) {
  ; CHECK-LABEL: @loop_with_large_vector_ops(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    br label [[LOOP:%.*]]
  ; CHECK:       loop:
-; CHECK-NEXT:    [[LV_1:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT:    [[LV_2:%.*]] = load <225 x double>, <225 x double>* [[A]], align 8
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr <225 x double>, <225 x double>* [[A:%.*]], i32 [[IV]]
+; CHECK-NEXT:    [[LV_1:%.*]] = load <225 x double>, <225 x double>* [[A_GEP]], align 8
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 [[IV]]
+; CHECK-NEXT:    [[LV_2:%.*]] = load <225 x double>, <225 x double>* [[B_GEP]], align 8
  ; CHECK-NEXT:    [[MUL:%.*]] = fmul <225 x double> [[LV_1]], [[LV_2]]
-; CHECK-NEXT:    store <225 x double> [[MUL]], <225 x double>* [[A]], align 8
-; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 1
-; CHECK-NEXT:    [[LV_1_1:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_1]], align 8
-; CHECK-NEXT:    [[B_GEP_1:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 1
-; CHECK-NEXT:    [[LV_2_1:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_1]], align 8
-; CHECK-NEXT:    [[MUL_1:%.*]] = fmul <225 x double> [[LV_1_1]], [[LV_2_1]]
-; CHECK-NEXT:    store <225 x double> [[MUL_1]], <225 x double>* [[B_GEP_1]], align 8
-; CHECK-NEXT:    [[A_GEP_2:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 2
-; CHECK-NEXT:    [[LV_1_2:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_2]], align 8
-; CHECK-NEXT:    [[B_GEP_2:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 2
-; CHECK-NEXT:    [[LV_2_2:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_2]], align 8
-; CHECK-NEXT:    [[MUL_2:%.*]] = fmul <225 x double> [[LV_1_2]], [[LV_2_2]]
-; CHECK-NEXT:    store <225 x double> [[MUL_2]], <225 x double>* [[B_GEP_2]], align 8
-; CHECK-NEXT:    [[A_GEP_3:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 3
-; CHECK-NEXT:    [[LV_1_3:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_3]], align 8
-; CHECK-NEXT:    [[B_GEP_3:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 3
-; CHECK-NEXT:    [[LV_2_3:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_3]], align 8
-; CHECK-NEXT:    [[MUL_3:%.*]] = fmul <225 x double> [[LV_1_3]], [[LV_2_3]]
-; CHECK-NEXT:    store <225 x double> [[MUL_3]], <225 x double>* [[B_GEP_3]], align 8
-; CHECK-NEXT:    [[A_GEP_4:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 4
-; CHECK-NEXT:    [[LV_1_4:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_4]], align 8
-; CHECK-NEXT:    [[B_GEP_4:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 4
-; CHECK-NEXT:    [[LV_2_4:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_4]], align 8
-; CHECK-NEXT:    [[MUL_4:%.*]] = fmul <225 x double> [[LV_1_4]], [[LV_2_4]]
-; CHECK-NEXT:    store <225 x double> [[MUL_4]], <225 x double>* [[B_GEP_4]], align 8
-; CHECK-NEXT:    [[A_GEP_5:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 5
-; CHECK-NEXT:    [[LV_1_5:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_5]], align 8
-; CHECK-NEXT:    [[B_GEP_5:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 5
-; CHECK-NEXT:    [[LV_2_5:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_5]], align 8
-; CHECK-NEXT:    [[MUL_5:%.*]] = fmul <225 x double> [[LV_1_5]], [[LV_2_5]]
-; CHECK-NEXT:    store <225 x double> [[MUL_5]], <225 x double>* [[B_GEP_5]], align 8
-; CHECK-NEXT:    [[A_GEP_6:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 6
-; CHECK-NEXT:    [[LV_1_6:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_6]], align 8
-; CHECK-NEXT:    [[B_GEP_6:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 6
-; CHECK-NEXT:    [[LV_2_6:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_6]], align 8
-; CHECK-NEXT:    [[MUL_6:%.*]] = fmul <225 x double> [[LV_1_6]], [[LV_2_6]]
-; CHECK-NEXT:    store <225 x double> [[MUL_6]], <225 x double>* [[B_GEP_6]], align 8
-; CHECK-NEXT:    [[A_GEP_7:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 7
-; CHECK-NEXT:    [[LV_1_7:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_7]], align 8
-; CHECK-NEXT:    [[B_GEP_7:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 7
-; CHECK-NEXT:    [[LV_2_7:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_7]], align 8
-; CHECK-NEXT:    [[MUL_7:%.*]] = fmul <225 x double> [[LV_1_7]], [[LV_2_7]]
-; CHECK-NEXT:    store <225 x double> [[MUL_7]], <225 x double>* [[B_GEP_7]], align 8
-; CHECK-NEXT:    [[A_GEP_8:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 8
-; CHECK-NEXT:    [[LV_1_8:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_8]], align 8
-; CHECK-NEXT:    [[B_GEP_8:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 8
-; CHECK-NEXT:    [[LV_2_8:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_8]], align 8
-; CHECK-NEXT:    [[MUL_8:%.*]] = fmul <225 x double> [[LV_1_8]], [[LV_2_8]]
-; CHECK-NEXT:    store <225 x double> [[MUL_8]], <225 x double>* [[B_GEP_8]], align 8
-; CHECK-NEXT:    [[A_GEP_9:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 9
-; CHECK-NEXT:    [[LV_1_9:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_9]], align 8
-; CHECK-NEXT:    [[B_GEP_9:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 9
-; CHECK-NEXT:    [[LV_2_9:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_9]], align 8
-; CHECK-NEXT:    [[MUL_9:%.*]] = fmul <225 x double> [[LV_1_9]], [[LV_2_9]]
-; CHECK-NEXT:    store <225 x double> [[MUL_9]], <225 x double>* [[B_GEP_9]], align 8
+; CHECK-NEXT:    store <225 x double> [[MUL]], <225 x double>* [[B_GEP]], align 8
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
  ; CHECK-NEXT:    ret void
  ;
  entry:
author	Florian Hahn <flo@fhahn.com>
	Thu, 15 Apr 2021 08:22:32 +0000 (09:22 +0100)
committer	Florian Hahn <flo@fhahn.com>
	Thu, 15 Apr 2021 09:11:05 +0000 (10:11 +0100)
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp		patch | blob | history
llvm/test/Analysis/CostModel/AArch64/store.ll		patch | blob | history
llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll		patch | blob | history