From c3277a88285e5e40e6d5ee9da7342fae42eaf82e Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Sat, 24 Jul 2021 13:41:40 +0100 Subject: [PATCH] [BasicTTI] Set scalarization cost of scalable vector casts to Invalid. When BasicTTIImpl::getCastInstrCost can't determine the cost of a vector cast operation when the types need legalization, it falls back to calculating scalarization costs. Instead of crashing on `cast<FixedVectorType>(DstVTy)` when the type is a scalable vector, return an Invalid cost. Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D106655 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 +++ llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll | 38 +++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 66e0f5f..f5557a2 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1004,6 +1004,10 @@ public: CostKind, I)); } + // Scalarization cost is Invalid, can't assume any num elements. + if (isa<ScalableVectorType>(DstVTy)) + return InstructionCost::getInvalid(); + // In other cases where the source or destination are illegal, assume // the operation will get scalarized.
unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements(); diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll index 4715eaa..ee96085 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll @@ -5,6 +5,24 @@ target triple = "aarch64-unknown-linux-gnu" define void @sve-fptoi() { ; CHECK-LABEL: 'sve-fptoi' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_si8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_ui8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_si32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_ui32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_si64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_ui64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_si8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_ui8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_si16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_ui16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_si64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_ui64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv1f64_to_si8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv1f64_to_ui8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv1f64_to_si16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv1f64_to_ui16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for
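instruction: %nv1f64_to_si32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv1f64_to_ui32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_si8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_ui8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f16_to_si32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32> @@ -59,6 +77,26 @@ define void @sve-fptoi() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nv8f64_to_ui16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32> + %nv1f16_to_si8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8> + %nv1f16_to_ui8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8> + %nv1f16_to_si32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32> + %nv1f16_to_ui32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32> + %nv1f16_to_si64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64> + %nv1f16_to_ui64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64> + + %nv1f32_to_si8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8> + %nv1f32_to_ui8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8> + %nv1f32_to_si16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16> + %nv1f32_to_ui16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16> + %nv1f32_to_si64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64> + %nv1f32_to_ui64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64> + + %nv1f64_to_si8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8> + %nv1f64_to_ui8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8> + %nv1f64_to_si16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16> + %nv1f64_to_ui16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16> + %nv1f64_to_si32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32> + %nv1f64_to_ui32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32> %nv2f16_to_si8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8> %nv2f16_to_ui8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8> -- 2.7.4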