From a804bbee9b8360ee749cc42e5cbd44413c149010 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 5 Feb 2013 14:05:55 +0000 Subject: [PATCH] ARM cost model: Cost for scalar integer casts and floating point conversions Also adds some costs for vector integer float conversions. llvm-svn: 174371 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 115 ++++++++++++++++-- llvm/test/Analysis/CostModel/ARM/cast.ll | 158 +++++++++++++++++++++++++ 2 files changed, 266 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/cast.ll diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2ded63f..bf83d51 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,25 +177,126 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); // Some arithmetic, load and store operations have specific instructions - // to cast up/down their types automatically at no extra cost - // TODO: Get these tables to know at least what the related operations are - static const TypeConversionCostTblEntry NEONConversionTbl[] = { + // to cast up/down their types automatically at no extra cost. + // TODO: Get these tables to know at least what the related operations are. + static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + + // Vector float <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + + // Vector double <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } }; - if (ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONConversionTbl, - array_lengthof(NEONConversionTbl), + if (SrcTy.isVector() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, + array_lengthof(NEONVectorConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) - return NEONConversionTbl[Idx].Cost; + return NEONVectorConversionTbl[Idx].Cost; + } + + // Scalar float to integer conversions. + static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = { + { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } + }; + if (SrcTy.isFloatingPoint() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, + array_lengthof(NEONFloatConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONFloatConversionTbl[Idx].Cost; + } + + + // Scalar integer to float conversions. + static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = { + { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } + }; + + if (SrcTy.isInteger() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, + array_lengthof(NEONIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONIntegerConversionTbl[Idx].Cost; } + // Scalar integer conversion costs. + static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { + // i16 -> i64 requires two dependent operations. + { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, + + // Truncates on i64 are assumed to be free. + { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } + }; + + if (SrcTy.isInteger()) { + int Idx = + ConvertCostTableLookup(ARMIntegerConversionTbl, + array_lengthof(ARMIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return ARMIntegerConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll new file mode 100644 index 0000000..464b6ec --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -0,0 +1,158 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios6.0.0" + +define i32 @casts() { + + ; -- scalars -- + ; CHECK: cost of 1 {{.*}} sext + %r0 = sext i1 undef to i8 + ; CHECK: cost of 1 {{.*}} zext + %r1 = zext i1 undef to i8 + ; CHECK: cost of 1 {{.*}} sext + %r2 = sext i1 undef to i16 + ; CHECK: cost of 1 {{.*}} zext + %r3 = zext i1 undef to i16 + ; CHECK: cost of 1 {{.*}} sext + %r4 = sext i1 undef to i32 + ; CHECK: cost of 1 {{.*}} zext + %r5 = zext i1 undef to i32 + ; CHECK: cost of 1 {{.*}} sext + %r6 = sext i1 undef to i64 + ; CHECK: cost of 1 {{.*}} zext + %r7 = zext i1 undef to i64 + ; CHECK: cost of 0 {{.*}} trunc + %r8 = trunc i8 undef to i1 + ; CHECK: cost of 1 {{.*}} sext + %r9 = sext i8 undef to i16 + ; CHECK: cost of 1 {{.*}} zext + %r10 = zext i8 undef to i16 + ; CHECK: cost of 1 {{.*}} sext + %r11 = sext i8 undef to i32 + ; CHECK: cost of 1 {{.*}} zext + %r12 = zext i8 undef to i32 + ; CHECK: cost of 1 {{.*}} sext + %r13 = sext i8 undef to i64 + ; CHECK: cost of 1 {{.*}} zext + %r14 = zext i8 undef to i64 + ; CHECK: cost of 0 {{.*}} trunc + %r15 = trunc i16 undef to i1 + ; CHECK: cost of 0 {{.*}} trunc + %r16 = trunc i16 undef to i8 + ; CHECK: cost of 1 {{.*}} sext + %r17 = sext i16 undef to i32 + ; CHECK: cost of 1 {{.*}} zext + %r18 = zext i16 undef to i32 + ; CHECK: cost of 2 {{.*}} sext + %r19 = sext i16 undef to i64 + ; CHECK: cost of 1 {{.*}} zext + %r20 = zext i16 undef to i64 + ; CHECK: cost of 0 {{.*}} trunc + %r21 = trunc i32 undef to i1 + ; CHECK: cost of 0 {{.*}} trunc + %r22 = trunc i32 undef to i8 + ; CHECK: cost of 0 {{.*}} trunc + %r23 = trunc i32 undef to i16 + ; CHECK: cost of 1 {{.*}} sext + %r24 = sext i32 undef to i64 + ; CHECK: cost of 1 {{.*}} zext + %r25 = zext i32 undef to i64 + ; CHECK: cost of 0 {{.*}} trunc + %r26 = trunc i64 undef to i1 + ; CHECK: cost of 0 {{.*}} trunc + %r27 = trunc i64 undef to i8 + ; CHECK: cost of 0 {{.*}} trunc + %r28 = trunc i64 undef to i16 + ; CHECK: cost of 0 {{.*}} trunc + %r29 = trunc i64 undef to i32 + + ; -- floating point conversions -- + ; Moves between scalar and NEON registers. + ; CHECK: cost of 2 {{.*}} fptoui + %r30 = fptoui float undef to i1 + ; CHECK: cost of 2 {{.*}} fptosi + %r31 = fptosi float undef to i1 + ; CHECK: cost of 2 {{.*}} fptoui + %r32 = fptoui float undef to i8 + ; CHECK: cost of 2 {{.*}} fptosi + %r33 = fptosi float undef to i8 + ; CHECK: cost of 2 {{.*}} fptoui + %r34 = fptoui float undef to i16 + ; CHECK: cost of 2 {{.*}} fptosi + %r35 = fptosi float undef to i16 + ; CHECK: cost of 2 {{.*}} fptoui + %r36 = fptoui float undef to i32 + ; CHECK: cost of 2 {{.*}} fptosi + %r37 = fptosi float undef to i32 + ; CHECK: cost of 10 {{.*}} fptoui + %r38 = fptoui float undef to i64 + ; CHECK: cost of 10 {{.*}} fptosi + %r39 = fptosi float undef to i64 + ; CHECK: cost of 2 {{.*}} fptoui + %r40 = fptoui double undef to i1 + ; CHECK: cost of 2 {{.*}} fptosi + %r41 = fptosi double undef to i1 + ; CHECK: cost of 2 {{.*}} fptoui + %r42 = fptoui double undef to i8 + ; CHECK: cost of 2 {{.*}} fptosi + %r43 = fptosi double undef to i8 + ; CHECK: cost of 2 {{.*}} fptoui + %r44 = fptoui double undef to i16 + ; CHECK: cost of 2 {{.*}} fptosi + %r45 = fptosi double undef to i16 + ; CHECK: cost of 2 {{.*}} fptoui + %r46 = fptoui double undef to i32 + ; CHECK: cost of 2 {{.*}} fptosi + %r47 = fptosi double undef to i32 + ; Function call + ; CHECK: cost of 10 {{.*}} fptoui + %r48 = fptoui double undef to i64 + ; CHECK: cost of 10 {{.*}} fptosi + %r49 = fptosi double undef to i64 + + ; CHECK: cost of 2 {{.*}} sitofp + %r50 = sitofp i1 undef to float + ; CHECK: cost of 2 {{.*}} uitofp + %r51 = uitofp i1 undef to float + ; CHECK: cost of 2 {{.*}} sitofp + %r52 = sitofp i1 undef to double + ; CHECK: cost of 2 {{.*}} uitofp + %r53 = uitofp i1 undef to double + ; CHECK: cost of 2 {{.*}} sitofp + %r54 = sitofp i8 undef to float + ; CHECK: cost of 2 {{.*}} uitofp + %r55 = uitofp i8 undef to float + ; CHECK: cost of 2 {{.*}} sitofp + %r56 = sitofp i8 undef to double + ; CHECK: cost of 2 {{.*}} uitofp + %r57 = uitofp i8 undef to double + ; CHECK: cost of 2 {{.*}} sitofp + %r58 = sitofp i16 undef to float + ; CHECK: cost of 2 {{.*}} uitofp + %r59 = uitofp i16 undef to float + ; CHECK: cost of 2 {{.*}} sitofp + %r60 = sitofp i16 undef to double + ; CHECK: cost of 2 {{.*}} uitofp + %r61 = uitofp i16 undef to double + ; CHECK: cost of 2 {{.*}} sitofp + %r62 = sitofp i32 undef to float + ; CHECK: cost of 2 {{.*}} uitofp + %r63 = uitofp i32 undef to float + ; CHECK: cost of 2 {{.*}} sitofp + %r64 = sitofp i32 undef to double + ; CHECK: cost of 2 {{.*}} uitofp + %r65 = uitofp i32 undef to double + ; Function call + ; CHECK: cost of 10 {{.*}} sitofp + %r66 = sitofp i64 undef to float + ; CHECK: cost of 10 {{.*}} uitofp + %r67 = uitofp i64 undef to float + ; CHECK: cost of 10 {{.*}} sitofp + %r68 = sitofp i64 undef to double + ; CHECK: cost of 10 {{.*}} uitofp + %r69 = uitofp i64 undef to double + + ;CHECK: cost of 0 {{.*}} ret + ret i32 undef +} + -- 2.7.4