From 23848f8f1d7dfb926bc9e42a47771d4ff9b01171 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 2 Nov 2012 23:27:16 +0000 Subject: [PATCH] Add a stub for the x86 cost model impl. Implement a basic cost rule for inserting/extracting from XMM registers. llvm-svn: 167333 --- llvm/lib/Target/X86/X86ISelLowering.h | 16 +++++++++++ llvm/lib/Target/X86/X86TargetMachine.h | 4 +-- .../CostModel/X86/insert-extract-at-zero.ll | 33 ++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index c77d8b6..d4c3036 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -19,6 +19,7 @@ #include "X86RegisterInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetTransformImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -946,6 +947,21 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } + + class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { + public: + explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : + VectorTargetTransformImpl(TL) {} + + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + // Floating point scalars are already located in index #0. + if (Val->getScalarType()->isFloatingPointTy() && Index == 0) + return 0; + return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); + } + }; + } #endif // X86ISELLOWERING_H diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h index 01296c3..12311a1 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ b/llvm/lib/Target/X86/X86TargetMachine.h @@ -82,7 +82,7 @@ class X86_32TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; + X86VectorTargetTransformInfo VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -119,7 +119,7 @@ class X86_64TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; + X86VectorTargetTransformInfo VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll new file mode 100644 index 0000000..eea5b60 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) { + ;CHECK: cost of 0 {{.*}} extract + %A = extractelement <4 x float> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %B = extractelement <4 x i32> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %C = extractelement <4 x float> undef, i32 1 + + ;CHECK: cost of 0 {{.*}} extract + %D = extractelement <8 x float> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %E = extractelement <8 x float> undef, i32 1 + + ;CHECK: cost of 1 {{.*}} extract + %F = extractelement <8 x float> undef, i32 %arg + + ;CHECK: cost of 0 {{.*}} insert + %G = insertelement <4 x float> undef, float %fl, i32 0 + ;CHECK: cost of 1 {{.*}} insert + %H = insertelement <4 x float> undef, float %fl, i32 1 + ;CHECK: cost of 1 {{.*}} insert + %I = insertelement <4 x i32> undef, i32 %arg, i32 0 + + ;CHECK: cost of 0 {{.*}} insert + %J = insertelement <4 x double> undef, double undef, i32 0 + + ret i32 0 +} -- 2.7.4