From 59767cea79e9ca727b5d2de8a6aa2318d671afd5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 25 Mar 2016 00:14:11 +0000 Subject: [PATCH] AMDGPU: TTI: Make insertelement free. We don't want to have a cost to scalarizing operations. llvm-svn: 264364 --- .../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 5 +++ .../Analysis/CostModel/AMDGPU/insertelement.ll | 37 ++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 9cd3c39..39be33e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -99,6 +99,11 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index) { switch (Opcode) { case Instruction::ExtractElement: + case Instruction::InsertElement: + // Extracts are just reads of a subregister, so are free. Inserts are + // considered free because we don't want to have any cost for scalarizing + // operations, and we don't have to copy into a different register class. + // Dynamic indexing isn't free and is best avoided. return Index == ~0u ? 2 : 0; default: diff --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll new file mode 100644 index 0000000..1765afe --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll @@ -0,0 +1,37 @@ +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s + +; CHECK: 'insertelement_v2i32' +; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i32> +define void @insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) { + %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr + %insert = insertelement <2 x i32> %vec, i32 1, i32 123 + store <2 x i32> %insert, <2 x i32> addrspace(1)* %out + ret void +} + +; CHECK: 'insertelement_v2i64' +; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i64> +define void @insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) { + %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr + %insert = insertelement <2 x i64> %vec, i64 1, i64 123 + store <2 x i64> %insert, <2 x i64> addrspace(1)* %out + ret void +} + +; CHECK: 'insertelement_v2i16' +; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i16> +define void @insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %insert = insertelement <2 x i16> %vec, i16 1, i16 123 + store <2 x i16> %insert, <2 x i16> addrspace(1)* %out + ret void +} + +; CHECK: 'insertelement_v2i8' +; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i8> +define void @insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) { + %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr + %insert = insertelement <2 x i8> %vec, i8 1, i8 123 + store <2 x i8> %insert, <2 x i8> addrspace(1)* %out + ret void +} -- 2.7.4