From: Adam Nemet Date: Thu, 29 May 2014 23:35:33 +0000 (+0000) Subject: [X86] Auto-upgrade AVX1 vbroadcast intrinsics X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=39066800e9eb8fdad2e52592b344a0498e288353;p=platform%2Fupstream%2Fllvm.git [X86] Auto-upgrade AVX1 vbroadcast intrinsics They are replaced with the same IR that is generated for the vector-initializers in avxintrin.h. The test verifies that we get back the original instruction. I haven't seen this approach used in other auto-upgrade tests (i.e. llc + FileCheck) but I think it's the most direct way to test this case. I believe this should work because llc upgrades calls during parsing. (Other tests mostly check that assembling and disassembling yields the upgraded IR.) llvm-svn: 209863 --- diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e255113..05b3745 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -114,6 +114,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.movnt.pd.256" || Name == "x86.avx.movnt.ps.256" || Name == "x86.sse42.crc32.64.8" || + Name == "x86.avx.vbroadcast.ss" || + Name == "x86.avx.vbroadcast.ss.256" || + Name == "x86.avx.vbroadcast.sd.256" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = nullptr; return true; @@ -335,6 +338,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1)); Rep = Builder.CreateZExt(Rep, CI->getType(), ""); + } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { + // Replace broadcasts with a series of insertelements. 
+ Type *VecTy = CI->getType(); + Type *EltTy = VecTy->getVectorElementType(); + unsigned EltNum = VecTy->getVectorNumElements(); + Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), + EltTy->getPointerTo()); + Value *Load = Builder.CreateLoad(Cast); + Type *I32Ty = Type::getInt32Ty(C); + Rep = UndefValue::get(VecTy); + for (unsigned I = 0; I < EltNum; ++I) + Rep = Builder.CreateInsertElement(Rep, Load, + ConstantInt::get(I32Ty, I)); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; if (Name == "llvm.x86.avx.vpermil.pd.256") diff --git a/llvm/test/Bitcode/x86-upgrade-avx-vbroadcast.ll b/llvm/test/Bitcode/x86-upgrade-avx-vbroadcast.ll new file mode 100644 index 0000000..d885f1c --- /dev/null +++ b/llvm/test/Bitcode/x86-upgrade-avx-vbroadcast.ll @@ -0,0 +1,41 @@ +; RUN: llc -mattr=+avx < %s | FileCheck %s + +; Check that we properly upgrade the AVX vbroadcast intrinsics to IR. The +; expectation is that we should still get the original instruction back that +; maps to the intrinsic. 
+ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; CHECK-LABEL: test_mm_broadcast_ss: +define <4 x float> @test_mm_broadcast_ss(float* readonly %__a){ +entry: + %0 = bitcast float* %__a to i8* +; CHECK: vbroadcastss (%{{.*}}), %xmm + %1 = tail call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %0) + ret <4 x float> %1 +} + +; CHECK-LABEL: test_mm256_broadcast_sd: +define <4 x double> @test_mm256_broadcast_sd(double* readonly %__a) { +entry: + %0 = bitcast double* %__a to i8* +; CHECK: vbroadcastsd (%{{.*}}), %ymm + %1 = tail call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %0) + ret <4 x double> %1 +} + +; CHECK-LABEL: test_mm256_broadcast_ss: +define <8 x float> @test_mm256_broadcast_ss(float* readonly %__a) { +entry: + %0 = bitcast float* %__a to i8* +; CHECK: vbroadcastss (%{{.*}}), %ymm + %1 = tail call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %0) + ret <8 x float> %1 +} + +declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) + +declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) + +declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*)