From 1f7a17661cb617a1bd82aa72977bca288a2d7f9d Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Wed, 4 Mar 2015 00:13:25 +0000 Subject: [PATCH] Remove 'llvm.x86.avx2.vbroadcasti128' intrinsic. The intrinsic is no longer generated by the front-end. Remove the intrinsic and auto-upgrade it to a vector shuffle. Reviewed by Nadav This is related to rdar://problem/18742778. llvm-svn: 231182 --- llvm/include/llvm/IR/IntrinsicsX86.td | 2 -- llvm/lib/IR/AutoUpgrade.cpp | 12 ++++++++++++ llvm/lib/Target/X86/X86InstrSSE.td | 5 ----- llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 7 ------- llvm/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll | 18 ++++++++++++++++++ 5 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 622dabe..4a59f0d 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1732,8 +1732,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_vbroadcast_ss_ps_256 : GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx2_vbroadcasti128 : - Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx2_pbroadcastb_128 : GCCBuiltin<"__builtin_ia32_pbroadcastb128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 8306a1a..6800e7a 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -179,6 +179,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx2.pblendw" || Name == "x86.avx2.pblendd.128" || Name == "x86.avx2.pblendd.256" || + Name == "x86.avx2.vbroadcasti128" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = nullptr; return true; @@ -553,6 +554,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { for (unsigned I = 0; I < EltNum; ++I) Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I)); + } else if (Name == "llvm.x86.avx2.vbroadcasti128") { + // Replace vbroadcasts with a vector shuffle. + Value *Op = Builder.CreatePointerCast( + CI->getArgOperand(0), + PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2))); + Value *Load = Builder.CreateLoad(Op); + SmallVector<Constant*, 4> Idxs; // 0, 1, 0, 1. + for (unsigned i = 0; i != 4; ++i) + Idxs.push_back(Builder.getInt32(i & 1)); + Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), + ConstantVector::get(Idxs)); } else if (Name == "llvm.x86.sse2.psll.dq") { // 128-bit shift left specified in bits. 
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 8e8bdb6..6d3cfd5 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7834,11 +7834,6 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, int_x86_avx2_vbroadcast_sd_pd_256, WriteFShuffle256>, VEX_L; -let Predicates = [HasAVX2] in -def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, - int_x86_avx2_vbroadcasti128, WriteLoad>, - VEX_L; - let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index da0f17a..b7e2b74 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -641,13 +641,6 @@ define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) { declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone -define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) { - ; CHECK: vbroadcasti128 - %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly - define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) { ; CHECK: vbroadcastsd %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll new file mode 100644 index 0000000..08571f5 --- /dev/null +++ b/llvm/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll @@ -0,0 +1,18 @@ +; RUN: llc -mattr=+avx2 < %s | FileCheck %s + +; Check that we properly upgrade the AVX2 vbroadcast intrinsic to IR. 
+ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +define <4 x i64> @broadcast128(<2 x i64> %src) { + ; CHECK-LABEL: broadcast128 + ; CHECK: vinsertf128 $1, %xmm0, %ymm0, %ymm0 + %1 = alloca <2 x i64>, align 16 + %2 = bitcast <2 x i64>* %1 to i8* + store <2 x i64> %src, <2 x i64>* %1, align 16 + %3 = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %2) + ret <4 x i64> %3 +} + +declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) #1 -- 2.7.4