From 7a50c8c2ba58beaa70879d22c628ebd213fcf4ee Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Aug 2016 12:42:31 +0000 Subject: [PATCH] [X86][AVX2] Ensure on 32-bit targets that we broadcast f64 types not i64 (PR29101) llvm-svn: 279622 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++ llvm/test/CodeGen/X86/avx2-vbroadcast.ll | 55 ++++++++++++++++++++++++++------ 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 105b02d..293d5a4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8733,6 +8733,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, V = DAG.getBitcast(SrcVT, V); } + // 32-bit targets need to load i64 as a f64 and then bitcast the result. + if (!Subtarget.is64Bit() && SrcVT == MVT::i64) { + V = DAG.getBitcast(MVT::f64, V); + unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements(); + BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts); + } + return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V)); } diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll index 2ecf2fa..0b6d2fe 100644 --- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll @@ -232,6 +232,43 @@ entry: ret <4 x i64> %q3 } +define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) { +; X32-LABEL: broadcast_mem_v4i16_v8i16: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; X32-NEXT: retl +; +; X64-LABEL: broadcast_mem_v4i16_v8i16: +; X64: ## BB#0: +; X64-NEXT: vpbroadcastq (%rdi), %xmm0 +; X64-NEXT: retq + %load = load <4 x i16>, <4 x i16>* %ptr + %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <8 x i16> %shuf +} + +define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* 
%ptr) { +; X32-LABEL: broadcast_mem_v4i16_v16i16: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; X32-NEXT: vpbroadcastq %xmm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: broadcast_mem_v4i16_v16i16: +; X64: ## BB#0: +; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; X64-NEXT: vpbroadcastq %xmm0, %ymm0 +; X64-NEXT: retq + %load = load <4 x i16>, <4 x i16>* %ptr + %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <16 x i16> %shuf +} + ; FIXME: Pointer adjusted broadcasts define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp { @@ -563,7 +600,7 @@ entry: define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp { ; X32-LABEL: V111: ; X32: ## BB#0: ## %entry -; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1 +; X32-NEXT: vpbroadcastd LCPI29_0, %ymm1 ; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl ; @@ -580,7 +617,7 @@ entry: define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp { ; X32-LABEL: V113: ; X32: ## BB#0: ## %entry -; X32-NEXT: vbroadcastss LCPI28_0, %ymm1 +; X32-NEXT: vbroadcastss LCPI30_0, %ymm1 ; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl ; @@ -597,7 +634,7 @@ entry: define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp { ; X32-LABEL: _e2: ; X32: ## BB#0: -; X32-NEXT: vbroadcastss LCPI29_0, %xmm0 +; X32-NEXT: vbroadcastss LCPI31_0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: _e2: @@ -637,25 +674,25 @@ define void @crash() nounwind alwaysinline { ; X32: ## BB#0: ## %WGLoopsEntry ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: testb %al, %al -; X32-NEXT: je LBB31_1 +; X32-NEXT: je LBB33_1 ; X32-NEXT: ## BB#2: ## %ret ; X32-NEXT: retl ; X32-NEXT: .p2align 
4, 0x90 -; X32-NEXT: LBB31_1: ## %footer349VF +; X32-NEXT: LBB33_1: ## %footer349VF ; X32-NEXT: ## =>This Inner Loop Header: Depth=1 -; X32-NEXT: jmp LBB31_1 +; X32-NEXT: jmp LBB33_1 ; ; X64-LABEL: crash: ; X64: ## BB#0: ## %WGLoopsEntry ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: testb %al, %al -; X64-NEXT: je LBB31_1 +; X64-NEXT: je LBB33_1 ; X64-NEXT: ## BB#2: ## %ret ; X64-NEXT: retq ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: LBB31_1: ## %footer349VF +; X64-NEXT: LBB33_1: ## %footer349VF ; X64-NEXT: ## =>This Inner Loop Header: Depth=1 -; X64-NEXT: jmp LBB31_1 +; X64-NEXT: jmp LBB33_1 WGLoopsEntry: br i1 undef, label %ret, label %footer329VF -- 2.7.4