From ec8b1fb5398f57b233e6b07bf5f3cc8a57e94ba0 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 5 Apr 2017 00:14:39 +0000 Subject: [PATCH] [X86] Relax assert in broadcast-of-subvector lowering. Before r294774, there was a problem when lowering broadcasts to use 128-bit subvectors. When we looked through a bitcast to find the broadcast input, we'd keep using the original type, so you'd end up with things like: (v8f32 (broadcast (v4f32 (extract_subvector (v8i32 V), ...)) )) r294774 fixed it to always emit subvectors with the scalar type of the original source. It also introduced some asserts, to check that we use scalars with the same size, and vectors with the same number of elements. The scalar size equality is checked earlier when looking through bitcasts, and is a useful assert. However, the number of elements doesn't have to be identical: we're always going to extract a 128-bit subvector, and we can have different size inputs if we looked through a concat_vector to find a 256-bit source. Relax the overzealous assert. Replace it with a check of the original source vector being 256 or 512 bits. If it's 128 bits, we can't extract_subvector from it. Fixes PR32371. 
llvm-svn: 299490 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll | 10 ++++++++++ llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3474a93..4212635 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10046,8 +10046,8 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, MVT SrcVT = V.getSimpleValueType(); assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() && "Unexpected vector element size"); - assert(SrcVT.getVectorNumElements() == BroadcastVT.getVectorNumElements() && - "Unexpected vector num elements"); + assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) && + "Unexpected vector size"); MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize); V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index 6472a50..fa3471c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -404,6 +404,16 @@ define <16 x i32> @shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12 ret <16 x i32> %shuffle } +define <16 x float> @shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<8 x float> %a) { +; ALL-LABEL: shuffle_v8f32_v16f32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: +; ALL: # BB#0: +; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 +; ALL-NEXT: retq + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ret <16 x float> %shuffle +} + define <16 x i32> @mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01(<16 x i32> %a, <16 x i32> %passthru, i16 %mask) { ; AVX512F-LABEL: 
mask_shuffle_v16i32_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_01: ; AVX512F: # BB#0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index 8843c6f..30c8d1b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -2608,6 +2608,22 @@ define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) { ret <8 x i64> %shuffle } +define <8 x double> @shuffle_v4f64_v8f64_22222222(<4 x double> %a) { +; AVX512F-LABEL: shuffle_v4f64_v8f64_22222222: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512F-32-LABEL: shuffle_v4f64_v8f64_22222222: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 +; AVX512F-32-NEXT: retl + %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <8 x double> %shuffle +} + define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) { ; AVX512F-LABEL: shuffle_v2i64_v8i64_01010101: ; AVX512F: # BB#0: -- 2.7.4