From 5c7255fe3a8570a329d894c22421b54a5e5d5dc7 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 15 Nov 2021 18:55:45 +0300 Subject: [PATCH] [X86][Costmodel] `getReplicationShuffleCost()`: promote 8 bit-wide elements to 32 bit when no AVX512VBMI Currently `X86TTIImpl::getInterleavedMemoryOpCostAVX512()` asks about i8 elt type, so this change does affect vectorization. In the end, it will ask about i1. We should also try to promote to i16 if we have AVX512BW; I'll do that in a follow-up. All costs here look good; I've added the missing truncation costs in preparatory patches. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D113853 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 4 +- .../X86/interleaved-store-accesses-with-gaps.ll | 16 ++-- .../CostModel/X86/shuffle-replication-i8.ll | 98 +++++++++++----------- 3 files changed, 59 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 73e68be..637e9cc 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3659,8 +3659,8 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, break; // AVX512BW case 8: if (!ST->hasVBMI()) - return bailout(); - break; + PromEltTyBits = 32; // promote to i32, AVX512F. 
+ break; // AVX512VBMI default: return bailout(); } diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll index 5b7a7bd..5f1b672 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll @@ -40,16 +40,16 @@ target triple = "x86_64-unknown-linux-gnu" ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 36 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 73 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; 
ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { entry: @@ -107,16 +107,16 @@ for.end: ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 36 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 73 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* 
noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { entry: diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll index ac38a72..462f583 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll @@ -73,13 +73,13 @@ define void @replication_i8_stride2() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride2' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512F-NEXT: 
Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'replication_i8_stride2' @@ -164,13 +164,13 @@ define void @replication_i8_stride3() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride3' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> 
undef, <2 x i8> poison, <6 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'replication_i8_stride3' @@ -255,13 +255,13 @@ define void @replication_i8_stride4() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride4' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 384 for 
instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'replication_i8_stride4' @@ -346,13 +346,13 @@ define void @replication_i8_stride5() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride5' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX512F-NEXT: 
Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 182 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'replication_i8_stride5' @@ -437,13 +437,13 @@ define void @replication_i8_stride6() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride6' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x 
i8> undef, <2 x i8> poison, <12 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; 
AVX512VBMI-LABEL: 'replication_i8_stride6' @@ -528,13 +528,13 @@ define void @replication_i8_stride7() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride7' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 99 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'replication_i8_stride7' @@ -619,13 +619,13 @@ define void @replication_i8_stride8() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'replication_i8_stride8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> 
poison, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'replication_i8_stride8' -- 2.7.4