From: Simon Pilgrim Date: Mon, 19 Sep 2022 15:44:03 +0000 (+0100) Subject: [CostModel][X86] Add CostKinds handling for vector ctlz instructions X-Git-Tag: upstream/17.0.6~33159 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=135c9b2c4b47679dd978ca5534559ae66ddc0e6e;p=platform%2Fupstream%2Fllvm.git [CostModel][X86] Add CostKinds handling for vector ctlz instructions This was achieved with the 'cost-tables vs llvm-mca' script D103695 --- diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 7450cfa..b3c6ce8 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3330,18 +3330,18 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v4i32, { 1, 1, 1, 1 } }, }; static const CostKindTblEntry AVX512CDCostTbl[] = { - { ISD::CTLZ, MVT::v8i64, { 1 } }, - { ISD::CTLZ, MVT::v16i32, { 1 } }, - { ISD::CTLZ, MVT::v32i16, { 18 } }, - { ISD::CTLZ, MVT::v64i8, { 3 } }, - { ISD::CTLZ, MVT::v4i64, { 1 } }, - { ISD::CTLZ, MVT::v8i32, { 1 } }, - { ISD::CTLZ, MVT::v16i16, { 8 } }, - { ISD::CTLZ, MVT::v32i8, { 2 } }, - { ISD::CTLZ, MVT::v2i64, { 1 } }, - { ISD::CTLZ, MVT::v4i32, { 1 } }, - { ISD::CTLZ, MVT::v8i16, { 3 } }, - { ISD::CTLZ, MVT::v16i8, { 2 } }, + { ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } }, + { ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } }, + { ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } }, + { ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } }, + { ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } }, + { ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } }, + { ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } }, + { ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } }, + { ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } }, + { ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } }, + { ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } }, + { ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } }, { ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } }, { ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } }, @@ -3360,10 +3360,10 @@ 
X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BSWAP, MVT::v8i64, { 1 } }, { ISD::BSWAP, MVT::v16i32, { 1 } }, { ISD::BSWAP, MVT::v32i16, { 1 } }, - { ISD::CTLZ, MVT::v8i64, { 8 } }, - { ISD::CTLZ, MVT::v16i32, { 8 } }, - { ISD::CTLZ, MVT::v32i16, { 4 } }, - { ISD::CTLZ, MVT::v64i8, { 3 } }, + { ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } }, + { ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } }, + { ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } }, + { ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } }, { ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } }, { ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } }, { ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } }, @@ -3428,10 +3428,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BSWAP, MVT::v8i64, { 4 } }, { ISD::BSWAP, MVT::v16i32, { 4 } }, { ISD::BSWAP, MVT::v32i16, { 4 } }, - { ISD::CTLZ, MVT::v8i64, { 10 } }, - { ISD::CTLZ, MVT::v16i32, { 11 } }, - { ISD::CTLZ, MVT::v32i16, { 8 } }, - { ISD::CTLZ, MVT::v64i8, { 5 } }, + { ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } }, + { ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } }, + { ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } }, + { ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } }, { ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } }, { ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } }, { ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } }, @@ -3560,14 +3560,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BSWAP, MVT::v4i64, { 1 } }, { ISD::BSWAP, MVT::v8i32, { 1 } }, { ISD::BSWAP, MVT::v16i16, { 1 } }, - { ISD::CTLZ, MVT::v2i64, { 7 } }, - { ISD::CTLZ, MVT::v4i64, { 14 } }, - { ISD::CTLZ, MVT::v4i32, { 5 } }, - { ISD::CTLZ, MVT::v8i32, { 10 } }, - { ISD::CTLZ, MVT::v8i16, { 4 } }, - { ISD::CTLZ, MVT::v16i16, { 6 } }, - { ISD::CTLZ, MVT::v16i8, { 3 } }, - { ISD::CTLZ, MVT::v32i8, { 4 } }, + { ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } }, + { ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } }, + { ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } }, + { ISD::CTLZ, 
MVT::v8i32, { 10, 16, 19, 34 } }, + { ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } }, + { ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } }, + { ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } }, + { ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } }, { ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } }, { ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } }, { ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } }, @@ -3627,14 +3627,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BSWAP, MVT::v4i64, { 4 } }, { ISD::BSWAP, MVT::v8i32, { 4 } }, { ISD::BSWAP, MVT::v16i16, { 4 } }, - { ISD::CTLZ, MVT::v4i64, { 29 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTLZ, MVT::v2i64, { 14 } }, - { ISD::CTLZ, MVT::v8i32, { 24 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTLZ, MVT::v4i32, { 12 } }, - { ISD::CTLZ, MVT::v16i16, { 19 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTLZ, MVT::v8i16, { 9 } }, - { ISD::CTLZ, MVT::v32i8, { 14 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTLZ, MVT::v16i8, { 7 } }, + { ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } }, + { ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } }, + { ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } }, + { ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } }, { ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } }, // 2 x 128-bit Op + extract/insert { ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } }, { ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert @@ -3726,10 +3726,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BSWAP, MVT::v2i64, { 1 } }, { ISD::BSWAP, MVT::v4i32, { 1 } }, { ISD::BSWAP, MVT::v8i16, { 1 } }, - { ISD::CTLZ, MVT::v2i64, { 18 } }, - { ISD::CTLZ, 
MVT::v4i32, { 15 } }, - { ISD::CTLZ, MVT::v8i16, { 13 } }, - { ISD::CTLZ, MVT::v16i8, { 11 } }, + { ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } }, + { ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } }, + { ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } }, + { ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } }, { ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } }, { ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } }, { ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } }, @@ -3751,10 +3751,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BSWAP, MVT::v2i64, { 7 } }, { ISD::BSWAP, MVT::v4i32, { 7 } }, { ISD::BSWAP, MVT::v8i16, { 7 } }, - { ISD::CTLZ, MVT::v2i64, { 10 } }, - { ISD::CTLZ, MVT::v4i32, { 10 } }, - { ISD::CTLZ, MVT::v8i16, { 9 } }, - { ISD::CTLZ, MVT::v16i8, { 8 } }, + { ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } }, + { ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } }, + { ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } }, + { ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } }, { ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } }, { ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } }, { ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } }, diff --git a/llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll b/llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll index 386bed5..bdc338b 100644 --- a/llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll @@ -2,10 +2,10 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=NOLZCNT ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=LZCNT,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=LZCNT,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output 
-cost-kind=code-size -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=LZCNT,AVX512CD ; Verify the cost of scalar leading zero count instructions. 
@@ -138,99 +138,85 @@ declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1) define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v2i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz +; AVX-LABEL: 'var_ctlz_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x 
i64> %a, i1 false) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v2i64' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX-LABEL: 'var_ctlz_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) ret <2 x i64> %ctlz } define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v2i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v2i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE42-LABEL: 
'var_ctlz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v2i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz +; AVX-LABEL: 'var_ctlz_v2i64u' +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v2i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v2i64u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX-LABEL: 'var_ctlz_v2i64u' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> 
%a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) ret <2 x i64> %ctlz } define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, 
i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i64' @@ -243,27 +229,27 @@ define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> 
%ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i64u' @@ -276,28 +262,32 @@ define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 
for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; AVX512F-LABEL: 'var_ctlz_v8i64' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v8i64' +; AVX512BW-NEXT: Cost Model: Found an estimated cost 
of 23 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i64' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) @@ -309,28 +299,32 @@ define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v8i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; AVX512F-LABEL: 'var_ctlz_v8i64u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v8i64u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i64u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) @@ -342,99 +336,85 @@ define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> 
%ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz +; AVX-LABEL: 'var_ctlz_v4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; 
AVX512CD-LABEL: 'var_ctlz_v4i32' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX-LABEL: 'var_ctlz_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0) ret <4 x i32> %ctlz } define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> 
@llvm.ctlz.v4i32(<4 x i32> %a, i1 true) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v4i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz +; AVX-LABEL: 'var_ctlz_v4i32u' +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i32u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX-LABEL: 'var_ctlz_v4i32u' -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1) ret <4 x i32> %ctlz } define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <8 x i32> 
@llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <8 x i32> 
@llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i32' @@ -447,27 +427,27 @@ define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i32u' @@ -480,28 +460,32 @@ define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; AVX512F-LABEL: 'var_ctlz_v16i32' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v16i32' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i32' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) @@ -513,28 +497,32 @@ define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { define <16 x 
i32> @var_ctlz_v16i32u(<16 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v16i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; AVX512F-LABEL: 'var_ctlz_v16i32u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v16i32u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i32u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) @@ -550,32 +538,25 @@ define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) { ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz +; AVX-LABEL: 'var_ctlz_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX-LABEL: 'var_ctlz_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %ctlz } 
@@ -586,32 +567,25 @@ define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) { ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v8i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v8i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz +; AVX-LABEL: 'var_ctlz_v8i16u' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz 
= call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX-LABEL: 'var_ctlz_v8i16u' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %ctlz } @@ -622,27 +596,27 @@ define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz 
= call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0) @@ -655,27 +629,27 @@ define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 
for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; 
AVX512CD-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1) @@ -688,27 +662,31 @@ define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) { ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 
false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; AVX512F-LABEL: 'var_ctlz_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 0) @@ -721,27 +699,31 @@ define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; 
SSE42-LABEL: 'var_ctlz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v32i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; AVX512F-LABEL: 'var_ctlz_v32i16u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v32i16u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 1) @@ -750,103 +732,89 @@ define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> 
@llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz +; AVX-LABEL: 'var_ctlz_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX-LABEL: 'var_ctlz_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0) ret <16 x i8> %ctlz } define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %ctlz = call <16 x i8> 
@llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v16i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz +; AVX-LABEL: 'var_ctlz_v16i8u' +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> 
@llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX-LABEL: 'var_ctlz_v16i8u' -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1) ret <16 x i8> %ctlz } define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call 
<32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0) @@ -855,31 +823,31 @@ define <32 x i8> 
@var_ctlz_v32i8(<32 x i8> %a) { define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; 
AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1) @@ -888,31 +856,35 @@ define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v64i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; AVX512F-LABEL: 'var_ctlz_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v64i8' -; AVX512CD-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 0) @@ -921,31 +893,35 @@ define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v64i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v64i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <64 x i8> 
@llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v64i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; AVX512F-LABEL: 'var_ctlz_v64i8u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v64i8u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v64i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 1) diff --git a/llvm/test/Analysis/CostModel/X86/ctlz-latency.ll b/llvm/test/Analysis/CostModel/X86/ctlz-latency.ll index 1c5070f..e11ea7f 100644 --- 
a/llvm/test/Analysis/CostModel/X86/ctlz-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/ctlz-latency.ll @@ -2,10 +2,10 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=NOLZCNT ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=LZCNT,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=LZCNT,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency 
-mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=LZCNT,AVX512CD ; Verify the cost of scalar leading zero count instructions. @@ -138,103 +138,97 @@ declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1) define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v2i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v2i64' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX-LABEL: 'var_ctlz_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) ret <2 x i64> %ctlz } define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v2i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, 
i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v2i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v2i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v2i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 
true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v2i64u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX-LABEL: 'var_ctlz_v2i64u' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) ret <2 x i64> %ctlz } define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x 
i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i64' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0) @@ -243,31 +237,31 @@ define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 
for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i64u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1) @@ -276,31 +270,35 @@ define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x 
i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; AVX512F-LABEL: 'var_ctlz_v8i64' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v8i64' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i64' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <8 x i64> 
@llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 0) @@ -309,31 +307,35 @@ define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i64u' -; AVX2-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v8i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; AVX512F-LABEL: 'var_ctlz_v8i64u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v8i64u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i64u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 1) @@ -342,103 +344,97 @@ define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call 
<4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call 
<4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i32' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX-LABEL: 'var_ctlz_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0) ret <4 x i32> %ctlz } define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: 
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i32u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX-LABEL: 'var_ctlz_v4i32u' -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <4 x i32> %ctlz %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1) ret <4 x i32> %ctlz } define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i32' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0) @@ -447,31 +443,31 @@ define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE42-LABEL: 
'var_ctlz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i32u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; %ctlz = 
call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1) @@ -480,31 +476,35 @@ define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; AVX512F-LABEL: 'var_ctlz_v16i32' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v16i32' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i32' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 0) @@ -513,31 +513,35 @@ define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; 
NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v16i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; 
AVX512F-LABEL: 'var_ctlz_v16i32u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v16i32u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i32u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 1) @@ -546,103 +550,97 @@ define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) { define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i16' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE42-LABEL: 
'var_ctlz_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; 
-; AVX-LABEL: 'var_ctlz_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %ctlz } define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i16u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; 
AVX2-LABEL: 'var_ctlz_v8i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX-LABEL: 'var_ctlz_v8i16u' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %ctlz } define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i16' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret <16 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0) @@ -651,31 +649,31 @@ define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i16u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1) @@ -684,31 +682,31 @@ define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i16' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz 
= call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 15 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 0) @@ -717,31 +715,31 @@ define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) { define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i16u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 1) @@ -750,103 +748,89 @@ define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %ctlz = call <16 
x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz +; AVX-LABEL: 'var_ctlz_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> 
@llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX-LABEL: 'var_ctlz_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0) ret <16 x i8> %ctlz } define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = 
call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX1-LABEL: 'var_ctlz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz -; -; AVX2-LABEL: 'var_ctlz_v16i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz +; AVX-LABEL: 'var_ctlz_v16i8u' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX-LABEL: 'var_ctlz_v16i8u' -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = 
call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1) ret <16 x i8> %ctlz } define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = 
call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0) @@ -855,31 +839,31 @@ define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) 
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 
true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1) @@ -888,31 +872,35 @@ define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v64i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; AVX512F-LABEL: 'var_ctlz_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v64i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 0) @@ -921,31 +909,35 @@ define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v64i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x 
i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v64i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v64i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; AVX512F-LABEL: 
'var_ctlz_v64i8u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v64i8u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v64i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 1) diff --git a/llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll index dcdd424..928bc65 100644 --- a/llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll @@ -4,8 +4,8 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=LZCNT,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512 -; RUN: opt 
< %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx512f | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq | FileCheck %s -check-prefixes=LZCNT,AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+lzcnt,+avx512vl,+avx512bw,+avx512dq,+avx512cd | FileCheck %s -check-prefixes=LZCNT,AVX512CD ; Verify the cost of scalar leading zero count instructions. @@ -138,99 +138,93 @@ declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1) define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v2i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 35 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v2i64' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX-LABEL: 'var_ctlz_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) ret <2 x i64> %ctlz } 
define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v2i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v2i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v2i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v2i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v2i64u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %ctlz ; -; AVX-LABEL: 'var_ctlz_v2i64u' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) ret <2 x i64> %ctlz } define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i64' @@ -243,27 +237,27 @@ define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; 
NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) ; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i64u' @@ -276,28 +270,32 @@ define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i64' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> 
@llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; AVX512F-LABEL: 'var_ctlz_v8i64' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v8i64' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i64' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) @@ -309,28 +307,32 @@ define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i64u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 152 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v8i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; AVX512F-LABEL: 'var_ctlz_v8i64u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v8i64u' +; AVX512BW-NEXT: Cost Model: 
Found an estimated cost of 23 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i64u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true) @@ -342,99 +344,93 @@ define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x 
i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v4i32' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX-LABEL: 'var_ctlz_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0) ret <4 x i32> %ctlz } define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v4i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE2-LABEL: 
'var_ctlz_v4i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v4i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v4i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; ; AVX512CD-LABEL: 
'var_ctlz_v4i32u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %ctlz ; -; AVX-LABEL: 'var_ctlz_v4i32u' -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1) ret <4 x i32> %ctlz } define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x 
i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i32' @@ -447,27 +443,27 @@ define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x 
i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i32u' @@ -480,28 +476,32 @@ define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i32' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost 
of 160 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; AVX512F-LABEL: 'var_ctlz_v16i32' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v16i32' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i32' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) @@ -513,28 +513,32 @@ define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i32u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 112 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v16i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; AVX512F-LABEL: 'var_ctlz_v16i32u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v16i32u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i32u' ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 
true) @@ -546,103 +550,97 @@ define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) { define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i16' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <8 x i16> 
@llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX-LABEL: 'var_ctlz_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %ctlz } define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v8i16u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v8i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %ctlz = 
call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v8i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v8i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v8i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v8i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v8i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %ctlz ; -; AVX-LABEL: 'var_ctlz_v8i16u' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %ctlz } define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i16' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 38 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0) @@ -651,31 +649,31 @@ define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i16u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %ctlz ; %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1) @@ -684,31 +682,35 @@ define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i16' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i16' -; AVX1-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; AVX512F-LABEL: 'var_ctlz_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i16' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; %ctlz 
= call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 0) @@ -717,31 +719,35 @@ define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) { define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i16u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v32i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; AVX512F-LABEL: 'var_ctlz_v32i16u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v32i16u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i16u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %ctlz ; %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 1) @@ -750,103 +756,97 @@ define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; NOLZCNT-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX-LABEL: 'var_ctlz_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0) ret <16 x i8> %ctlz } define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v16i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v16i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v16i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v16i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %ctlz ; -; AVX-LABEL: 'var_ctlz_v16i8u' -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz %ctlz = call <16 x i8> 
@llvm.ctlz.v16i8(<16 x i8> %a, i1 1) ret <16 x i8> %ctlz } define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 
x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0) @@ -855,31 +855,31 @@ define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v32i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v32i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512-LABEL: 'var_ctlz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v32i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %ctlz ; %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1) @@ -888,31 
+888,35 @@ define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v64i8' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <64 x i8> 
@llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; AVX512F-LABEL: 'var_ctlz_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v64i8' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 0) @@ -921,31 +925,35 @@ define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) { ; NOLZCNT-LABEL: 'var_ctlz_v64i8u' -; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE2-LABEL: 'var_ctlz_v64i8u' -; SSE2-NEXT: 
Cost Model: Found an estimated cost of 312 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; SSE42-LABEL: 'var_ctlz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX1-LABEL: 'var_ctlz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX2-LABEL: 'var_ctlz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; -; AVX512-LABEL: 'var_ctlz_v64i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; AVX512F-LABEL: 'var_ctlz_v64i8u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz +; +; AVX512BW-LABEL: 'var_ctlz_v64i8u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; ; AVX512CD-LABEL: 'var_ctlz_v64i8u' -; AVX512CD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512CD-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX512CD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %ctlz ; %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 1) diff --git a/llvm/test/Analysis/CostModel/X86/ctlz.ll b/llvm/test/Analysis/CostModel/X86/ctlz.ll index 41d318c..565c42a 100644 --- a/llvm/test/Analysis/CostModel/X86/ctlz.ll +++ b/llvm/test/Analysis/CostModel/X86/ctlz.ll @@ -508,7 +508,7 @@ define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz ; ; AVX512F-LABEL: 'var_ctlz_v16i32' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz ; ; AVX512BW-LABEL: 'var_ctlz_v16i32' @@ -545,7 +545,7 @@ define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz ; ; AVX512F-LABEL: 'var_ctlz_v16i32u' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %ctlz = call <16 x i32> 
@llvm.ctlz.v16i32(<16 x i32> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz ; ; AVX512BW-LABEL: 'var_ctlz_v16i32u' @@ -932,7 +932,7 @@ define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz ; ; AVX512F-LABEL: 'var_ctlz_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz ; ; AVX512BW-LABEL: 'var_ctlz_v64i8' @@ -969,7 +969,7 @@ define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz ; ; AVX512F-LABEL: 'var_ctlz_v64i8u' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz ; ; AVX512BW-LABEL: 'var_ctlz_v64i8u' diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index defc949..8035350 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -264,17 +264,17 @@ define void @ctlz(i32 %a, <16 x i32> %va) { ; ; LATE-LABEL: 'ctlz' ; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 
@llvm.ctlz.i32(i32 %a, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'ctlz' ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; SIZE-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'ctlz' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)