From: Florian Hahn Date: Mon, 23 Nov 2020 17:33:27 +0000 (+0000) Subject: [AArch64] Add tests for masked.gather costs. X-Git-Tag: llvmorg-13-init~5346 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3a1c6cec15e32e4aa5593ce624915bda790dadff;p=platform%2Fupstream%2Fllvm.git [AArch64] Add tests for masked.gather costs. --- diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll index 3a4e0f0..3003393 100644 --- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll +++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll @@ -86,3 +86,27 @@ define <8 x i64> @load_512(<8 x i64>* %ptr) { %out = load <8 x i64>, <8 x i64>* %ptr ret <8 x i64> %out } + +define <4 x i8> @gather_load_4xi8(<4 x i8*> %ptrs) { +; CHECK: gather_load_4xi8 +; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8 +; + %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) + ret <4 x i8> %lv +} +declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) + +define <4 x i32> @gather_load_4xi32(<4 x i32*> %ptrs) { +; CHECK: gather_load_4xi32 +; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 
x i32> @llvm.masked.gather.v4i32.v4p0i32 +; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32 +; + %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + ret <4 x i32> %lv +} +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll index 4c2d51b..021f9d5 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -slp-vectorizer -instcombine -pass-remarks-output=%t | FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=REMARK %s -; RUN: opt < %s -S -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s +; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s ; RUN: cat %t | FileCheck -check-prefix=REMARK %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -12,6 +12,11 @@ target triple = "aarch64--linux-gnu" ; REMARK-NEXT: - String: 'Vectorized horizontal reduction with cost ' ; REMARK-NEXT: - Cost: '-7' ; +; REMARK-LABEL: Function: gather_load +; REMARK: Args: +; REMARK-NEXT: - String: 'Stores SLP vectorized with cost +; REMARK-NEXT: - Cost: '-2' + define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @gather_multiple_use( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0 @@ -51,3 +56,41 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) { %tmp22 = add i32 %tmp21, %tmp19 ret i32 %tmp22 } + +@data = global [6 x [258 x i8]] zeroinitializer, align 1 +define void @gather_load(i16* noalias 
%ptr) { +; CHECK-LABEL: @gather_load( +; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> <i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3)>, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i16> [[TMP2]], <i16 10, i16 20, i16 30, i16 40> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX182]] to <4 x i16>* +; CHECK-NEXT: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP4]], align 2 +; CHECK-NEXT: ret void +; + %arrayidx182 = getelementptr inbounds i16, i16* %ptr, i64 1 + %arrayidx183 = getelementptr inbounds i16, i16* %ptr, i64 2 + %arrayidx184 = getelementptr inbounds i16, i16* %ptr, i64 3 + %arrayidx185 = getelementptr inbounds i16, i16* %ptr, i64 4 + %arrayidx149 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0 + %l0 = load i8, i8* %arrayidx149, align 1 + %conv150 = zext i8 %l0 to i16 + %add152 = add i16 10, %conv150 + %arrayidx155 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1 + %l1 = load i8, i8* %arrayidx155, align 1 + %conv156 = zext i8 %l1 to i16 + %add158 = add i16 20, %conv156 + %arrayidx161 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2 + %l2 = load i8, i8* %arrayidx161, align 1 + %conv162 = zext i8 %l2 to i16 + %add164 = add i16 30, %conv162 + %arrayidx167 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3 + %l3 = load i8, i8* %arrayidx167, align 1 + %conv168 = zext i8 %l3 to i16 + %add170 = add i16 40, %conv168 + store i16 %add152, i16* %arrayidx182, align 2 + store i16 %add158, i16* %arrayidx183, align 2 + store i16 %add164, i16* %arrayidx184, align 2 + store i16 %add170, i16* %arrayidx185, align 2 + ret void +}