From 1eeae4310771d8a6896fe09effe88883998f34e8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 30 Oct 2020 15:18:01 +0000
Subject: [PATCH] [SLP][X86] Extend target coverage for PR47629

As suggested on D90445, add tests for various SSE/AVX levels and more complex gep pointer offsets
---
Review notes (commentary only; text between the three-dash line and the
first diff header is not part of the commit):

* RUN lines: the single `-march=avx512` invocation is replaced by four
  `-mattr=` invocations (+sse2, +avx, +avx2, +avx512f). Each passes the
  shared CHECK prefix plus target-specific prefixes (SSE, AVX, AVX1, AVX2,
  AVX512F) to FileCheck.
* None of the check lines added by this patch use the target-specific
  prefixes; every new assertion is under the shared CHECK prefix, so all
  four configurations are held to the same expected output here.
* @gather_load_2 loads i32 values from %1 at element offsets 1, 10, 3 and 5,
  adds 1, 2, 3 and 4 respectively, and stores the results to %0 at element
  offsets 0, 1, 2 and 3. The CHECK-NEXT sequence mirrors the input IR
  instruction for instruction, i.e. the expected output keeps the scalar
  loads, adds and stores.
* NOTE(review): this copy of the patch was re-broken into lines from a
  whitespace-collapsed paste. Indentation inside lines (IR instruction
  indent, spacing after "CHECK-NEXT:") follows the usual
  update_test_checks.py layout but could not be confirmed -- verify the
  context lines against the target file before applying.

 llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
index 024e2fb..2d910f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -march=avx512 | FileCheck %s
+; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
 
 define void @gather_load(i32* %0, i32* readonly %1) {
 ; CHECK-LABEL: @gather_load(
@@ -38,3 +41,48 @@ define void @gather_load(i32* %0, i32* readonly %1) {
   store <4 x i32> %17, <4 x i32>* %18, align 4
   ret void
 }
+
+define void @gather_load_2(i32* %0, i32* readonly %1) {
+; CHECK-LABEL: @gather_load_2(
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
+; CHECK-NEXT:    store i32 [[TMP9]], i32* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
+; CHECK-NEXT:    store i32 [[TMP13]], i32* [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP14]], align 4
+; CHECK-NEXT:    ret void
+;
+  %3 = getelementptr inbounds i32, i32* %1, i64 1
+  %4 = load i32, i32* %3, align 4
+  %5 = add nsw i32 %4, 1
+  %6 = getelementptr inbounds i32, i32* %0, i64 1
+  store i32 %5, i32* %0, align 4
+  %7 = getelementptr inbounds i32, i32* %1, i64 10
+  %8 = load i32, i32* %7, align 4
+  %9 = add nsw i32 %8, 2
+  %10 = getelementptr inbounds i32, i32* %0, i64 2
+  store i32 %9, i32* %6, align 4
+  %11 = getelementptr inbounds i32, i32* %1, i64 3
+  %12 = load i32, i32* %11, align 4
+  %13 = add nsw i32 %12, 3
+  %14 = getelementptr inbounds i32, i32* %0, i64 3
+  store i32 %13, i32* %10, align 4
+  %15 = getelementptr inbounds i32, i32* %1, i64 5
+  %16 = load i32, i32* %15, align 4
+  %17 = add nsw i32 %16, 4
+  store i32 %17, i32* %14, align 4
+  ret void
+}
-- 
2.7.4