; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE4
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
}
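+; Comment on the new SSE2/SSE4 split below: at x86-64-v2 the v4i64 umax
+; reduction is vectorized to @llvm.vector.reduce.umax.v4i64, while plain SSE2
+; keeps the scalar @llvm.umax.i64 chain (presumably the cost model only
+; considers the 64-bit integer max reduction profitable once SSE4
+; compare/blend support is available; that rationale is an assumption, the
+; generated checks are authoritative).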
define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
-; SSE-LABEL: @umax_intrinsic_rdx_v4i64(
-; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
-; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
-; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
-; SSE-NEXT: [[T0:%.*]] = load i64, ptr [[P0]], align 4
-; SSE-NEXT: [[T1:%.*]] = load i64, ptr [[P1]], align 4
-; SSE-NEXT: [[T2:%.*]] = load i64, ptr [[P2]], align 4
-; SSE-NEXT: [[T3:%.*]] = load i64, ptr [[P3]], align 4
-; SSE-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
-; SSE-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
-; SSE-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
-; SSE-NEXT: ret i64 [[M]]
+; SSE2-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
+; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
+; SSE2-NEXT: [[T0:%.*]] = load i64, ptr [[P0]], align 4
+; SSE2-NEXT: [[T1:%.*]] = load i64, ptr [[P1]], align 4
+; SSE2-NEXT: [[T2:%.*]] = load i64, ptr [[P2]], align 4
+; SSE2-NEXT: [[T3:%.*]] = load i64, ptr [[P3]], align 4
+; SSE2-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
+; SSE2-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
+; SSE2-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
+; SSE2-NEXT: ret i64 [[M]]
+;
+; SSE4-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE4-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
+; SSE4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP1]])
+; SSE4-NEXT: ret i64 [[TMP2]]
;
; AVX-LABEL: @umax_intrinsic_rdx_v4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
%t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE: {{.*}}