; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE4
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
}
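+; Comment on the new SSE2/SSE4 split below: at x86-64-v2 the v4i64 umax
+; reduction is vectorized to @llvm.vector.reduce.umax.v4i64, while plain SSE2
+; keeps the scalar @llvm.umax.i64 chain (presumably the cost model only
+; considers the 64-bit integer max reduction profitable once SSE4
+; compare/blend support is available; that rationale is an assumption, the
+; generated checks are authoritative).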
define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
-; SSE-LABEL: @umax_intrinsic_rdx_v4i64(
-; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
-; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
-; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
-; SSE-NEXT: [[T0:%.*]] = load i64, ptr [[P0]], align 4
-; SSE-NEXT: [[T1:%.*]] = load i64, ptr [[P1]], align 4
-; SSE-NEXT: [[T2:%.*]] = load i64, ptr [[P2]], align 4
-; SSE-NEXT: [[T3:%.*]] = load i64, ptr [[P3]], align 4
-; SSE-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
-; SSE-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
-; SSE-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
-; SSE-NEXT: ret i64 [[M]]
+; SSE2-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
+; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
+; SSE2-NEXT: [[T0:%.*]] = load i64, ptr [[P0]], align 4
+; SSE2-NEXT: [[T1:%.*]] = load i64, ptr [[P1]], align 4
+; SSE2-NEXT: [[T2:%.*]] = load i64, ptr [[P2]], align 4
+; SSE2-NEXT: [[T3:%.*]] = load i64, ptr [[P3]], align 4
+; SSE2-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
+; SSE2-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
+; SSE2-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
+; SSE2-NEXT: ret i64 [[M]]
+;
+; SSE4-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE4-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
+; SSE4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP1]])
+; SSE4-NEXT: ret i64 [[TMP2]]
;
; AVX-LABEL: @umax_intrinsic_rdx_v4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
%t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE: {{.*}}