; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512DQVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQVL
;
; vXi64
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX-NEXT: vpsrlq $32, %xmm0, %xmm2
-; AVX-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
-; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpaddq %xmm2, %xmm3, %xmm2
-; AVX-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: test_v2i64:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1OR2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX1OR2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX1OR2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX1OR2-NEXT: vmovq %xmm0, %rax
+; AVX1OR2-NEXT: retq
;
; AVX512BW-LABEL: test_v2i64:
; AVX512BW: # %bb.0:
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: retq
%1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a0)
ret i32 %1
}
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: retq
%1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a0)
ret i32 %1
}
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: retq
%1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a0)
ret i16 %1
}
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: retq
%1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a0)
ret i16 %1
}
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: retq
%1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a0)
ret i16 %1
}
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: retq
%1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a0)
ret i8 %1
}
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: retq
%1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a0)
ret i8 %1
}
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: retq
%1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a0)
ret i8 %1
}