//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
+// Tuning flag: prefer lowering mask reductions via MOVMSK + scalar TEST/CMP
+// rather than VTEST. Added to ADLAdditionalTuning below and exercised by the
+// +prefer-movmsk-over-vtest RUN lines in the accompanying tests.
+def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
+ "PreferMovmskOverVTest", "true",
+ "Prefer movmsk over vtest instruction">;
// Tuning flag: SHLD/SHRD are slow on this subtarget, so codegen should avoid
// funnel-shift lowering through them.
def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureWAITPKG];
- list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps];
+ list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
+ TuningPreferMovmskOverVTest];
list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
list<SubtargetFeature> ADLFeatures =
!listconcat(TRMFeatures, ADLAdditionalFeatures);
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=alderlake | FileCheck %s --check-prefixes=ADL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+prefer-movmsk-over-vtest | FileCheck %s --check-prefixes=ADL
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
; ADL: # %bb.0:
; ADL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
-; ADL-NEXT: vtestpd %ymm0, %ymm0
+; ADL-NEXT: vmovmskpd %ymm0, %eax
+; ADL-NEXT: testl %eax, %eax
; ADL-NEXT: sete %al
; ADL-NEXT: vzeroupper
; ADL-NEXT: retq
; ADL: # %bb.0:
; ADL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
-; ADL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; ADL-NEXT: vtestpd %ymm1, %ymm0
-; ADL-NEXT: setb %al
+; ADL-NEXT: vmovmskpd %ymm0, %eax
+; ADL-NEXT: cmpl $15, %eax
+; ADL-NEXT: sete %al
; ADL-NEXT: vzeroupper
; ADL-NEXT: retq
%1 = fcmp oeq <4 x double> %a0, zeroinitializer
; ADL-LABEL: movmskps_concat_v4f32:
; ADL: # %bb.0:
; ADL-NEXT: vorps %xmm1, %xmm0, %xmm0
+; ADL-NEXT: vmovmskps %xmm0, %ecx
; ADL-NEXT: xorl %eax, %eax
-; ADL-NEXT: vtestps %xmm0, %xmm0
-; ADL-NEXT: setne %al
-; ADL-NEXT: negl %eax
+; ADL-NEXT: negl %ecx
+; ADL-NEXT: sbbl %eax, %eax
; ADL-NEXT: retq
%1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=alderlake | FileCheck %s --check-prefixes=ADL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+prefer-movmsk-over-vtest | FileCheck %s --check-prefixes=ADL
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
; ADL: # %bb.0:
; ADL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm0
-; ADL-NEXT: vtestpd %xmm0, %xmm0
+; ADL-NEXT: vmovmskpd %xmm0, %eax
+; ADL-NEXT: testl %eax, %eax
; ADL-NEXT: sete %al
; ADL-NEXT: retq
%1 = fcmp oeq <2 x double> zeroinitializer, %a0
; ADL: # %bb.0:
; ADL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm0
-; ADL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; ADL-NEXT: vtestpd %xmm1, %xmm0
-; ADL-NEXT: setb %al
+; ADL-NEXT: vmovmskpd %xmm0, %eax
+; ADL-NEXT: cmpl $3, %eax
+; ADL-NEXT: sete %al
; ADL-NEXT: retq
%1 = fcmp oeq <2 x double> zeroinitializer, %a0
%2 = sext <2 x i1> %1 to <2 x i64>
;
; ADL-LABEL: pmovmskb_noneof_bitcast_v2i64:
; ADL: # %bb.0:
-; ADL-NEXT: vtestpd %xmm0, %xmm0
+; ADL-NEXT: vmovmskpd %xmm0, %eax
+; ADL-NEXT: testl %eax, %eax
; ADL-NEXT: sete %al
; ADL-NEXT: retq
%1 = icmp sgt <2 x i64> zeroinitializer, %a0
;
; ADL-LABEL: pmovmskb_allof_bitcast_v2i64:
; ADL: # %bb.0:
-; ADL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; ADL-NEXT: vtestpd %xmm1, %xmm0
-; ADL-NEXT: setb %al
+; ADL-NEXT: vmovmskpd %xmm0, %eax
+; ADL-NEXT: cmpl $3, %eax
+; ADL-NEXT: sete %al
; ADL-NEXT: retq
%1 = icmp sgt <2 x i64> zeroinitializer, %a0
%2 = sext <2 x i1> %1 to <2 x i64>
; ADL: # %bb.0:
; ADL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ADL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
-; ADL-NEXT: vtestps %xmm0, %xmm0
+; ADL-NEXT: vmovmskps %xmm0, %eax
+; ADL-NEXT: testl %eax, %eax
; ADL-NEXT: sete %al
; ADL-NEXT: retq
%1 = fcmp oeq <4 x float> %a0, zeroinitializer
; ADL: # %bb.0:
; ADL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ADL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
-; ADL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; ADL-NEXT: vtestps %xmm1, %xmm0
-; ADL-NEXT: setb %al
+; ADL-NEXT: vmovmskps %xmm0, %eax
+; ADL-NEXT: cmpl $15, %eax
+; ADL-NEXT: sete %al
; ADL-NEXT: retq
%1 = fcmp oeq <4 x float> %a0, zeroinitializer
%2 = sext <4 x i1> %1 to <4 x i32>
; ADL: # %bb.0:
; ADL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; ADL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; ADL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; ADL-NEXT: vmovmskps %xmm0, %ecx
; ADL-NEXT: xorl %eax, %eax
-; ADL-NEXT: vtestps %xmm1, %xmm0
-; ADL-NEXT: sbbl %eax, %eax
+; ADL-NEXT: cmpl $15, %ecx
+; ADL-NEXT: sete %al
+; ADL-NEXT: negl %eax
; ADL-NEXT: retq
%1 = icmp eq <16 x i8> %a0, zeroinitializer
%2 = sext <16 x i1> %1 to <16 x i8>