return SDValue();
}
+/// If we are inverting a PTEST/TESTP operand, attempt to adjust the CC
+/// to avoid the inversion.
+/// \param EFLAGS the flag-producing node (must be X86ISD::PTEST/TESTP).
+/// \param CC [in,out] condition code consuming EFLAGS; updated in place
+///        when a replacement node with different flag semantics is built.
+/// \returns the replacement flag-producing node, or SDValue() if no
+///          simplification applies.
+static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
+ SelectionDAG &DAG) {
+ // TODO: Handle X86ISD::KTEST/X86ISD::KORTEST.
+ if (EFLAGS.getOpcode() != X86ISD::PTEST &&
+ EFLAGS.getOpcode() != X86ISD::TESTP)
+ return SDValue();
+
+ // PTEST/TESTP sets EFLAGS as:
+ // TESTZ: ZF = (Op0 & Op1) == 0
+ // TESTC: CF = (~Op0 & Op1) == 0
+ // TESTNZC: ZF == 0 && CF == 0
+ EVT VT = EFLAGS.getValueType();
+ SDValue Op0 = EFLAGS.getOperand(0);
+ SDValue Op1 = EFLAGS.getOperand(1);
+ EVT OpVT = Op0.getValueType();
+
+ // TEST*(~X,Y) == TEST*(X,Y)
+ // Replacing ~X with X swaps the roles of ZF and CF (see the table above):
+ // ZF becomes (X & Y) == 0 (the old CF test) and CF becomes (~X & Y) == 0
+ // (the old ZF test). So ZF-consuming CCs map to CF-consuming CCs and vice
+ // versa, while TESTNZC (ZF == 0 && CF == 0) is symmetric and unchanged.
+ if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
+ X86::CondCode InvCC;
+ switch (CC) {
+ case X86::COND_B:
+ // testc -> testz.
+ InvCC = X86::COND_E;
+ break;
+ case X86::COND_AE:
+ // !testc -> !testz.
+ InvCC = X86::COND_NE;
+ break;
+ case X86::COND_E:
+ // testz -> testc.
+ InvCC = X86::COND_B;
+ break;
+ case X86::COND_NE:
+ // !testz -> !testc.
+ InvCC = X86::COND_AE;
+ break;
+ case X86::COND_A:
+ case X86::COND_BE:
+ // testnzc -> testnzc (no change).
+ InvCC = CC;
+ break;
+ default:
+ // Any other CC reads flags beyond ZF/CF in a way we can't remap.
+ InvCC = X86::COND_INVALID;
+ break;
+ }
+
+ if (InvCC != X86::COND_INVALID) {
+ CC = InvCC;
+ // NotOp0 may have a different type than Op0 (the NOT may have been
+ // found through a bitcast), so bitcast back to the expected operand
+ // type before rebuilding the node.
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(OpVT, NotOp0), Op1);
+ }
+ }
+
+ // TODO: TESTZ(X,~Y) == TESTC(Y,X)
+
+ // TESTZ(X,-1) == TESTZ(X,X)
+ // TESTZ(-1,X) == TESTZ(X,X)
+ // With an all-ones operand, ZF = (X & -1) == 0 == (X & X) == 0, so the
+ // constant operand can be dropped. Only valid for CCs that read ZF alone
+ // (COND_E/COND_NE), since CF changes under this rewrite.
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);
+ if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+
+ return SDValue();
+}
+
/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
+
+ if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG))
+ return R;
+
return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}
; CHECK-LABEL: ptestz_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d)
; CHECK-LABEL: ptestz_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vptest %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
; CHECK-LABEL: ptestc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
; CHECK-LABEL: ptestc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vptest %ymm1, %ymm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
; CHECK-LABEL: ptestnzc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
%t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
; CHECK-LABEL: ptestnzc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vptest %ymm1, %ymm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: vzeroupper
ret i32 %t4
}
+define i32 @ptestnzc_256_invert0_commute(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
+; CHECK-LABEL: ptestnzc_256_invert0_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: vptest %ymm1, %ymm0
+; CHECK-NEXT: cmoval %esi, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %notc = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %flags = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %notc, <4 x i64> %d)
+ %iszero = icmp eq i32 %flags, 0
+ %sel = select i1 %iszero, i32 %a, i32 %b
+ ret i32 %sel
+}
+
;
; testz(-1,X) -> testz(X,X)
;
; CHECK-LABEL: ptestz_128_allones0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vptest %xmm0, %xmm1
+; CHECK-NEXT: vptest %xmm0, %xmm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> <i64 -1, i64 -1>, <2 x i64> %c)
; CHECK-LABEL: ptestz_256_allones0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vptest %ymm0, %ymm1
+; CHECK-NEXT: vptest %ymm0, %ymm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; CHECK-LABEL: ptestz_128_allones1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vptest %xmm1, %xmm0
+; CHECK-NEXT: vptest %xmm0, %xmm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> <i64 -1, i64 -1>)
; CHECK-LABEL: ptestz_256_allones1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vptest %ymm1, %ymm0
+; CHECK-NEXT: vptest %ymm0, %ymm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: setb %al
+; CHECK-NEXT: vptest %xmm0, %xmm0
+; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
start:
%0 = load <16 x i8>, <16 x i8>* %x, align 16
; CHECK-LABEL: testpdz_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vtestpd %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: retq
%t0 = bitcast <2 x double> %c to <2 x i64>
%t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
; CHECK-LABEL: testpdz_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vtestpd %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%t0 = bitcast <4 x double> %c to <4 x i64>
; CHECK-LABEL: testpdc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vtestpd %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t0 = bitcast <2 x double> %c to <2 x i64>
%t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
; CHECK-LABEL: testpdc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vtestpd %ymm1, %ymm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%t0 = bitcast <4 x double> %c to <4 x i64>
; CHECK-LABEL: testpdnzc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: retq
; CHECK-LABEL: testpdnzc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-LABEL: testpsz_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vtestps %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: retq
%t0 = bitcast <4 x float> %c to <2 x i64>
%t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
; CHECK-LABEL: testpsz_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vtestps %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%t0 = bitcast <8 x float> %c to <4 x i64>
; CHECK-LABEL: testpsc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vtestps %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t0 = bitcast <4 x float> %c to <2 x i64>
%t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
; CHECK-LABEL: testpsc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vtestps %ymm1, %ymm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%t0 = bitcast <8 x float> %c to <4 x i64>
; CHECK-LABEL: testpsnzc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: retq
; CHECK-LABEL: testpsnzc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: vzeroupper