return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
+ bool LegalOperations) {
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
+ // Rewrite a sext/zext of the sign-bit test (setgt X, -1) as a shift of ~X.
+ SDValue SetCC = N->getOperand(0);
+ if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
+ !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
+ return SDValue(); // Not a one-use i1 setcc before op legalization: no fold.
+ // Split the compare into its two operands and its condition code.
+ SDValue X = SetCC.getOperand(0);
+ SDValue Ones = SetCC.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT XVT = X.getValueType();
+ // setge X, C is canonicalized to setgt, so we do not need to match that
+ // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
+ // not require the 'not' op.
+ if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
+ // Invert and smear/shift the sign bit (the result type must equal X's type):
+ // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
+ // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
+ SDLoc DL(N);
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
+ auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
+ return SDValue(); // Pattern not matched; callers treat the empty value as no fold.
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
}
}
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
if (N0.getOpcode() == ISD::SETCC) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
return foldedExt;
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
if (N0.getOpcode() == ISD::SETCC) {
// Only do this before legalize for now.
if (!LegalOperations && VT.isVector() &&
define i32 @zext_ifpos(i32 %x) {
; CHECK-LABEL: zext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #31
-; CHECK-NEXT: eor w0, w8, #0x1
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: lsr w0, w8, #31
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #31
-; CHECK-NEXT: eor w8, w8, #0x1
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: lsr w8, w8, #31
; CHECK-NEXT: add w0, w8, #41 // =41
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
define i32 @sext_ifpos(i32 %x) {
; CHECK-LABEL: sext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: eor w0, w8, w0, asr #31
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: asr w0, w8, #31
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #31
-; CHECK-NEXT: eor w8, w8, #0x1
+; CHECK-NEXT: mvn w8, w0
; CHECK-NEXT: mov w9, #42
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w0, w9, w8, lsr #31
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
}
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
-; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
+; GCN: v_ashrrev_i32_e32 v2, 31, v2
define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
+; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: addi 3, 3, 41
; CHECK-NEXT: blr
%c = icmp sgt i32 %x, -1
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: srawi 3, 3, 31
; CHECK-NEXT: nor 3, 3, 3
+; CHECK-NEXT: srawi 3, 3, 31
; CHECK-NEXT: addi 3, 3, 42
; CHECK-NEXT: blr
%c = icmp sgt i32 %x, -1
; CHECK-LABEL: test_igesll_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: rldicl r3, r3, 1, 63
+; CHECK-NEXT: not r3, r3
; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: rldicl r3, r3, 1, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
; CHECK-LABEL: test_igesll_sext_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: sradi r3, r3, 63
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-NEXT: not r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: std r3,
; CHECK-NEXT: blr
entry:
define i64 @test_llgesll_z(i64 %a) {
; CHECK-LABEL: test_llgesll_z:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: not r3, r3
; CHECK-NEXT: rldicl r3, r3, 1, 63
-; CHECK-NEXT: xori r3, r3, 1
; CHECK-NEXT: blr
entry:
%cmp = icmp sgt i64 %a, -1
define i64 @test_llgesll_sext_z(i64 %a) {
; CHECK-LABEL: test_llgesll_sext_z:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: not r3, r3
+; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: blr
entry:
%cmp = icmp sgt i64 %a, -1
; CHECK-LABEL: test_llgesll_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: rldicl r3, r3, 1, 63
+; CHECK-NEXT: not r3, r3
; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
-; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: rldicl r3, r3, 1, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
; CHECK-LABEL: test_llgesll_sext_z_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
-; CHECK-NEXT: sradi r3, r3, 63
-; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
; CHECK-NEXT: not r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: sradi r3, r3, 63
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
;
; MCU-LABEL: test7:
; MCU: # %bb.0:
-; MCU-NEXT: xorl %ecx, %ecx
-; MCU-NEXT: testl %eax, %eax
-; MCU-NEXT: setns %cl
-; MCU-NEXT: shll $4, %ecx
-; MCU-NEXT: fldt {{\.LCPI.*}}(%ecx)
+; MCU-NEXT: notl %eax
+; MCU-NEXT: shrl $27, %eax
+; MCU-NEXT: andl $-16, %eax
+; MCU-NEXT: fldt {{\.LCPI.*}}(%eax)
; MCU-NEXT: retl
%tmp9 = icmp sgt i32 %tmp8, -1
%retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
define i32 @pos_sel_constants(i32 %a) {
; CHECK-NOBMI-LABEL: pos_sel_constants:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: xorl %eax, %eax
-; CHECK-NOBMI-NEXT: testl %edi, %edi
-; CHECK-NOBMI-NEXT: setns %al
-; CHECK-NOBMI-NEXT: leal (%rax,%rax,4), %eax
+; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT: notl %edi
+; CHECK-NOBMI-NEXT: shrl $31, %edi
+; CHECK-NOBMI-NEXT: leal (%rdi,%rdi,4), %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: pos_sel_constants:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %eax, %eax
-; CHECK-BMI-NEXT: testl %edi, %edi
-; CHECK-BMI-NEXT: setns %al
-; CHECK-BMI-NEXT: leal (%rax,%rax,4), %eax
+; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-BMI-NEXT: notl %edi
+; CHECK-BMI-NEXT: shrl $31, %edi
+; CHECK-BMI-NEXT: leal (%rdi,%rdi,4), %eax
; CHECK-BMI-NEXT: retq
%tmp.1 = icmp sgt i32 %a, -1
%retval = select i1 %tmp.1, i32 5, i32 0
define i32 @pos_sel_special_constant(i32 %a) {
; CHECK-NOBMI-LABEL: pos_sel_special_constant:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: xorl %eax, %eax
-; CHECK-NOBMI-NEXT: testl %edi, %edi
-; CHECK-NOBMI-NEXT: setns %al
-; CHECK-NOBMI-NEXT: shll $9, %eax
+; CHECK-NOBMI-NEXT: notl %edi
+; CHECK-NOBMI-NEXT: shrl $22, %edi
+; CHECK-NOBMI-NEXT: andl $512, %edi # imm = 0x200
+; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: pos_sel_special_constant:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %eax, %eax
-; CHECK-BMI-NEXT: testl %edi, %edi
-; CHECK-BMI-NEXT: setns %al
-; CHECK-BMI-NEXT: shll $9, %eax
+; CHECK-BMI-NEXT: notl %edi
+; CHECK-BMI-NEXT: shrl $22, %edi
+; CHECK-BMI-NEXT: andl $512, %edi # imm = 0x200
+; CHECK-BMI-NEXT: movl %edi, %eax
; CHECK-BMI-NEXT: retq
%tmp.1 = icmp sgt i32 %a, -1
%retval = select i1 %tmp.1, i32 512, i32 0
define i32 @zext_ifpos(i32 %x) {
; CHECK-LABEL: zext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: addl $41, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
define i32 @sel_ifpos_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_tval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: addl $41, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 42, i32 41
define i32 @sext_ifpos(i32 %x) {
; CHECK-LABEL: sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %al
-; CHECK-NEXT: negl %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %cl
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: subl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
define i32 @sel_ifpos_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_fval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: setns %cl
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: subl %edi, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 41, i32 42
ret i32 %2
}
; CHECK-LABEL: f5:
-; CHECK-NEXT: ashr r0, r0, 32
-; CHECK-NEXT: eq r0, r0, 0
+; CHECK-NEXT: not r0, r0
+; CHECK-NEXT: mkmsk r1, 5
+; CHECK-NEXT: shr r0, r0, r1