From 0de7ddbfb033671d6ba75aa8c88c63e2919a6ad6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 28 Apr 2020 10:24:44 -0700 Subject: [PATCH] [X86] Handle more cases in combineAddOrSubToADCOrSBB. This adds support for X + SETAE --> sbb X, -1 X - SETAE --> adc X, -1 Fixes PR45700 Differential Revision: https://reviews.llvm.org/D78984 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 35 ++++++++++++++++++-- llvm/test/CodeGen/X86/sbb.ll | 22 +++++-------- llvm/test/CodeGen/X86/scheduler-backtracking.ll | 44 ++++++++++--------------- llvm/test/CodeGen/X86/select.ll | 28 +++++++++------- 4 files changed, 75 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cf70f7e..62094e1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46168,7 +46168,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { } if (CC == X86::COND_A) { - SDValue EFLAGS = Y->getOperand(1); + SDValue EFLAGS = Y.getOperand(1); // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // @@ -46181,13 +46181,44 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, DAG.getConstant(0, DL, VT), NewEFLAGS); } } + if (CC == X86::COND_AE) { + // X + SETAE --> sbb X, -1 + // X - SETAE --> adc X, -1 + return DAG.getNode(IsSub ? 
X86ISD::ADC : X86ISD::SBB, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(-1, DL, VT), Y.getOperand(1)); + } + + if (CC == X86::COND_BE) { + // X + SETBE --> sbb X, -1 + // X - SETBE --> adc X, -1 + SDValue EFLAGS = Y.getOperand(1); + // Try to convert COND_BE into COND_AE in an attempt to facilitate + // materializing "setae reg". + // + // Do not flip "e <= c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. + // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(-1, DL, VT), NewEFLAGS); + } + } + if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll index fc7d9ba..b3dae629 100644 --- a/llvm/test/CodeGen/X86/sbb.ll +++ b/llvm/test/CodeGen/X86/sbb.ll @@ -262,10 +262,9 @@ define i32 @ult_zext_add(i32 %0, i32 %1, i32 %2) { define i32 @ule_zext_add(i32 %0, i32 %1, i32 %2) { ; CHECK-LABEL: ule_zext_add: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl %edx, %esi -; CHECK-NEXT: setbe %al -; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %esi, %edx +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %4 = icmp ule i32 %1, %2 %5 = zext i1 %4 to i32 @@ -289,10 +288,9 @@ define i32 @ugt_zext_add(i32 %0, i32 %1, i32 %2) { define i32 @uge_zext_add(i32 %0, i32 %1, i32 %2) { ; CHECK-LABEL: uge_zext_add: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: cmpl %edx, %esi -; CHECK-NEXT: setae %al -; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: sbbl $-1, 
%eax ; CHECK-NEXT: retq %4 = icmp uge i32 %1, %2 %5 = zext i1 %4 to i32 @@ -317,10 +315,8 @@ define i32 @ule_sext_add(i32 %0, i32 %1, i32 %2) { ; CHECK-LABEL: ule_sext_add: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: cmpl %edx, %esi -; CHECK-NEXT: setbe %cl -; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: cmpl %esi, %edx +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %4 = icmp ule i32 %1, %2 %5 = sext i1 %4 to i32 @@ -345,10 +341,8 @@ define i32 @uge_sext_add(i32 %0, i32 %1, i32 %2) { ; CHECK-LABEL: uge_sext_add: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: cmpl %edx, %esi -; CHECK-NEXT: setae %cl -; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: adcl $-1, %eax ; CHECK-NEXT: retq %4 = icmp uge i32 %1, %2 %5 = sext i1 %4 to i32 diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll index be6baaf..c683c3e 100644 --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -700,43 +700,37 @@ define i64 @test4(i64 %a, i64 %b) nounwind { ; ILP-NEXT: movl $0, %edx ; ILP-NEXT: sbbq %rdx, %rdx ; ILP-NEXT: sbbq %rcx, %rcx -; ILP-NEXT: setae %cl -; ILP-NEXT: movzbl %cl, %ecx -; ILP-NEXT: subq %rcx, %rax +; ILP-NEXT: adcq $-1, %rax ; ILP-NEXT: retq ; ; HYBRID-LABEL: test4: ; HYBRID: # %bb.0: -; HYBRID-NEXT: xorl %eax, %eax ; HYBRID-NEXT: xorl %ecx, %ecx +; HYBRID-NEXT: xorl %edx, %edx ; HYBRID-NEXT: incq %rsi -; HYBRID-NEXT: sete %cl +; HYBRID-NEXT: sete %dl +; HYBRID-NEXT: movl $2, %eax ; HYBRID-NEXT: cmpq %rdi, %rsi -; HYBRID-NEXT: sbbq $0, %rcx -; HYBRID-NEXT: movl $0, %ecx +; HYBRID-NEXT: sbbq $0, %rdx +; HYBRID-NEXT: movl $0, %edx +; HYBRID-NEXT: sbbq %rdx, %rdx ; HYBRID-NEXT: sbbq %rcx, %rcx -; HYBRID-NEXT: sbbq %rax, %rax -; HYBRID-NEXT: setae %al -; HYBRID-NEXT: movzbl %al, %ecx -; HYBRID-NEXT: movl $2, %eax -; HYBRID-NEXT: subq %rcx, %rax +; HYBRID-NEXT: adcq $-1, %rax ; 
HYBRID-NEXT: retq ; ; BURR-LABEL: test4: ; BURR: # %bb.0: -; BURR-NEXT: xorl %eax, %eax ; BURR-NEXT: xorl %ecx, %ecx +; BURR-NEXT: xorl %edx, %edx ; BURR-NEXT: incq %rsi -; BURR-NEXT: sete %cl +; BURR-NEXT: sete %dl +; BURR-NEXT: movl $2, %eax ; BURR-NEXT: cmpq %rdi, %rsi -; BURR-NEXT: sbbq $0, %rcx -; BURR-NEXT: movl $0, %ecx +; BURR-NEXT: sbbq $0, %rdx +; BURR-NEXT: movl $0, %edx +; BURR-NEXT: sbbq %rdx, %rdx ; BURR-NEXT: sbbq %rcx, %rcx -; BURR-NEXT: sbbq %rax, %rax -; BURR-NEXT: setae %al -; BURR-NEXT: movzbl %al, %ecx -; BURR-NEXT: movl $2, %eax -; BURR-NEXT: subq %rcx, %rax +; BURR-NEXT: adcq $-1, %rax ; BURR-NEXT: retq ; ; SRC-LABEL: test4: @@ -750,10 +744,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind { ; SRC-NEXT: movl $0, %eax ; SRC-NEXT: sbbq %rax, %rax ; SRC-NEXT: sbbq %rcx, %rcx -; SRC-NEXT: setae %al -; SRC-NEXT: movzbl %al, %ecx ; SRC-NEXT: movl $2, %eax -; SRC-NEXT: subq %rcx, %rax +; SRC-NEXT: adcq $-1, %rax ; SRC-NEXT: retq ; ; LIN-LABEL: test4: @@ -768,9 +760,7 @@ define i64 @test4(i64 %a, i64 %b) nounwind { ; LIN-NEXT: movl $0, %edx ; LIN-NEXT: sbbq %rdx, %rdx ; LIN-NEXT: sbbq %rcx, %rcx -; LIN-NEXT: setae %cl -; LIN-NEXT: movzbl %cl, %ecx -; LIN-NEXT: subq %rcx, %rax +; LIN-NEXT: adcq $-1, %rax ; LIN-NEXT: retq %r = zext i64 %b to i256 %u = add i256 %r, 1 diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 7344d38..f73a608e0 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -904,29 +904,35 @@ define i32 @test13(i32 %a, i32 %b) nounwind { } define i32 @test14(i32 %a, i32 %b) nounwind { -; CHECK-LABEL: test14: -; CHECK: ## %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setae %al -; CHECK-NEXT: negl %eax -; CHECK-NEXT: retq +; GENERIC-LABEL: test14: +; GENERIC: ## %bb.0: +; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: cmpl %esi, %edi +; GENERIC-NEXT: adcl $-1, %eax +; GENERIC-NEXT: retq +; +; ATOM-LABEL: test14: +; ATOM: ## %bb.0: +; 
ATOM-NEXT: xorl %eax, %eax +; ATOM-NEXT: cmpl %esi, %edi +; ATOM-NEXT: adcl $-1, %eax +; ATOM-NEXT: nop +; ATOM-NEXT: nop +; ATOM-NEXT: retq ; ; ATHLON-LABEL: test14: ; ATHLON: ## %bb.0: ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx ; ATHLON-NEXT: xorl %eax, %eax ; ATHLON-NEXT: cmpl {{[0-9]+}}(%esp), %ecx -; ATHLON-NEXT: setae %al -; ATHLON-NEXT: negl %eax +; ATHLON-NEXT: adcl $-1, %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: test14: ; MCU: # %bb.0: ; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: cmpl %edx, %eax -; MCU-NEXT: setae %cl -; MCU-NEXT: negl %ecx +; MCU-NEXT: adcl $-1, %ecx ; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %c = icmp uge i32 %a, %b -- 2.7.4