From 315896d3ac8535833c93b990be6cd3df3844bad8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 21 Mar 2022 14:20:46 +0000 Subject: [PATCH] [X86] Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y) Prefer the commutable ADC over SBB to improve load folding opportunities --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++++++ llvm/test/CodeGen/X86/add-sub-bool.ll | 18 +++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8b0d6bd..c3b508d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -52971,6 +52971,14 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; + // Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y) + if (Op1.getOpcode() == X86ISD::SBB && Op1->hasOneUse()) { + SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0, + Op1.getOperand(1), Op1.getOperand(2)); + return DAG.getNode(ISD::SUB, SDLoc(N), Op0.getValueType(), ADC.getValue(0), + Op1.getOperand(0)); + } + return combineAddOrSubToADCOrSBB(N, DAG); } diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll index b5480a1..b20c022 100644 --- a/llvm/test/CodeGen/X86/add-sub-bool.ll +++ b/llvm/test/CodeGen/X86/add-sub-bool.ll @@ -215,17 +215,16 @@ define i32 @test_i32_sub_sub_idx(i32 %x, i32 %y, i32 %z) nounwind { ; X86-LABEL: test_i32_sub_sub_idx: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl $16, {{[0-9]+}}(%esp) -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: adcl $0, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_sub_sub_idx: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: btl $16, %edx -; X64-NEXT: sbbl $0, %esi +; X64-NEXT: adcl $0, %eax ; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %shift = lshr i32 %z, 16 @@ -427,22 +426,19 @@ define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { define i32 @test_i32_sub_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_sub_sub_var: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: btl %edx, %esi -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi +; X86-NEXT: btl %ecx, %edx +; X86-NEXT: adcl $0, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_sub_sub_var: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: btl %ecx, %edx -; X64-NEXT: sbbl $0, %esi +; X64-NEXT: adcl $0, %eax ; X64-NEXT: subl %esi, %eax ; X64-NEXT: retq %shift = lshr i32 %z, %w -- 2.7.4