From a3d489e87e8243bdb0eff947a38006b039dff8c0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 3 Feb 2020 23:35:16 -0800
Subject: [PATCH] [X86] Add a DAG combine for (i32 (sext (i8
 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) and remove isel
 patterns.

The same is done for any_extend, though we don't have test coverage for that.

The test changes are because the isel patterns didn't check that the
setcc_carry had only one use. So isel would end up with two different-sized
setcc_carry instructions, and since setcc_carry clobbers the flags, we would
need to recreate the flags for the second instruction.

This combine handles additional uses by truncating the new wide setcc_carry
back to the original size for those uses.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++++++++++++++++++++++
 llvm/lib/Target/X86/X86InstrCompiler.td | 23 -------------------
 llvm/test/CodeGen/X86/copy-eflags.ll    | 40 +++++++++++++++------------------
 3 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b3f5378..2fb0dc2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43806,6 +43806,23 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
   EVT InVT = N0.getValueType();
   SDLoc DL(N);
 
+  // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
+  if (!DCI.isBeforeLegalizeOps() &&
+      N0.getOpcode() == X86ISD::SETCC_CARRY) {
+    SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
+                                N0->getOperand(1));
+    bool ReplaceOtherUses = !N0.hasOneUse();
+    DCI.CombineTo(N, Setcc);
+    // Replace other uses with a truncate of the widened setcc_carry.
+    if (ReplaceOtherUses) {
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+                                  N0.getValueType(), Setcc);
+      DCI.CombineTo(N0.getNode(), Trunc);
+    }
+
+    return SDValue(N, 0);
+  }
+
   if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
     return NewCMov;
 
@@ -43936,6 +43953,24 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  // (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
+  // FIXME: Is this needed? We don't seem to have any tests for it.
+  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
+      N0.getOpcode() == X86ISD::SETCC_CARRY) {
+    SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
+                                N0->getOperand(1));
+    bool ReplaceOtherUses = !N0.hasOneUse();
+    DCI.CombineTo(N, Setcc);
+    // Replace other uses with a truncate of the widened setcc_carry.
+    if (ReplaceOtherUses) {
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+                                  N0.getValueType(), Setcc);
+      DCI.CombineTo(N0.getNode(), Trunc);
+    }
+
+    return SDValue(N, 0);
+  }
+
   if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
     return NewCMov;
 
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 539c0737..ee5ce17 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -323,20 +323,6 @@ def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
 } // isCodeGenOnly
 
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C64r)>;
-
-def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C16r)>;
-def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C64r)>;
-
 // Patterns to give priority when both inputs are zero so that we don't use
 // an immediate for the RHS.
 // TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
@@ -1900,15 +1886,6 @@ defm : one_bit_patterns;
 defm : one_bit_patterns;
 defm : one_bit_patterns;
 
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-
 //===----------------------------------------------------------------------===//
 // EFLAGS-defining Patterns
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index a93ea41..435e7bb 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -293,44 +293,40 @@ bb1:
 define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
 ; X32-LABEL: PR37431:
 ; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %ecx
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    sarl $31, %edx
-; X32-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT:    sbbl %edx, %eax
-; X32-NEXT:    setb %cl
-; X32-NEXT:    sbbb %dl, %dl
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    movb %dl, (%edi)
-; X32-NEXT:    movzbl %cl, %ecx
-; X32-NEXT:    xorl %edi, %edi
-; X32-NEXT:    subl %ecx, %edi
+; X32-NEXT:    movl (%edi), %edi
+; X32-NEXT:    movl %edi, %ebx
+; X32-NEXT:    sarl $31, %ebx
+; X32-NEXT:    cmpl %edi, {{[0-9]+}}(%esp)
+; X32-NEXT:    sbbl %ebx, %esi
+; X32-NEXT:    sbbl %ebx, %ebx
+; X32-NEXT:    movb %bl, (%edx)
 ; X32-NEXT:    cltd
-; X32-NEXT:    idivl %edi
-; X32-NEXT:    movb %dl, (%esi)
+; X32-NEXT:    idivl %ebx
+; X32-NEXT:    movb %dl, (%ecx)
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR37431:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movslq (%rdi), %rdx
 ; X64-NEXT:    cmpq %rdx, %r8
-; X64-NEXT:    sbbb %cl, %cl
-; X64-NEXT:    cmpq %rdx, %r8
-; X64-NEXT:    movb %cl, (%rsi)
-; X64-NEXT:    sbbl %ecx, %ecx
+; X64-NEXT:    sbbl %edi, %edi
+; X64-NEXT:    movb %dil, (%rsi)
 ; X64-NEXT:    cltd
-; X64-NEXT:    idivl %ecx
-; X64-NEXT:    movb %dl, (%r9)
+; X64-NEXT:    idivl %edi
+; X64-NEXT:    movb %dl, (%rcx)
 ; X64-NEXT:    retq
 entry:
   %tmp = load i32, i32* %arg1
-- 
2.7.4
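
A note on why the combine above is sound: X86ISD::SETCC_CARRY materializes the
carry flag as an all-zeros or all-ones value (an sbb of a register with
itself), so sign-extending its i8 result is the same as producing the wider
value directly, and any remaining users of the narrow node can be fed a
truncate of the widened one. Below is a minimal standalone sketch of that
equivalence using plain C++ integers rather than SelectionDAG nodes; the
helper names are illustrative only and are not part of the patch.

#include <cassert>
#include <cstdint>

// Value of an 8-bit setcc_carry: all-ones when the carry flag is set,
// otherwise zero (what "sbb %al, %al" leaves in %al).
static int8_t setcc_carry8(bool carry) { return carry ? int8_t(-1) : int8_t(0); }

// The same value produced directly at 32 bits (what "sbb %eax, %eax" leaves).
static int32_t setcc_carry32(bool carry) { return carry ? -1 : 0; }

int main() {
  for (int c = 0; c <= 1; ++c) {
    bool carry = (c != 0);
    // (i32 (sext (i8 setcc_carry))) == (i32 setcc_carry): sign-extending the
    // 8-bit all-ones/all-zeros pattern yields the 32-bit pattern directly.
    assert(int32_t(setcc_carry8(carry)) == setcc_carry32(carry));
    // Other users of the narrow value can use a truncate of the wide one.
    assert(int8_t(setcc_carry32(carry)) == setcc_carry8(carry));
  }
  return 0;
}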