[x86] try harder to form LEA from ADD to avoid flag conflicts (PR40483)

author Sanjay Patel <spatel@rotateright.com>

Thu, 18 Jul 2019 12:48:01 +0000 (12:48 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Thu, 18 Jul 2019 12:48:01 +0000 (12:48 +0000)
author Sanjay Patel <spatel@rotateright.com>
Thu, 18 Jul 2019 12:48:01 +0000 (12:48 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Thu, 18 Jul 2019 12:48:01 +0000 (12:48 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

index 95d31e6..fcf9b41 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
        Complexity += 2;
    }
  
+  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
+  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
+  // duplicating flag-producing instructions later in the pipeline.
+  if (N.getOpcode() == ISD::ADD) {
+    auto isMathWithFlags = [](SDValue V) {
+      switch (V.getOpcode()) {
+      case X86ISD::ADD:
+      case X86ISD::SUB:
+      case X86ISD::ADC:
+      case X86ISD::SBB:
+      /* TODO: These opcodes can be added safely, but we may want to justify
+               their inclusion for different reasons (better for reg-alloc).
+      case X86ISD::SMUL:
+      case X86ISD::UMUL:
+      case X86ISD::OR:
+      case X86ISD::XOR:
+      case X86ISD::AND:
+      */
+        // Value 1 is the flag output of the node - verify it's not dead.
+        return !SDValue(V.getNode(), 1).use_empty();
+      default:
+        return false;
+      }
+    };
+    // TODO: This could be an 'or' rather than 'and' to make the transform more
+    //       likely to happen. We might want to factor in whether there's a
+    //       load folding opportunity for the math op that disappears with LEA.
+    if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+      Complexity++;
+  }
+
    if (AM.Disp)
      Complexity++;
  
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll

index bba72c5..6eb0e1e 100644 (file)
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -309,35 +309,25 @@ define i32 @PR40483_sub5(i32*, i32) nounwind {
  define i32 @PR40483_sub6(i32*, i32) nounwind {
  ; X86-LABEL: PR40483_sub6:
  ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl (%edx), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %esi, %ecx
-; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    movl (%edx), %ecx
  ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    subl %edi, %esi
-; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%edx)
  ; X86-NEXT:    jae .LBB8_2
  ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    leal (%ecx,%ecx), %eax
  ; X86-NEXT:  .LBB8_2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
  ; X86-NEXT:    retl
  ;
  ; X64-LABEL: PR40483_sub6:
  ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl %ecx, %edx
-; X64-NEXT:    subl %esi, %edx
-; X64-NEXT:    addl %edx, %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    leal (%rax,%rax), %eax
+; X64-NEXT:    cmovael %ecx, %eax
  ; X64-NEXT:    retq
    %3 = load i32, i32* %0, align 8
    %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
author	Sanjay Patel <spatel@rotateright.com>
	Thu, 18 Jul 2019 12:48:01 +0000 (12:48 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Thu, 18 Jul 2019 12:48:01 +0000 (12:48 +0000)
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp		patch | blob | history
llvm/test/CodeGen/X86/combine-sbb.ll		patch | blob | history