[turbofan]: More optimizations to add and subtract operations on x64

author Daniel Clifford <danno@chromium.org>

Thu, 20 Nov 2014 13:48:34 +0000 (14:48 +0100)

committer Daniel Clifford <danno@chromium.org>

Thu, 20 Nov 2014 13:48:46 +0000 (13:48 +0000)
author Daniel Clifford <danno@chromium.org>
Thu, 20 Nov 2014 13:48:34 +0000 (14:48 +0100)
committer Daniel Clifford <danno@chromium.org>
Thu, 20 Nov 2014 13:48:46 +0000 (13:48 +0000)
diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc

index e1eb92f12bb09436d57d519a771e40ffabacad46..051ff9f5f42c61a1cdf3723f3bfd8e2746d8d96b 100644 (file)
--- a/src/compiler/x64/code-generator-x64.cc
+++ b/src/compiler/x64/code-generator-x64.cc
@@ -597,6 +597,12 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
      case kX64Lea:
        __ leaq(i.OutputRegister(), i.MemoryOperand());
        break;
+    case kX64Dec32:
+      __ decl(i.OutputRegister());
+      break;
+    case kX64Inc32:
+      __ incl(i.OutputRegister());
+      break;
      case kX64Push:
        if (HasImmediateInput(instr, 0)) {
          __ pushq(i.InputImmediate(0));
diff --git a/src/compiler/x64/instruction-codes-x64.h b/src/compiler/x64/instruction-codes-x64.h

index 9d4f59ca778b75ae3af612c34b2e43ac065d16db..473cdd58907e72a9891cc87bf161310e539c6209 100644 (file)
--- a/src/compiler/x64/instruction-codes-x64.h
+++ b/src/compiler/x64/instruction-codes-x64.h
@@ -75,6 +75,8 @@ namespace compiler {
    V(X64Movss)                      \
    V(X64Lea32)                      \
    V(X64Lea)                        \
+  V(X64Dec32)                      \
+  V(X64Inc32)                      \
    V(X64Push)                       \
    V(X64StoreWriteBarrier)
  
diff --git a/src/compiler/x64/instruction-selector-x64.cc b/src/compiler/x64/instruction-selector-x64.cc

index 85d504390abf6e52abb0ab15f044ac9019a93c6f..388a0c29a04fa3fa473382d45d504e71da60d91d 100644 (file)
--- a/src/compiler/x64/instruction-selector-x64.cc
+++ b/src/compiler/x64/instruction-selector-x64.cc
@@ -428,9 +428,33 @@ void InstructionSelector::VisitInt32Add(Node* node) {
    ScaledWithOffset32Matcher m(node);
    X64OperandGenerator g(this);
    if (m.matches() && (m.constant() == NULL || g.CanBeImmediate(m.constant()))) {
+    // The add can be represented as a "leal", but there may be a smaller
+    // representation that is better and no more expensive.
+    if (m.offset() != NULL) {
+      if (m.scaled() == NULL) {
+        if (!IsLive(m.offset())) {
+          // If the add is of the form (r1 + immediate) and the non-constant
+          // input to the add is owned by the add, then it doesn't need to be
+          // preserved across the operation, so use more compact,
+          // source-register-overwriting versions when they are available and
+          // smaller, e.g. "incl" and "decl".
+          int32_t value =
+              m.constant() == NULL ? 0 : OpParameter<int32_t>(m.constant());
+          if (value == 1) {
+            Emit(kX64Inc32, g.DefineSameAsFirst(node),
+                 g.UseRegister(m.offset()));
+            return;
+          } else if (value == -1) {
+            Emit(kX64Dec32, g.DefineSameAsFirst(node),
+                 g.UseRegister(m.offset()));
+            return;
+          }
+        }
+      }
+    }
+
      InstructionOperand* inputs[4];
      size_t input_count = 0;
-
      AddressingMode mode = GenerateMemoryOperandInputs(
          &g, m.scaled(), m.scale_exponent(), m.offset(), m.constant(), inputs,
          &input_count);
@@ -462,6 +486,31 @@ void InstructionSelector::VisitInt32Sub(Node* node) {
    if (m.left().Is(0)) {
      Emit(kX64Neg32, g.DefineSameAsFirst(node), g.UseRegister(m.right().node()));
    } else {
+    if (m.right().HasValue() && g.CanBeImmediate(m.right().node())) {
+      // If the non-constant input is owned by the subtract, using a "decl" or
+      // "incl" that overwrites that input is smaller and probably an overall
+      // win.
+      if (!IsLive(m.left().node())) {
+        if (m.right().Value() == 1) {
+          Emit(kX64Dec32, g.DefineSameAsFirst(node),
+               g.UseRegister(m.left().node()));
+          return;
+        }
+        if (m.right().Value() == -1) {
+          Emit(kX64Inc32, g.DefineSameAsFirst(node),
+               g.UseRegister(m.left().node()));
+          return;
+        }
+      } else {
+        // Special handling for subtraction of constants where the non-constant
+        // input is used elsewhere. To eliminate the gap move before the sub to
+        // copy the destination register, use a "leal" instead.
+        Emit(kX64Lea32 | AddressingModeField::encode(kMode_MRI),
+             g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+             g.TempImmediate(-m.right().Value()));
+        return;
+      }
+    }
      VisitBinop(this, node, kX64Sub32);
    }
  }
diff --git a/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc b/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc

index f4070ecd2b34c116248e26435bc0ff1513994e7e..26df0c8e714476d514778c21ff438200c46485cf 100644 (file)
--- a/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
+++ b/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
@@ -244,14 +244,17 @@ TEST_F(InstructionSelectorTest, TruncateInt64ToInt32WithWord64Shr) {
  // Addition.
  
  
-TEST_F(InstructionSelectorTest, Int32AddWithInt32AddWithParameters) {
+TEST_F(InstructionSelectorTest, Int32AddWithInt32ParametersLea) {
    StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
    Node* const p0 = m.Parameter(0);
    Node* const p1 = m.Parameter(1);
    Node* const a0 = m.Int32Add(p0, p1);
-  m.Return(m.Int32Add(a0, p0));
+  USE(a0);
+  // Additional uses of the add's inputs make the selector choose lea.
+  Node* const a1 = m.Int32Add(p0, p1);
+  m.Return(m.Int32Add(a0, a1));
    Stream s = m.Build();
-  ASSERT_EQ(2U, s.size());
+  ASSERT_EQ(3U, s.size());
    EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
    ASSERT_EQ(2U, s[0]->InputCount());
    EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
@@ -259,10 +262,12 @@ TEST_F(InstructionSelectorTest, Int32AddWithInt32AddWithParameters) {
  }
  
  
-TEST_F(InstructionSelectorTest, Int32AddConstantAsLea) {
+TEST_F(InstructionSelectorTest, Int32AddConstantAsLeaSingle) {
    StreamBuilder m(this, kMachInt32, kMachInt32);
    Node* const p0 = m.Parameter(0);
    Node* const c0 = m.Int32Constant(15);
+  // If there is only a single use of an add's input, still use lea and not
+  // add, because it is faster.
    m.Return(m.Int32Add(p0, c0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
@@ -274,10 +279,80 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsLea) {
  }
  
  
-TEST_F(InstructionSelectorTest, Int32AddCommutedConstantAsLea) {
+TEST_F(InstructionSelectorTest, Int32AddConstantAsInc) {
+  StreamBuilder m(this, kMachInt32, kMachInt32);
+  Node* const p0 = m.Parameter(0);
+  Node* const c0 = m.Int32Constant(1);
+  // If there is only a single use of an add's input and the immediate constant
+  // for the add is 1, use inc.
+  m.Return(m.Int32Add(p0, c0));
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kX64Inc32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+}
+
+
+TEST_F(InstructionSelectorTest, Int32AddConstantAsDec) {
+  StreamBuilder m(this, kMachInt32, kMachInt32);
+  Node* const p0 = m.Parameter(0);
+  Node* const c0 = m.Int32Constant(-1);
+  // If there is only a single use of an add's input and the immediate constant
+  // for the add is -1, use dec.
+  m.Return(m.Int32Add(p0, c0));
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kX64Dec32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+}
+
+
+TEST_F(InstructionSelectorTest, Int32AddConstantAsLeaDouble) {
    StreamBuilder m(this, kMachInt32, kMachInt32);
    Node* const p0 = m.Parameter(0);
    Node* const c0 = m.Int32Constant(15);
+  // A second use of an add's input uses lea
+  Node* const a0 = m.Int32Add(p0, c0);
+  USE(a0);
+  m.Return(m.Int32Add(p0, c0));
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
+  ASSERT_EQ(2U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+  EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
+}
+
+
+TEST_F(InstructionSelectorTest, Int32AddCommutedConstantAsLeaSingle) {
+  StreamBuilder m(this, kMachInt32, kMachInt32);
+  Node* const p0 = m.Parameter(0);
+  Node* const c0 = m.Int32Constant(15);
+  // If there is only a single use of an add's input, still use lea; it is
+  // generally faster than the add and reduces register pressure.
+  m.Return(m.Int32Add(c0, p0));
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
+  ASSERT_EQ(2U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+  EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
+}
+
+
+TEST_F(InstructionSelectorTest, Int32AddCommutedConstantAsLeaDouble) {
+  StreamBuilder m(this, kMachInt32, kMachInt32);
+  Node* const p0 = m.Parameter(0);
+  Node* const c0 = m.Int32Constant(15);
+  // A second use of an add's input uses lea
+  Node* const a0 = m.Int32Add(c0, p0);
+  USE(a0);
    m.Return(m.Int32Add(c0, p0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
@@ -615,6 +690,38 @@ TEST_F(InstructionSelectorTest, Int32AddScaled8ShlWithConstant) {
  }
  
  
+TEST_F(InstructionSelectorTest, Int32SubConstantAsInc) {
+  StreamBuilder m(this, kMachInt32, kMachInt32);
+  Node* const p0 = m.Parameter(0);
+  Node* const c0 = m.Int32Constant(-1);
+  // If there is only a single use of a sub's input and the immediate constant
+  // for the sub is -1, use inc.
+  m.Return(m.Int32Sub(p0, c0));
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kX64Inc32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+}
+
+
+TEST_F(InstructionSelectorTest, Int32SubConstantAsDec) {
+  StreamBuilder m(this, kMachInt32, kMachInt32);
+  Node* const p0 = m.Parameter(0);
+  Node* const c0 = m.Int32Constant(1);
+  // If there is only a single use of a sub's input and the immediate constant
+  // for the sub is 1, use dec.
+  m.Return(m.Int32Sub(p0, c0));
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kX64Dec32, s[0]->arch_opcode());
+  EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+}
+
+
  // -----------------------------------------------------------------------------
  // Multiplication.
author	Daniel Clifford <danno@chromium.org>
	Thu, 20 Nov 2014 13:48:34 +0000 (14:48 +0100)
committer	Daniel Clifford <danno@chromium.org>
	Thu, 20 Nov 2014 13:48:46 +0000 (13:48 +0000)
src/compiler/x64/code-generator-x64.cc		patch \| blob \| history
src/compiler/x64/instruction-codes-x64.h		patch \| blob \| history
src/compiler/x64/instruction-selector-x64.cc		patch \| blob \| history
test/unittests/compiler/x64/instruction-selector-x64-unittest.cc		patch \| blob \| history