[x64] Match -0 - x with sign bit flip.
author	bmeurer <bmeurer@chromium.org>
Tue, 7 Apr 2015 07:34:55 +0000 (00:34 -0700)
committer	Commit bot <commit-bot@chromium.org>
Tue, 7 Apr 2015 07:35:03 +0000 (07:35 +0000)
We can use xorps/xorpd on Intel CPUs to flip the sign bit. Ideally we'd
use a RIP-relative 128-bit constant in the code object, as OCaml/GCC do;
however, that requires 128-bit alignment for code objects, which is not
yet implemented. So for now we materialize the mask inline.
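
For reference, a minimal standalone C++ sketch (not part of this change) of
what the emitted sequence computes: pcmpeqd sets the scratch register to
all-ones, psllq shifts each 64-bit lane left so only the sign bit of the
scalar lane survives (bit 63 for float64; bit 31 of the low dword for
float32), and xorps/xorpd flips that bit. FlipSignF64 is a hypothetical
helper name used here only for illustration:

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Hypothetical helper mirroring pcmpeqd/psllq(63)/xorpd: XOR the IEEE-754
  // sign bit, which negates the value without touching exponent or mantissa
  // (so it also maps -0.0 to 0.0 and leaves NaN payloads intact).
  double FlipSignF64(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits ^= ~uint64_t{0} << 63;  // all-ones shifted left 63 = 0x8000000000000000
    std::memcpy(&x, &bits, sizeof(bits));
    return x;
  }

  int main() {
    // Prints "-1.5 0 -0"; the float32 case is the same with uint32_t and a
    // shift of 31.
    std::printf("%g %g %g\n", FlipSignF64(1.5), FlipSignF64(-0.0),
                FlipSignF64(0.0));
  }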

R=dcarney@chromium.org

Review URL: https://codereview.chromium.org/1046893002

Cr-Commit-Position: refs/heads/master@{#27611}

src/compiler/x64/code-generator-x64.cc
src/compiler/x64/instruction-codes-x64.h
src/compiler/x64/instruction-selector-x64.cc
test/cctest/compiler/test-run-machops.cc
test/unittests/compiler/x64/instruction-selector-x64-unittest.cc

src/compiler/x64/code-generator-x64.cc
index fee2702332d463a4f724043d00c23b0cb0084bcd..98ec629b4d702ba2d8d033a34e830ec6b3cc2939 100644
@@ -726,15 +726,23 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
     case kSSEFloat32Div:
       ASSEMBLE_SSE_BINOP(divss);
       break;
+    case kSSEFloat32Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      // TODO(turbofan): Add AVX version with relaxed register constraints.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psllq(kScratchDoubleReg, 31);
+      __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kSSEFloat32Sqrt:
+      ASSEMBLE_SSE_UNOP(sqrtss);
+      break;
     case kSSEFloat32Max:
       ASSEMBLE_SSE_BINOP(maxss);
       break;
     case kSSEFloat32Min:
       ASSEMBLE_SSE_BINOP(minss);
       break;
-    case kSSEFloat32Sqrt:
-      ASSEMBLE_SSE_UNOP(sqrtss);
-      break;
     case kSSEFloat32ToFloat64:
       ASSEMBLE_SSE_UNOP(cvtss2sd);
       break;
@@ -791,6 +799,14 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
     case kSSEFloat64Min:
       ASSEMBLE_SSE_BINOP(minsd);
       break;
+    case kSSEFloat64Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      // TODO(turbofan): Add AVX version with relaxed register constraints.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psllq(kScratchDoubleReg, 63);
+      __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
     case kSSEFloat64Sqrt:
       ASSEMBLE_SSE_UNOP(sqrtsd);
       break;
src/compiler/x64/instruction-codes-x64.h
index 2feafd27e5928c2bec6de475b04410599135fd68..8b0469e1c6d319207f0d5bdbcb2ab3e2d27ed6bd 100644
@@ -52,6 +52,7 @@ namespace compiler {
   V(SSEFloat32Sub)                 \
   V(SSEFloat32Mul)                 \
   V(SSEFloat32Div)                 \
+  V(SSEFloat32Neg)                 \
   V(SSEFloat32Sqrt)                \
   V(SSEFloat32Max)                 \
   V(SSEFloat32Min)                 \
@@ -62,6 +63,7 @@ namespace compiler {
   V(SSEFloat64Mul)                 \
   V(SSEFloat64Div)                 \
   V(SSEFloat64Mod)                 \
+  V(SSEFloat64Neg)                 \
   V(SSEFloat64Sqrt)                \
   V(SSEFloat64Round)               \
   V(SSEFloat64Max)                 \
src/compiler/x64/instruction-selector-x64.cc
index 5f3fc06865d35d4d0d7b816039de5b36d91c2f33..82829a88352e8eb5db3e99af5ae5df4077d074ce 100644
@@ -858,6 +858,13 @@ void InstructionSelector::VisitFloat32Add(Node* node) {
 
 
 void InstructionSelector::VisitFloat32Sub(Node* node) {
+  X64OperandGenerator g(this);
+  Float32BinopMatcher m(node);
+  if (m.left().IsMinusZero()) {
+    Emit(kSSEFloat32Neg, g.DefineSameAsFirst(node),
+         g.UseRegister(m.right().node()));
+    return;
+  }
   VisitFloatBinop(this, node, kAVXFloat32Sub, kSSEFloat32Sub);
 }
 
@@ -896,17 +903,22 @@ void InstructionSelector::VisitFloat64Add(Node* node) {
 void InstructionSelector::VisitFloat64Sub(Node* node) {
   X64OperandGenerator g(this);
   Float64BinopMatcher m(node);
-  if (m.left().IsMinusZero() && m.right().IsFloat64RoundDown() &&
-      CanCover(m.node(), m.right().node())) {
-    if (m.right().InputAt(0)->opcode() == IrOpcode::kFloat64Sub &&
-        CanCover(m.right().node(), m.right().InputAt(0))) {
-      Float64BinopMatcher mright0(m.right().InputAt(0));
-      if (mright0.left().IsMinusZero()) {
-        Emit(kSSEFloat64Round | MiscField::encode(kRoundUp),
-             g.DefineAsRegister(node), g.UseRegister(mright0.right().node()));
-        return;
+  if (m.left().IsMinusZero()) {
+    if (m.right().IsFloat64RoundDown() &&
+        CanCover(m.node(), m.right().node())) {
+      if (m.right().InputAt(0)->opcode() == IrOpcode::kFloat64Sub &&
+          CanCover(m.right().node(), m.right().InputAt(0))) {
+        Float64BinopMatcher mright0(m.right().InputAt(0));
+        if (mright0.left().IsMinusZero()) {
+          Emit(kSSEFloat64Round | MiscField::encode(kRoundUp),
+               g.DefineAsRegister(node), g.UseRegister(mright0.right().node()));
+          return;
+        }
       }
     }
+    Emit(kSSEFloat64Neg, g.DefineSameAsFirst(node),
+         g.UseRegister(m.right().node()));
+    return;
   }
   VisitFloatBinop(this, node, kAVXFloat64Sub, kSSEFloat64Sub);
 }
test/cctest/compiler/test-run-machops.cc
index fdc1a48eafb6ea3afee1b0ad06c93ac389e22832..1686d377883e9c68898e301461b46d36a384384a 100644
@@ -3212,6 +3212,46 @@ TEST(RunFloat32SubP) {
 }
 
 
+TEST(RunFloat32SubImm1) {
+  float input = 0.0f;
+  float output = 0.0f;
+
+  FOR_FLOAT32_INPUTS(i) {
+    RawMachineAssemblerTester<int32_t> m;
+    Node* t0 = m.LoadFromPointer(&input, kMachFloat32);
+    Node* t1 = m.Float32Sub(m.Float32Constant(*i), t0);
+    m.StoreToPointer(&output, kMachFloat32, t1);
+    m.Return(m.Int32Constant(0));
+    FOR_FLOAT32_INPUTS(j) {
+      input = *j;
+      float expected = *i - input;
+      CHECK_EQ(0, m.Call());
+      CheckFloatEq(expected, output);
+    }
+  }
+}
+
+
+TEST(RunFloat32SubImm2) {
+  float input = 0.0f;
+  float output = 0.0f;
+
+  FOR_FLOAT32_INPUTS(i) {
+    RawMachineAssemblerTester<int32_t> m;
+    Node* t0 = m.LoadFromPointer(&input, kMachFloat32);
+    Node* t1 = m.Float32Sub(t0, m.Float32Constant(*i));
+    m.StoreToPointer(&output, kMachFloat32, t1);
+    m.Return(m.Int32Constant(0));
+    FOR_FLOAT32_INPUTS(j) {
+      input = *j;
+      float expected = input - *i;
+      CHECK_EQ(0, m.Call());
+      CheckFloatEq(expected, output);
+    }
+  }
+}
+
+
 TEST(RunFloat64SubP) {
   RawMachineAssemblerTester<int32_t> m;
   Float64BinopTester bt(&m);
test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
index ebdf02448f4413cd955ebedc64739e2a3e1af330..6d0e589e3bf04d4607229e8cbfe64d1ade73943c 100644
@@ -75,6 +75,7 @@ TEST_F(InstructionSelectorTest, TruncateInt64ToInt32WithParameter) {
 // -----------------------------------------------------------------------------
 // Loads and stores
 
+
 namespace {
 
 struct MemoryAccess {
@@ -137,6 +138,7 @@ INSTANTIATE_TEST_CASE_P(InstructionSelectorTest,
                         InstructionSelectorMemoryAccessTest,
                         ::testing::ValuesIn(kMemoryAccesses));
 
+
 // -----------------------------------------------------------------------------
 // ChangeUint32ToUint64.
 
@@ -1026,6 +1028,22 @@ TEST_F(InstructionSelectorTest, Float64BinopArithmetic) {
 }
 
 
+TEST_F(InstructionSelectorTest, Float64SubWithMinusZeroAndParameter) {
+  StreamBuilder m(this, kMachFloat64, kMachFloat64);
+  Node* const p0 = m.Parameter(0);
+  Node* const n = m.Float64Sub(m.Float64Constant(-0.0), p0);
+  m.Return(n);
+  Stream s = m.Build();
+  ASSERT_EQ(1U, s.size());
+  EXPECT_EQ(kSSEFloat64Neg, s[0]->arch_opcode());
+  ASSERT_EQ(1U, s[0]->InputCount());
+  EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
+  ASSERT_EQ(1U, s[0]->OutputCount());
+  EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
+  EXPECT_EQ(kFlags_none, s[0]->flags_mode());
+}
+
+
 // -----------------------------------------------------------------------------
 // Miscellaneous.