Emit VMLS for multiply-subtract on ARM.

author ulan@chromium.org <ulan@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Wed, 27 Feb 2013 10:24:40 +0000 (10:24 +0000)

committer ulan@chromium.org <ulan@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Wed, 27 Feb 2013 10:24:40 +0000 (10:24 +0000)
author ulan@chromium.org <ulan@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 27 Feb 2013 10:24:40 +0000 (10:24 +0000)
committer ulan@chromium.org <ulan@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 27 Feb 2013 10:24:40 +0000 (10:24 +0000)
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc

index 7cd0a17..a8c32d9 100644 (file)
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -2536,6 +2536,24 @@ void Assembler::vmla(const DwVfpRegister dst,
  }
  
  
+void Assembler::vmls(const DwVfpRegister dst,
+                     const DwVfpRegister src1,
+                     const DwVfpRegister src2,
+                     const Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8-932.
+  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | B6 |
+       m*B5 | vm);
+}
+
+
  void Assembler::vdiv(const DwVfpRegister dst,
                       const DwVfpRegister src1,
                       const DwVfpRegister src2,
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h

index b32c0f3..12cee54 100644 (file)
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -1142,6 +1142,10 @@ class Assembler : public AssemblerBase {
              const DwVfpRegister src1,
              const DwVfpRegister src2,
              const Condition cond = al);
+  void vmls(const DwVfpRegister dst,
+            const DwVfpRegister src1,
+            const DwVfpRegister src2,
+            const Condition cond = al);
    void vdiv(const DwVfpRegister dst,
              const DwVfpRegister src1,
              const DwVfpRegister src2,
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc

index 79a996c..0b005c7 100644 (file)
--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -1108,6 +1108,7 @@ int Decoder::DecodeType7(Instruction* instr) {
  // Dd = vsub(Dn, Dm)
  // Dd = vmul(Dn, Dm)
  // Dd = vmla(Dn, Dm)
+// Dd = vmls(Dn, Dm)
  // Dd = vdiv(Dn, Dm)
  // vcmp(Dd, Dm)
  // vmrs
@@ -1176,6 +1177,12 @@ void Decoder::DecodeTypeVFP(Instruction* instr) {
        } else {
          Unknown(instr);  // Not used by V8.
        }
+    } else if ((instr->Opc1Value() == 0x0) && (instr->Opc3Value() & 0x1)) {
+      if (instr->SzValue() == 0x1) {
+        Format(instr, "vmls.f64'cond 'Dd, 'Dn, 'Dm");
+      } else {
+        Unknown(instr);  // Not used by V8.
+      }
      } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
        if (instr->SzValue() == 0x1) {
          Format(instr, "vdiv.f64'cond 'Dd, 'Dn, 'Dm");
diff --git a/src/arm/lithium-arm.cc b/src/arm/lithium-arm.cc

index 69fd5e4..bab398c 100644 (file)
--- a/src/arm/lithium-arm.cc
+++ b/src/arm/lithium-arm.cc
@@ -1368,16 +1368,23 @@ LInstruction* LChunkBuilder::DoMul(HMul* instr) {
      return DefineAsRegister(mul);
  
    } else if (instr->representation().IsDouble()) {
-    if (instr->UseCount() == 1 && instr->uses().value()->IsAdd()) {
-      HAdd* add = HAdd::cast(instr->uses().value());
-      if (instr == add->left()) {
-        // This mul is the lhs of an add. The add and mul will be folded
-        // into a multiply-add.
+    if (instr->UseCount() == 1 && (instr->uses().value()->IsAdd() ||
+                                   instr->uses().value()->IsSub())) {
+      HBinaryOperation* use = HBinaryOperation::cast(instr->uses().value());
+
+      if (use->IsAdd() && instr == use->left()) {
+        // This mul is the lhs of an add. The add and mul will be folded into a
+        // multiply-add in DoAdd.
          return NULL;
        }
-      if (instr == add->right() && !add->left()->IsMul()) {
+      if (instr == use->right() && use->IsAdd() && !use->left()->IsMul()) {
          // This mul is the rhs of an add, where the lhs is not another mul.
-        // The add and mul will be folded into a multiply-add.
+        // The add and mul will be folded into a multiply-add in DoAdd.
+        return NULL;
+      }
+      if (instr == use->right() && use->IsSub()) {
+        // This mul is the rhs of a sub. The sub and mul will be folded into a
+        // multiply-sub in DoSub.
          return NULL;
        }
      }
@@ -1408,6 +1415,10 @@ LInstruction* LChunkBuilder::DoSub(HSub* instr) {
      }
      return result;
    } else if (instr->representation().IsDouble()) {
+    if (instr->right()->IsMul()) {
+      return DoMultiplySub(instr->left(), HMul::cast(instr->right()));
+    }
+
      return DoArithmeticD(Token::SUB, instr);
    } else {
      return DoArithmeticT(Token::SUB, instr);
@@ -1441,6 +1452,18 @@ LInstruction* LChunkBuilder::DoMultiplyAdd(HMul* mul, HValue* addend) {
                                                       multiplicand_op));
  }
  
+
+LInstruction* LChunkBuilder::DoMultiplySub(HValue* minuend, HMul* mul) {
+  LOperand* minuend_op = UseRegisterAtStart(minuend);
+  LOperand* multiplier_op = UseRegisterAtStart(mul->left());
+  LOperand* multiplicand_op = UseRegisterAtStart(mul->right());
+
+  return DefineSameAsFirst(new(zone()) LMultiplySubD(minuend_op,
+                                                     multiplier_op,
+                                                     multiplicand_op));
+}
+
+
  LInstruction* LChunkBuilder::DoAdd(HAdd* instr) {
    if (instr->representation().IsInteger32()) {
      ASSERT(instr->left()->representation().IsInteger32());
@@ -1454,8 +1477,9 @@ LInstruction* LChunkBuilder::DoAdd(HAdd* instr) {
      }
      return result;
    } else if (instr->representation().IsDouble()) {
-    if (instr->left()->IsMul())
+    if (instr->left()->IsMul()) {
        return DoMultiplyAdd(HMul::cast(instr->left()), instr->right());
+    }
  
      if (instr->right()->IsMul()) {
        ASSERT(!instr->left()->IsMul());
diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h

index a1df469..e81734e 100644 (file)
--- a/src/arm/lithium-arm.h
+++ b/src/arm/lithium-arm.h
@@ -140,6 +140,7 @@ class LCodeGen;
    V(ModI)                                       \
    V(MulI)                                       \
    V(MultiplyAddD)                               \
+  V(MultiplySubD)                               \
    V(NumberTagD)                                 \
    V(NumberTagI)                                 \
    V(NumberTagU)                                 \
@@ -661,6 +662,24 @@ class LMultiplyAddD: public LTemplateInstruction<1, 3, 0> {
  };
  
  
+// Instruction for computing minuend - multiplier * multiplicand.
+class LMultiplySubD: public LTemplateInstruction<1, 3, 0> {
+ public:
+  LMultiplySubD(LOperand* minuend, LOperand* multiplier,
+                LOperand* multiplicand) {
+    inputs_[0] = minuend;
+    inputs_[1] = multiplier;
+    inputs_[2] = multiplicand;
+  }
+
+  LOperand* minuend() { return inputs_[0]; }
+  LOperand* multiplier() { return inputs_[1]; }
+  LOperand* multiplicand() { return inputs_[2]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(MultiplySubD, "multiply-sub-d")
+};
+
+
  class LCmpIDAndBranch: public LControlInstruction<2, 0> {
   public:
    LCmpIDAndBranch(LOperand* left, LOperand* right) {
@@ -2554,6 +2573,7 @@ class LChunkBuilder BASE_EMBEDDED {
  #undef DECLARE_DO
  
    LInstruction* DoMultiplyAdd(HMul* mul, HValue* addend);
+  LInstruction* DoMultiplySub(HValue* minuend, HMul* mul);
    LInstruction* DoRSub(HSub* instr);
  
    static bool HasMagicNumberForDivisor(int32_t divisor);
diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc

index 29f8260..5463200 100644 (file)
--- a/src/arm/lithium-codegen-arm.cc
+++ b/src/arm/lithium-codegen-arm.cc
@@ -1478,6 +1478,18 @@ void LCodeGen::DoMultiplyAddD(LMultiplyAddD* instr) {
  }
  
  
+void LCodeGen::DoMultiplySubD(LMultiplySubD* instr) {
+  DwVfpRegister minuend = ToDoubleRegister(instr->minuend());
+  DwVfpRegister multiplier = ToDoubleRegister(instr->multiplier());
+  DwVfpRegister multiplicand = ToDoubleRegister(instr->multiplicand());
+
+  // This is computed in-place.
+  ASSERT(minuend.is(ToDoubleRegister(instr->result())));
+
+  __ vmls(minuend, multiplier, multiplicand);
+}
+
+
  void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) {
    const Register result = ToRegister(instr->result());
    const Register left = ToRegister(instr->left());
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc

index 86d0614..b7bc839 100644 (file)
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -2783,20 +2783,26 @@ void Simulator::DecodeTypeVFP(Instruction* instr) {
        double dm_value = get_double_from_d_register(vm);
        double dd_value = dn_value * dm_value;
        set_d_register_from_double(vd, dd_value);
-    } else if ((instr->Opc1Value() == 0x0) && !(instr->Opc3Value() & 0x1)) {
-      // vmla
+    } else if ((instr->Opc1Value() == 0x0)) {
+      // vmla, vmls
+      const bool is_vmls = (instr->Opc3Value() & 0x1);
+
        if (instr->SzValue() != 0x1) {
          UNREACHABLE();  // Not used by V8.
        }
  
-      double dd_value = get_double_from_d_register(vd);
-      double dn_value = get_double_from_d_register(vn);
-      double dm_value = get_double_from_d_register(vm);
+      const double dd_val = get_double_from_d_register(vd);
+      const double dn_val = get_double_from_d_register(vn);
+      const double dm_val = get_double_from_d_register(vm);
  
-      // Note: we do the mul and add in separate steps to avoid getting a result
-      // with too high precision.
-      set_d_register_from_double(vd, dn_value * dm_value);
-      set_d_register_from_double(vd, get_double_from_d_register(vd) + dd_value);
+      // Note: we do the mul and add/sub in separate steps to avoid getting a
+      // result with too high precision.
+      set_d_register_from_double(vd, dn_val * dm_val);
+      if (is_vmls) {
+        set_d_register_from_double(vd, dd_val - get_double_from_d_register(vd));
+      } else {
+        set_d_register_from_double(vd, dd_val + get_double_from_d_register(vd));
+      }
      } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
        // vdiv
        if (instr->SzValue() != 0x1) {
diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc

index 9ce74fb..14bcb1a 100644 (file)
--- a/test/cctest/test-assembler-arm.cc
+++ b/test/cctest/test-assembler-arm.cc
@@ -265,6 +265,7 @@ TEST(4) {
      __ vstr(d5, r4, OFFSET_OF(T, c));
  
      __ vmla(d5, d6, d7);
+    __ vmls(d5, d5, d6);
  
      __ vmov(r2, r3, d5);
      __ vmov(d4, r2, r3);
@@ -354,7 +355,7 @@ TEST(4) {
      CHECK_EQ(1.0, t.e);
      CHECK_EQ(1.000000059604644775390625, t.d);
      CHECK_EQ(4.25, t.c);
-    CHECK_EQ(8.375, t.b);
+    CHECK_EQ(-4.1875, t.b);
      CHECK_EQ(1.5, t.a);
    }
  }
diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc

index f51b0f3..6d7c4eb 100644 (file)
--- a/test/cctest/test-disasm-arm.cc
+++ b/test/cctest/test-disasm-arm.cc
@@ -567,6 +567,11 @@ TEST(Vfp) {
      COMPARE(vmla(d6, d4, d5, cc),
              "3e046b05       vmla.f64cc d6, d4, d5");
  
+    COMPARE(vmls(d2, d1, d0),
+            "ee012b40       vmls.f64 d2, d1, d0");
+    COMPARE(vmls(d6, d4, d5, cc),
+            "3e046b45       vmls.f64cc d6, d4, d5");
+
      COMPARE(vcvt_u32_f64(s0, d0),
              "eebc0bc0       vcvt.u32.f64 s0, d0");
      COMPARE(vcvt_s32_f64(s0, d0),
diff --git a/test/mjsunit/compiler/multiply-sub.js b/test/mjsunit/compiler/multiply-sub.js

new file mode 100644 (file)

index 0000000..4793181
--- /dev/null
+++ b/test/mjsunit/compiler/multiply-sub.js
@@ -0,0 +1,56 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Flags: --allow-natives-syntax
+// Test expressions that can be computed with a multiply-sub instruction.
+
+function f(a, b, c) {
+  return a - b * c;
+}
+
+function g(a, b, c) {
+  return a * b - c;
+}
+
+function h(a, b, c, d) {
+  return a * b - c * d;
+}
+
+assertEquals(-5.41, f(1.1, 2.1, 3.1));
+assertEquals(-5.41, f(1.1, 2.1, 3.1));
+%OptimizeFunctionOnNextCall(f);
+assertEquals(-5.41, f(1.1, 2.1, 3.1));
+
+assertEquals(8.36, g(2.2, 3.3, -1.1));
+assertEquals(8.36, g(2.2, 3.3, -1.1));
+%OptimizeFunctionOnNextCall(g);
+assertEquals(8.36, g(2.2, 3.3, -1.1));
+
+assertEquals(-1.5, h(1.5, 3.0, 12, 0.5));
+assertEquals(-1.5, h(1.5, 3.0, 12, 0.5));
+%OptimizeFunctionOnNextCall(h);
+assertEquals(-1.5, h(1.5, 3.0, 12, 0.5));
author	ulan@chromium.org <ulan@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Wed, 27 Feb 2013 10:24:40 +0000 (10:24 +0000)
committer	ulan@chromium.org <ulan@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Wed, 27 Feb 2013 10:24:40 +0000 (10:24 +0000)
src/arm/assembler-arm.cc		patch \| blob \| history
src/arm/assembler-arm.h		patch \| blob \| history
src/arm/disasm-arm.cc		patch \| blob \| history
src/arm/lithium-arm.cc		patch \| blob \| history
src/arm/lithium-arm.h		patch \| blob \| history
src/arm/lithium-codegen-arm.cc		patch \| blob \| history
src/arm/simulator-arm.cc		patch \| blob \| history
test/cctest/test-assembler-arm.cc		patch \| blob \| history
test/cctest/test-disasm-arm.cc		patch \| blob \| history
test/mjsunit/compiler/multiply-sub.js	[new file with mode: 0644]	patch \| blob