From: ulan@chromium.org Date: Wed, 27 Feb 2013 10:24:40 +0000 (+0000) Subject: Emit VMLS for multiply-subtract on ARM. X-Git-Tag: upstream/4.7.83~15019 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=87265114c43a15a927677b2b313400798d114ae6;p=platform%2Fupstream%2Fv8.git Emit VMLS for multiply-subtract on ARM. BUG=none Review URL: https://chromiumcodereview.appspot.com/12319113 Patch from Hans Wennborg . git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13748 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 7cd0a17..a8c32d9 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -2536,6 +2536,24 @@ void Assembler::vmla(const DwVfpRegister dst, } +void Assembler::vmls(const DwVfpRegister dst, + const DwVfpRegister src1, + const DwVfpRegister src2, + const Condition cond) { + // Instruction details available in ARM DDI 0406C.b, A8-932. + // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) | + // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0) + int vd, d; + dst.split_code(&vd, &d); + int vn, n; + src1.split_code(&vn, &n); + int vm, m; + src2.split_code(&vm, &m); + emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | B6 | + m*B5 | vm); +} + + void Assembler::vdiv(const DwVfpRegister dst, const DwVfpRegister src1, const DwVfpRegister src2, diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index b32c0f3..12cee54 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -1142,6 +1142,10 @@ class Assembler : public AssemblerBase { const DwVfpRegister src1, const DwVfpRegister src2, const Condition cond = al); + void vmls(const DwVfpRegister dst, + const DwVfpRegister src1, + const DwVfpRegister src2, + const Condition cond = al); void vdiv(const DwVfpRegister dst, const DwVfpRegister src1, const DwVfpRegister src2, diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index 79a996c..0b005c7 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -1108,6 +1108,7 @@ int Decoder::DecodeType7(Instruction* instr) { // Dd = vsub(Dn, Dm) // Dd = vmul(Dn, Dm) // Dd = vmla(Dn, Dm) +// Dd = vmls(Dn, Dm) // Dd = vdiv(Dn, Dm) // vcmp(Dd, Dm) // vmrs @@ -1176,6 +1177,12 @@ void Decoder::DecodeTypeVFP(Instruction* instr) { } else { Unknown(instr); // Not used by V8. } + } else if ((instr->Opc1Value() == 0x0) && (instr->Opc3Value() & 0x1)) { + if (instr->SzValue() == 0x1) { + Format(instr, "vmls.f64'cond 'Dd, 'Dn, 'Dm"); + } else { + Unknown(instr); // Not used by V8. + } } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) { if (instr->SzValue() == 0x1) { Format(instr, "vdiv.f64'cond 'Dd, 'Dn, 'Dm"); diff --git a/src/arm/lithium-arm.cc b/src/arm/lithium-arm.cc index 69fd5e4..bab398c 100644 --- a/src/arm/lithium-arm.cc +++ b/src/arm/lithium-arm.cc @@ -1368,16 +1368,23 @@ LInstruction* LChunkBuilder::DoMul(HMul* instr) { return DefineAsRegister(mul); } else if (instr->representation().IsDouble()) { - if (instr->UseCount() == 1 && instr->uses().value()->IsAdd()) { - HAdd* add = HAdd::cast(instr->uses().value()); - if (instr == add->left()) { - // This mul is the lhs of an add. The add and mul will be folded - // into a multiply-add. + if (instr->UseCount() == 1 && (instr->uses().value()->IsAdd() || + instr->uses().value()->IsSub())) { + HBinaryOperation* use = HBinaryOperation::cast(instr->uses().value()); + + if (use->IsAdd() && instr == use->left()) { + // This mul is the lhs of an add. 
The add and mul will be folded into a + // multiply-add in DoAdd. return NULL; } - if (instr == add->right() && !add->left()->IsMul()) { + if (instr == use->right() && use->IsAdd() && !use->left()->IsMul()) { // This mul is the rhs of an add, where the lhs is not another mul. - // The add and mul will be folded into a multiply-add. + // The add and mul will be folded into a multiply-add in DoAdd. + return NULL; + } + if (instr == use->right() && use->IsSub()) { + // This mul is the rhs of a sub. The sub and mul will be folded into a + // multiply-sub in DoSub. return NULL; } } @@ -1408,6 +1415,10 @@ LInstruction* LChunkBuilder::DoSub(HSub* instr) { } return result; } else if (instr->representation().IsDouble()) { + if (instr->right()->IsMul()) { + return DoMultiplySub(instr->left(), HMul::cast(instr->right())); + } + return DoArithmeticD(Token::SUB, instr); } else { return DoArithmeticT(Token::SUB, instr); @@ -1441,6 +1452,18 @@ LInstruction* LChunkBuilder::DoMultiplyAdd(HMul* mul, HValue* addend) { multiplicand_op)); } + +LInstruction* LChunkBuilder::DoMultiplySub(HValue* minuend, HMul* mul) { + LOperand* minuend_op = UseRegisterAtStart(minuend); + LOperand* multiplier_op = UseRegisterAtStart(mul->left()); + LOperand* multiplicand_op = UseRegisterAtStart(mul->right()); + + return DefineSameAsFirst(new(zone()) LMultiplySubD(minuend_op, + multiplier_op, + multiplicand_op)); +} + + LInstruction* LChunkBuilder::DoAdd(HAdd* instr) { if (instr->representation().IsInteger32()) { ASSERT(instr->left()->representation().IsInteger32()); @@ -1454,8 +1477,9 @@ LInstruction* LChunkBuilder::DoAdd(HAdd* instr) { } return result; } else if (instr->representation().IsDouble()) { - if (instr->left()->IsMul()) + if (instr->left()->IsMul()) { return DoMultiplyAdd(HMul::cast(instr->left()), instr->right()); + } if (instr->right()->IsMul()) { ASSERT(!instr->left()->IsMul()); diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h index a1df469..e81734e 100644 --- a/src/arm/lithium-arm.h +++ b/src/arm/lithium-arm.h @@ -140,6 +140,7 @@ class LCodeGen; V(ModI) \ V(MulI) \ V(MultiplyAddD) \ + V(MultiplySubD) \ V(NumberTagD) \ V(NumberTagI) \ V(NumberTagU) \ @@ -661,6 +662,24 @@ class LMultiplyAddD: public LTemplateInstruction<1, 3, 0> { }; +// Instruction for computing minuend - multiplier * multiplicand. 
+class LMultiplySubD: public LTemplateInstruction<1, 3, 0> { + public: + LMultiplySubD(LOperand* minuend, LOperand* multiplier, + LOperand* multiplicand) { + inputs_[0] = minuend; + inputs_[1] = multiplier; + inputs_[2] = multiplicand; + } + + LOperand* minuend() { return inputs_[0]; } + LOperand* multiplier() { return inputs_[1]; } + LOperand* multiplicand() { return inputs_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(MultiplySubD, "multiply-sub-d") +}; + + class LCmpIDAndBranch: public LControlInstruction<2, 0> { public: LCmpIDAndBranch(LOperand* left, LOperand* right) { @@ -2554,6 +2573,7 @@ class LChunkBuilder BASE_EMBEDDED { #undef DECLARE_DO LInstruction* DoMultiplyAdd(HMul* mul, HValue* addend); + LInstruction* DoMultiplySub(HValue* minuend, HMul* mul); LInstruction* DoRSub(HSub* instr); static bool HasMagicNumberForDivisor(int32_t divisor); diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index 29f8260..5463200 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -1478,6 +1478,18 @@ void LCodeGen::DoMultiplyAddD(LMultiplyAddD* instr) { } +void LCodeGen::DoMultiplySubD(LMultiplySubD* instr) { + DwVfpRegister minuend = ToDoubleRegister(instr->minuend()); + DwVfpRegister multiplier = ToDoubleRegister(instr->multiplier()); + DwVfpRegister multiplicand = ToDoubleRegister(instr->multiplicand()); + + // This is computed in-place. + ASSERT(minuend.is(ToDoubleRegister(instr->result()))); + + __ vmls(minuend, multiplier, multiplicand); +} + + void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) { const Register result = ToRegister(instr->result()); const Register left = ToRegister(instr->left()); diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index 86d0614..b7bc839 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -2783,20 +2783,26 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { double dm_value = get_double_from_d_register(vm); double dd_value = dn_value * dm_value; set_d_register_from_double(vd, dd_value); - } else if ((instr->Opc1Value() == 0x0) && !(instr->Opc3Value() & 0x1)) { - // vmla + } else if ((instr->Opc1Value() == 0x0)) { + // vmla, vmls + const bool is_vmls = (instr->Opc3Value() & 0x1); + if (instr->SzValue() != 0x1) { UNREACHABLE(); // Not used by V8. } - double dd_value = get_double_from_d_register(vd); - double dn_value = get_double_from_d_register(vn); - double dm_value = get_double_from_d_register(vm); + const double dd_val = get_double_from_d_register(vd); + const double dn_val = get_double_from_d_register(vn); + const double dm_val = get_double_from_d_register(vm); - // Note: we do the mul and add in separate steps to avoid getting a result - // with too high precision. - set_d_register_from_double(vd, dn_value * dm_value); - set_d_register_from_double(vd, get_double_from_d_register(vd) + dd_value); + // Note: we do the mul and add/sub in separate steps to avoid getting a + // result with too high precision. 
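+      // (VMLA/VMLS are the non-fused VFP forms: the hardware rounds the
+      // intermediate product to a double before the accumulate step, unlike
+      // the fused VFMA/VFMS. Writing the product back into the d register
+      // below forces that intermediate rounding in the simulator as well.)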
+ set_d_register_from_double(vd, dn_val * dm_val); + if (is_vmls) { + set_d_register_from_double(vd, dd_val - get_double_from_d_register(vd)); + } else { + set_d_register_from_double(vd, dd_val + get_double_from_d_register(vd)); + } } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) { // vdiv if (instr->SzValue() != 0x1) { diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index 9ce74fb..14bcb1a 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -265,6 +265,7 @@ TEST(4) { __ vstr(d5, r4, OFFSET_OF(T, c)); __ vmla(d5, d6, d7); + __ vmls(d5, d5, d6); __ vmov(r2, r3, d5); __ vmov(d4, r2, r3); @@ -354,7 +355,7 @@ TEST(4) { CHECK_EQ(1.0, t.e); CHECK_EQ(1.000000059604644775390625, t.d); CHECK_EQ(4.25, t.c); - CHECK_EQ(8.375, t.b); + CHECK_EQ(-4.1875, t.b); CHECK_EQ(1.5, t.a); } } diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc index f51b0f3..6d7c4eb 100644 --- a/test/cctest/test-disasm-arm.cc +++ b/test/cctest/test-disasm-arm.cc @@ -567,6 +567,11 @@ TEST(Vfp) { COMPARE(vmla(d6, d4, d5, cc), "3e046b05 vmla.f64cc d6, d4, d5"); + COMPARE(vmls(d2, d1, d0), + "ee012b40 vmls.f64 d2, d1, d0"); + COMPARE(vmls(d6, d4, d5, cc), + "3e046b45 vmls.f64cc d6, d4, d5"); + COMPARE(vcvt_u32_f64(s0, d0), "eebc0bc0 vcvt.u32.f64 s0, d0"); COMPARE(vcvt_s32_f64(s0, d0), diff --git a/test/mjsunit/compiler/multiply-sub.js b/test/mjsunit/compiler/multiply-sub.js new file mode 100644 index 0000000..4793181 --- /dev/null +++ b/test/mjsunit/compiler/multiply-sub.js @@ -0,0 +1,56 @@ +// Copyright 2013 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --allow-natives-syntax +// Test expressions that can be computed with a multiply-add instruction. 
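+//
+// f exercises the new pattern: the mul on the right of the sub is folded by
+// DoSub/DoMultiplySub into LMultiplySubD, which emits vmls.
+// g keeps the mul on the left of the sub, so it is not folded.
+// h folds only its right-hand mul (c * d) into the multiply-sub.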
+ +function f(a, b, c) { + return a - b * c; +} + +function g(a, b, c) { + return a * b - c; +} + +function h(a, b, c, d) { + return a * b - c * d; +} + +assertEquals(-5.41, f(1.1, 2.1, 3.1)); +assertEquals(-5.41, f(1.1, 2.1, 3.1)); +%OptimizeFunctionOnNextCall(f); +assertEquals(-5.41, f(1.1, 2.1, 3.1)); + +assertEquals(8.36, g(2.2, 3.3, -1.1)); +assertEquals(8.36, g(2.2, 3.3, -1.1)); +%OptimizeFunctionOnNextCall(g); +assertEquals(8.36, g(2.2, 3.3, -1.1)); + +assertEquals(-1.5, h(1.5, 3.0, 12, 0.5)); +assertEquals(-1.5, h(1.5, 3.0, 12, 0.5)); +%OptimizeFunctionOnNextCall(h); +assertEquals(-1.5, h(1.5, 3.0, 12, 0.5));
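---

Note on the new expectation in test-assembler-arm.cc: the CHECK for t.b changes
from 8.375 to -4.1875 because the added vmls(d5, d5, d6) computes
d5 = d5 - d5 * d6 on the value left by the preceding vmla. Below is a minimal
standalone sketch of that arithmetic, assuming d6 holds 1.5 (the value checked
for t.a) and d7 holds 2.75 at that point in the test; the vldr loads that set
them up are outside the hunks shown above.

#include <cassert>

// Accumulate forms as used by the macro assembler in TEST(4):
//   vmla(dd, dn, dm): dd = dd + dn * dm
//   vmls(dd, dn, dm): dd = dd - dn * dm
static double vmla(double dd, double dn, double dm) { return dd + dn * dm; }
static double vmls(double dd, double dn, double dm) { return dd - dn * dm; }

int main() {
  const double d6 = 1.5;   // assumed: value of t.a loaded earlier in the test
  const double d7 = 2.75;  // assumed: value loaded earlier in the test
  double d5 = d6 + d7;     // vadd: 4.25, matches CHECK_EQ(4.25, t.c)
  d5 = vmla(d5, d6, d7);   // 4.25 + 4.125 = 8.375, the old expectation
  d5 = vmls(d5, d5, d6);   // 8.375 - 12.5625 = -4.1875, the new expectation
  assert(d5 == -4.1875);   // exact: every intermediate is a binary fraction
  return 0;
}

The accumulate form also explains the register constraint in the lowering:
vmls overwrites its destination, so DoMultiplySub defines the result
same-as-first and LCodeGen::DoMultiplySubD asserts that the minuend is the
result register.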