From 08747c3a16942bb3c98bdc06a5f99035faaf58d7 Mon Sep 17 00:00:00 2001
From: "danno@chromium.org"
 <danno@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Date: Mon, 1 Oct 2012 21:27:33 +0000
Subject: [PATCH] Support for SDIV and MLS ARM instructions, and implement
 DoModI using them. Also added support for the runtime detection to check if
 hardware supports SDIV/UDIV Other new opportunities to exploit SDIV/UDIV will
 be done in separate issues.

Review URL: https://chromiumcodereview.appspot.com/10977051
Patch from Subrato K De <subratokde@codeaurora.org>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12646 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 src/arm/assembler-arm.cc       |  24 +++++
 src/arm/assembler-arm.h        |   7 ++
 src/arm/disasm-arm.cc          |  29 +++++-
 src/arm/lithium-codegen-arm.cc | 211 +++++++++++++++++++++++------------------
 src/arm/simulator-arm.cc       |  41 +++++++-
 src/flag-definitions.h         |   2 +
 src/platform-linux.cc          |   3 +
 src/v8globals.h                |   1 +
 8 files changed, 214 insertions(+), 104 deletions(-)

diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index 78db06d..7a65ab2 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -110,6 +110,10 @@ void CpuFeatures::Probe() {
   if (FLAG_enable_armv7) {
     supported_ |= 1u << ARMv7;
   }
+
+  if (FLAG_enable_sudiv) {
+    supported_ |= 1u << SUDIV;
+  }
 #else  // __arm__
   // Probe for additional features not already known to be available.
   if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
@@ -125,6 +129,10 @@ void CpuFeatures::Probe() {
     found_by_runtime_probing_ |= 1u << ARMv7;
   }
 
+  if (!IsSupported(SUDIV) && OS::ArmCpuHasFeature(SUDIV)) {
+    found_by_runtime_probing_ |= 1u << SUDIV;
+  }
+
   supported_ |= found_by_runtime_probing_;
 #endif
 
@@ -1207,6 +1215,22 @@ void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
 }
 
 
+void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
+                    Condition cond) {
+  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
+  emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
+       src2.code()*B8 | B7 | B4 | src1.code());
+}
+
+
+void Assembler::sdiv(Register dst, Register src1, Register src2,
+                     Condition cond) {
+  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
+  emit(cond | B26 | B25| B24 | B20 | dst.code()*B16 | 0xf * B12 |
+       src2.code()*B8 | B4 | src1.code());
+}
+
+
 void Assembler::mul(Register dst, Register src1, Register src2,
                     SBit s, Condition cond) {
   ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h
index c62d480..005c78d 100644
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -511,6 +511,7 @@ class CpuFeatures : public AllStatic {
     ASSERT(initialized_);
     if (f == VFP3 && !FLAG_enable_vfp3) return false;
     if (f == VFP2 && !FLAG_enable_vfp2) return false;
+    if (f == SUDIV && !FLAG_enable_sudiv) return false;
     return (supported_ & (1u << f)) != 0;
   }
 
@@ -869,6 +870,12 @@ class Assembler : public AssemblerBase {
   void mla(Register dst, Register src1, Register src2, Register srcA,
            SBit s = LeaveCC, Condition cond = al);
 
+  void mls(Register dst, Register src1, Register src2, Register srcA,
+           Condition cond = al);
+
+  void sdiv(Register dst, Register src1, Register src2,
+            Condition cond = al);
+
   void mul(Register dst, Register src1, Register src2,
            SBit s = LeaveCC, Condition cond = al);
 
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc
index 96a7d3c..7c5f63e 100644
--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -692,11 +692,19 @@ void Decoder::DecodeType01(Instruction* instr) {
             // Rn field to encode it.
             Format(instr, "mul'cond's 'rn, 'rm, 'rs");
           } else {
-            // The MLA instruction description (A 4.1.28) refers to the order
-            // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
-            // Rn field to encode the Rd register and the Rd field to encode
-            // the Rn register.
-            Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+            if (instr->Bit(22) == 0) {
+              // The MLA instruction description (A 4.1.28) refers to the order
+              // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+              // Rn field to encode the Rd register and the Rd field to encode
+              // the Rn register.
+              Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+            } else {
+              // The MLS instruction description (A 4.1.29) refers to the order
+              // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+              // Rn field to encode the Rd register and the Rd field to encode
+              // the Rn register.
+              Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
+            }
           }
         } else {
           // The signed/long multiply instructions use the terms RdHi and RdLo
@@ -974,6 +982,17 @@ void Decoder::DecodeType3(Instruction* instr) {
       break;
     }
     case db_x: {
+      if (FLAG_enable_sudiv) {
+        if (!instr->HasW()) {
+          if (instr->Bits(5, 4) == 0x1) {
+            if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
+              // SDIV (in V8 notation matching ARM ISA format) rn = rm/rs
+              Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs");
+              break;
+            }
+          }
+        }
+      }
       Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
       break;
     }
diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc
index e071183..1216486 100644
--- a/src/arm/lithium-codegen-arm.cc
+++ b/src/arm/lithium-codegen-arm.cc
@@ -979,109 +979,132 @@ void LCodeGen::DoModI(LModI* instr) {
   Register left = ToRegister(instr->left());
   Register right = ToRegister(instr->right());
   Register result = ToRegister(instr->result());
+  Label done;
 
-  Register scratch = scratch0();
-  Register scratch2 = ToRegister(instr->temp());
-  DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
-  DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
-  DwVfpRegister quotient = double_scratch0();
-
-  ASSERT(!dividend.is(divisor));
-  ASSERT(!dividend.is(quotient));
-  ASSERT(!divisor.is(quotient));
-  ASSERT(!scratch.is(left));
-  ASSERT(!scratch.is(right));
-  ASSERT(!scratch.is(result));
+  if (CpuFeatures::IsSupported(SUDIV)) {
+    CpuFeatures::Scope scope(SUDIV);
+    // Check for x % 0.
+    if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
+      __ cmp(right, Operand(0));
+      DeoptimizeIf(eq, instr->environment());
+    }
 
-  Label done, vfp_modulo, both_positive, right_negative;
+    // For  r3 = r1 % r2; we can have the following ARM code
+    // sdiv r3, r1, r2
+    // mls r3, r3, r2, r1
 
-  // Check for x % 0.
-  if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
-    __ cmp(right, Operand(0));
-    DeoptimizeIf(eq, instr->environment());
-  }
+    __ sdiv(result, left, right);
+    __ mls(result, result, right, left);
+    __ cmp(result, Operand(0));
+    __ b(ne, &done);
 
-  __ Move(result, left);
+    if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
+       __ cmp(left, Operand(0));
+       DeoptimizeIf(lt, instr->environment());
+    }
+  } else {
+    Register scratch = scratch0();
+    Register scratch2 = ToRegister(instr->temp());
+    DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
+    DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
+    DwVfpRegister quotient = double_scratch0();
+
+    ASSERT(!dividend.is(divisor));
+    ASSERT(!dividend.is(quotient));
+    ASSERT(!divisor.is(quotient));
+    ASSERT(!scratch.is(left));
+    ASSERT(!scratch.is(right));
+    ASSERT(!scratch.is(result));
+
+    Label done, vfp_modulo, both_positive, right_negative;
+
+    // Check for x % 0.
+    if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
+      __ cmp(right, Operand(0));
+      DeoptimizeIf(eq, instr->environment());
+    }
 
-  // (0 % x) must yield 0 (if x is finite, which is the case here).
-  __ cmp(left, Operand(0));
-  __ b(eq, &done);
-  // Preload right in a vfp register.
-  __ vmov(divisor.low(), right);
-  __ b(lt, &vfp_modulo);
+    __ Move(result, left);
 
-  __ cmp(left, Operand(right));
-  __ b(lt, &done);
-
-  // Check for (positive) power of two on the right hand side.
-  __ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
-                                     scratch,
-                                     &right_negative,
-                                     &both_positive);
-  // Perform modulo operation (scratch contains right - 1).
-  __ and_(result, scratch, Operand(left));
-  __ b(&done);
+    // (0 % x) must yield 0 (if x is finite, which is the case here).
+    __ cmp(left, Operand(0));
+    __ b(eq, &done);
+    // Preload right in a vfp register.
+    __ vmov(divisor.low(), right);
+    __ b(lt, &vfp_modulo);
 
-  __ bind(&right_negative);
-  // Negate right. The sign of the divisor does not matter.
-  __ rsb(right, right, Operand(0));
-
-  __ bind(&both_positive);
-  const int kUnfolds = 3;
-  // If the right hand side is smaller than the (nonnegative)
-  // left hand side, the left hand side is the result.
-  // Else try a few subtractions of the left hand side.
-  __ mov(scratch, left);
-  for (int i = 0; i < kUnfolds; i++) {
-    // Check if the left hand side is less or equal than the
-    // the right hand side.
-    __ cmp(scratch, Operand(right));
-    __ mov(result, scratch, LeaveCC, lt);
+    __ cmp(left, Operand(right));
     __ b(lt, &done);
-    // If not, reduce the left hand side by the right hand
-    // side and check again.
-    if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
-  }
-
-  __ bind(&vfp_modulo);
-  // Load the arguments in VFP registers.
-  // The divisor value is preloaded before. Be careful that 'right' is only live
-  // on entry.
-  __ vmov(dividend.low(), left);
-  // From here on don't use right as it may have been reallocated (for example
-  // to scratch2).
-  right = no_reg;
-
-  __ vcvt_f64_s32(dividend, dividend.low());
-  __ vcvt_f64_s32(divisor, divisor.low());
-
-  // We do not care about the sign of the divisor.
-  __ vabs(divisor, divisor);
-  // Compute the quotient and round it to a 32bit integer.
-  __ vdiv(quotient, dividend, divisor);
-  __ vcvt_s32_f64(quotient.low(), quotient);
-  __ vcvt_f64_s32(quotient, quotient.low());
-
-  // Compute the remainder in result.
-  DwVfpRegister double_scratch = dividend;
-  __ vmul(double_scratch, divisor, quotient);
-  __ vcvt_s32_f64(double_scratch.low(), double_scratch);
-  __ vmov(scratch, double_scratch.low());
-
-  if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
-    __ sub(result, left, scratch);
-  } else {
-    Label ok;
-    // Check for -0.
-    __ sub(scratch2, left, scratch, SetCC);
-    __ b(ne, &ok);
-    __ cmp(left, Operand(0));
-    DeoptimizeIf(mi, instr->environment());
-    __ bind(&ok);
-    // Load the result and we are done.
-    __ mov(result, scratch2);
-  }
 
+    // Check for (positive) power of two on the right hand side.
+    __ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
+                                       scratch,
+                                       &right_negative,
+                                       &both_positive);
+    // Perform modulo operation (scratch contains right - 1).
+    __ and_(result, scratch, Operand(left));
+    __ b(&done);
+
+    __ bind(&right_negative);
+    // Negate right. The sign of the divisor does not matter.
+    __ rsb(right, right, Operand(0));
+
+    __ bind(&both_positive);
+    const int kUnfolds = 3;
+    // If the right hand side is smaller than the (nonnegative)
+    // left hand side, the left hand side is the result.
+    // Else try a few subtractions of the left hand side.
+    __ mov(scratch, left);
+    for (int i = 0; i < kUnfolds; i++) {
+      // Check if the left hand side is less or equal than the
+      // the right hand side.
+      __ cmp(scratch, Operand(right));
+      __ mov(result, scratch, LeaveCC, lt);
+      __ b(lt, &done);
+      // If not, reduce the left hand side by the right hand
+      // side and check again.
+      if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
+    }
+
+    __ bind(&vfp_modulo);
+    // Load the arguments in VFP registers.
+    // The divisor value is preloaded before. Be careful that 'right'
+    // is only live on entry.
+    __ vmov(dividend.low(), left);
+    // From here on don't use right as it may have been reallocated
+    // (for example to scratch2).
+    right = no_reg;
+
+    __ vcvt_f64_s32(dividend, dividend.low());
+    __ vcvt_f64_s32(divisor, divisor.low());
+
+    // We do not care about the sign of the divisor.
+    __ vabs(divisor, divisor);
+    // Compute the quotient and round it to a 32bit integer.
+    __ vdiv(quotient, dividend, divisor);
+    __ vcvt_s32_f64(quotient.low(), quotient);
+    __ vcvt_f64_s32(quotient, quotient.low());
+
+    // Compute the remainder in result.
+    DwVfpRegister double_scratch = dividend;
+    __ vmul(double_scratch, divisor, quotient);
+    __ vcvt_s32_f64(double_scratch.low(), double_scratch);
+    __ vmov(scratch, double_scratch.low());
+
+    if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
+      __ sub(result, left, scratch);
+    } else {
+      Label ok;
+      // Check for -0.
+      __ sub(scratch2, left, scratch, SetCC);
+      __ b(ne, &ok);
+      __ cmp(left, Operand(0));
+      DeoptimizeIf(mi, instr->environment());
+      __ bind(&ok);
+      // Load the result and we are done.
+      __ mov(result, scratch2);
+    }
+  }
   __ bind(&done);
 }
 
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
index a057de5..91df404 100644
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -1986,11 +1986,23 @@ void Simulator::DecodeType01(Instruction* instr) {
               SetNZFlags(alu_out);
             }
           } else {
-            // The MLA instruction description (A 4.1.28) refers to the order
-            // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
-            // Rn field to encode the Rd register and the Rd field to encode
-            // the Rn register.
-            Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+            int rd = instr->RdValue();
+            int32_t acc_value = get_register(rd);
+            if (instr->Bit(22) == 0) {
+              // The MLA instruction description (A 4.1.28) refers to the order
+              // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
+              // Rn field to encode the Rd register and the Rd field to encode
+              // the Rn register.
+              // Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
+              int32_t mul_out = rm_val * rs_val;
+              int32_t result = acc_value + mul_out;
+              set_register(rn, result);
+            } else {
+              // Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
+              int32_t mul_out = rm_val * rs_val;
+              int32_t result = acc_value - mul_out;
+              set_register(rn, result);
+            }
           }
         } else {
           // The signed/long multiply instructions use the terms RdHi and RdLo
@@ -2546,6 +2558,25 @@ void Simulator::DecodeType3(Instruction* instr) {
       break;
     }
     case db_x: {
+      if (FLAG_enable_sudiv) {
+        if (!instr->HasW()) {
+          if (instr->Bits(5, 4) == 0x1) {
+             if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
+               // sdiv (in V8 notation matching ARM ISA format) rn = rm/rs
+               // Format(instr, "'sdiv'cond'b 'rn, 'rm, 'rs);
+               int rm = instr->RmValue();
+               int32_t rm_val = get_register(rm);
+               int rs = instr->RsValue();
+               int32_t rs_val = get_register(rs);
+               int32_t ret_val = 0;
+               ASSERT(rs_val != 0);
+               ret_val = rm_val/rs_val;
+               set_register(rn, ret_val);
+               return;
+             }
+           }
+         }
+       }
       // Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
       addr = rn_val - shifter_operand;
       if (instr->HasW()) {
diff --git a/src/flag-definitions.h b/src/flag-definitions.h
index 685559a..d9fa126 100644
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -284,6 +284,8 @@ DEFINE_bool(enable_vfp2, true,
             "enable use of VFP2 instructions if available")
 DEFINE_bool(enable_armv7, true,
             "enable use of ARMv7 instructions if available (ARM only)")
+DEFINE_bool(enable_sudiv, true,
+            "enable use of SDIV and UDIV instructions if available (ARM only)")
 DEFINE_bool(enable_fpu, true,
             "enable use of MIPS FPU instructions if available (MIPS only)")
 
diff --git a/src/platform-linux.cc b/src/platform-linux.cc
index 606d102..b9ce9d9 100644
--- a/src/platform-linux.cc
+++ b/src/platform-linux.cc
@@ -148,6 +148,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) {
     case ARMv7:
       search_string = "ARMv7";
       break;
+    case SUDIV:
+      search_string = "idiva";
+      break;
     default:
       UNREACHABLE();
   }
diff --git a/src/v8globals.h b/src/v8globals.h
index 3d214f8..1d22a11 100644
--- a/src/v8globals.h
+++ b/src/v8globals.h
@@ -438,6 +438,7 @@ enum CpuFeature { SSE4_1 = 32 + 19,  // x86
                   VFP3 = 1,    // ARM
                   ARMv7 = 2,   // ARM
                   VFP2 = 3,    // ARM
+                  SUDIV = 4,   // ARM
                   SAHF = 0,    // x86
                   FPU = 1};    // MIPS
 
-- 
2.7.4