MIPS: Consistenly handle power-of-2 divisors in division-like operations.
authorplind44@gmail.com <plind44@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 7 Mar 2014 22:11:52 +0000 (22:11 +0000)
committerplind44@gmail.com <plind44@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 7 Mar 2014 22:11:52 +0000 (22:11 +0000)
Port r19715 (0d6d244)

Original commit message:
Lithium currently supports 3 division-like operations on integral
operands: "Normal" division (rounding towards zero), flooring division
(rounding towards -Infinity) and modulus calculation (the counterpart
for the "normal" division). For divisors which are a power of 2, one can
efficiently use some bit fiddling to avoid the actual division for such
operations. This CL cleanly splits off these operations into separate
Lithium instructions, making the code much more maintainable and more
consistent across platforms.

There are 2 basic variations of these bit fiddling algorithms: One
involving branches and a seemingly more clever one without branches.
Choosing between the two is not as easy as it seems: Benchmarks (and
probably real-world) programs seem to favor positive dividends,
registers and shifting units are sometimes scarce resources, and branch
prediction is quite good in modern processors. Therefore only the
"normal" division by a power of 2 is implemented in a branch-free
manner, this seems to be the best approach in practice. If this turns
out to be wrong, we can easily and locally change this.

BUG=
R=plind44@gmail.com

Review URL: https://codereview.chromium.org/189433008

Patch from Balazs Kilvady <kilvadyb@homejinni.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19733 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/mips/lithium-codegen-mips.cc
src/mips/lithium-mips.cc
src/mips/lithium-mips.h

index d2e3c06..c7e2a43 100644 (file)
@@ -1060,68 +1060,75 @@ void LCodeGen::DoUnknownOSRValue(LUnknownOSRValue* instr) {
 }
 
 
-void LCodeGen::DoModI(LModI* instr) {
+void LCodeGen::DoModByPowerOf2I(LModByPowerOf2I* instr) {
+  Register dividend = ToRegister(instr->dividend());
+  int32_t divisor = instr->divisor();
+  ASSERT(dividend.is(ToRegister(instr->result())));
+
+  // Theoretically, a variation of the branch-free code for integer division by
+  // a power of 2 (calculating the remainder via an additional multiplication
+  // (which gets simplified to an 'and') and subtraction) should be faster, and
+  // this is exactly what GCC and clang emit. Nevertheless, benchmarks seem to
+  // indicate that positive dividends are heavily favored, so the branching
+  // version performs better.
   HMod* hmod = instr->hydrogen();
-  HValue* left = hmod->left();
-  HValue* right = hmod->right();
-  if (hmod->RightIsPowerOf2()) {
-    const Register left_reg = ToRegister(instr->left());
-    const Register result_reg = ToRegister(instr->result());
+  int32_t mask = divisor < 0 ? -(divisor + 1) : (divisor - 1);
+  Label dividend_is_not_negative, done;
 
+  if (hmod->left()->CanBeNegative()) {
+    __ Branch(&dividend_is_not_negative, ge, dividend, Operand(zero_reg));
     // Note: The code below even works when right contains kMinInt.
-    int32_t divisor = Abs(right->GetInteger32Constant());
-
-    Label left_is_not_negative, done;
-    if (left->CanBeNegative()) {
-      __ Branch(left_reg.is(result_reg) ? PROTECT : USE_DELAY_SLOT,
-                &left_is_not_negative, ge, left_reg, Operand(zero_reg));
-      __ subu(result_reg, zero_reg, left_reg);
-      __ And(result_reg, result_reg, divisor - 1);
-      if (hmod->CheckFlag(HValue::kBailoutOnMinusZero)) {
-        DeoptimizeIf(eq, instr->environment(), result_reg, Operand(zero_reg));
-      }
-      __ Branch(USE_DELAY_SLOT, &done);
-      __ subu(result_reg, zero_reg, result_reg);
+    __ subu(dividend, zero_reg, dividend);
+    __ And(dividend, dividend, Operand(mask));
+    if (hmod->CheckFlag(HValue::kBailoutOnMinusZero)) {
+      DeoptimizeIf(eq, instr->environment(), dividend, Operand(zero_reg));
     }
+    __ Branch(USE_DELAY_SLOT, &done);
+    __ subu(dividend, zero_reg, dividend);
+  }
 
-    __ bind(&left_is_not_negative);
-    __ And(result_reg, left_reg, divisor - 1);
-    __ bind(&done);
-  } else {
-    const Register scratch = scratch0();
-    const Register left_reg = ToRegister(instr->left());
-    const Register result_reg = ToRegister(instr->result());
+  __ bind(&dividend_is_not_negative);
+  __ And(dividend, dividend, Operand(mask));
+  __ bind(&done);
+}
 
-    // div runs in the background while we check for special cases.
-    Register right_reg = EmitLoadRegister(instr->right(), scratch);
-    __ div(left_reg, right_reg);
 
-    Label done;
-    // Check for x % 0, we have to deopt in this case because we can't return a
-    // NaN.
-    if (right->CanBeZero()) {
-      DeoptimizeIf(eq, instr->environment(), right_reg, Operand(zero_reg));
-    }
+void LCodeGen::DoModI(LModI* instr) {
+  HMod* hmod = instr->hydrogen();
+  HValue* left = hmod->left();
+  HValue* right = hmod->right();
+  const Register left_reg = ToRegister(instr->left());
+  const Register right_reg = ToRegister(instr->right());
+  const Register result_reg = ToRegister(instr->result());
 
-    // Check for kMinInt % -1, we have to deopt if we care about -0, because we
-    // can't return that.
-    if (left->RangeCanInclude(kMinInt) && right->RangeCanInclude(-1)) {
-      Label left_not_min_int;
-      __ Branch(&left_not_min_int, ne, left_reg, Operand(kMinInt));
-      // TODO(svenpanne) Don't deopt when we don't care about -0.
-      DeoptimizeIf(eq, instr->environment(), right_reg, Operand(-1));
-      __ bind(&left_not_min_int);
-    }
+  // div runs in the background while we check for special cases.
+  __ div(left_reg, right_reg);
 
-    // TODO(svenpanne) Only emit the test/deopt if we have to.
-    __ Branch(USE_DELAY_SLOT, &done, ge, left_reg, Operand(zero_reg));
-    __ mfhi(result_reg);
+  Label done;
+  // Check for x % 0, we have to deopt in this case because we can't return a
+  // NaN.
+  if (right->CanBeZero()) {
+    DeoptimizeIf(eq, instr->environment(), right_reg, Operand(zero_reg));
+  }
 
-    if (hmod->CheckFlag(HValue::kBailoutOnMinusZero)) {
-      DeoptimizeIf(eq, instr->environment(), result_reg, Operand(zero_reg));
-    }
-    __ bind(&done);
+  // Check for kMinInt % -1, we have to deopt if we care about -0, because we
+  // can't return that.
+  if (left->RangeCanInclude(kMinInt) && right->RangeCanInclude(-1)) {
+    Label left_not_min_int;
+    __ Branch(&left_not_min_int, ne, left_reg, Operand(kMinInt));
+    // TODO(svenpanne) Don't deopt when we don't care about -0.
+    DeoptimizeIf(eq, instr->environment(), right_reg, Operand(-1));
+    __ bind(&left_not_min_int);
+  }
+
+  // TODO(svenpanne) Only emit the test/deopt if we have to.
+  __ Branch(USE_DELAY_SLOT, &done, ge, left_reg, Operand(zero_reg));
+  __ mfhi(result_reg);
+
+  if (hmod->CheckFlag(HValue::kBailoutOnMinusZero)) {
+    DeoptimizeIf(eq, instr->environment(), result_reg, Operand(zero_reg));
   }
+  __ bind(&done);
 }
 
 
@@ -1277,67 +1284,74 @@ void LCodeGen::DoMultiplyAddD(LMultiplyAddD* instr) {
 }
 
 
+void LCodeGen::DoFlooringDivByConstI(LFlooringDivByConstI* instr) {
+  Register left = ToRegister(instr->dividend());
+  Register remainder = ToRegister(instr->temp());
+  Register scratch = scratch0();
+  Register result = ToRegister(instr->result());
+
+  ASSERT(instr->divisor()->IsConstantOperand());
+  Label done;
+  int32_t divisor = ToInteger32(LConstantOperand::cast(instr->divisor()));
+  if (divisor < 0) {
+    DeoptimizeIf(eq, instr->environment(), left, Operand(zero_reg));
+  }
+  EmitSignedIntegerDivisionByConstant(result,
+                                      left,
+                                      divisor,
+                                      remainder,
+                                      scratch,
+                                      instr->environment());
+  // We performed a truncating division. Correct the result if necessary.
+  __ Branch(&done, eq, remainder, Operand(zero_reg), USE_DELAY_SLOT);
+  __ Xor(scratch , remainder, Operand(divisor));
+  __ Branch(&done, ge, scratch, Operand(zero_reg));
+  __ Subu(result, result, Operand(1));
+  __ bind(&done);
+}
+
+
 void LCodeGen::DoMathFloorOfDiv(LMathFloorOfDiv* instr) {
   const Register result = ToRegister(instr->result());
   const Register left = ToRegister(instr->left());
   const Register remainder = ToRegister(instr->temp());
   const Register scratch = scratch0();
 
-  if (instr->right()->IsConstantOperand()) {
-    Label done;
-    int32_t divisor = ToInteger32(LConstantOperand::cast(instr->right()));
-    if (divisor < 0) {
-      DeoptimizeIf(eq, instr->environment(), left, Operand(zero_reg));
-    }
-    EmitSignedIntegerDivisionByConstant(result,
-                                        left,
-                                        divisor,
-                                        remainder,
-                                        scratch,
-                                        instr->environment());
-    // We performed a truncating division. Correct the result if necessary.
-    __ Branch(&done, eq, remainder, Operand(zero_reg), USE_DELAY_SLOT);
-    __ Xor(scratch , remainder, Operand(divisor));
-    __ Branch(&done, ge, scratch, Operand(zero_reg));
-    __ Subu(result, result, Operand(1));
-    __ bind(&done);
-  } else {
-    Label done;
-    const Register right = ToRegister(instr->right());
+  Label done;
+  const Register right = ToRegister(instr->right());
 
-    // On MIPS div is asynchronous - it will run in the background while we
-    // check for special cases.
-    __ div(left, right);
+  // On MIPS div is asynchronous - it will run in the background while we
+  // check for special cases.
+  __ div(left, right);
 
-    // Check for x / 0.
-    DeoptimizeIf(eq, instr->environment(), right, Operand(zero_reg));
+  // Check for x / 0.
+  DeoptimizeIf(eq, instr->environment(), right, Operand(zero_reg));
 
-    // Check for (0 / -x) that will produce negative zero.
-    if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
-      Label left_not_zero;
-      __ Branch(&left_not_zero, ne, left, Operand(zero_reg));
-      DeoptimizeIf(lt, instr->environment(), right, Operand(zero_reg));
-      __ bind(&left_not_zero);
-    }
+  // Check for (0 / -x) that will produce negative zero.
+  if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
+    Label left_not_zero;
+    __ Branch(&left_not_zero, ne, left, Operand(zero_reg));
+    DeoptimizeIf(lt, instr->environment(), right, Operand(zero_reg));
+    __ bind(&left_not_zero);
+  }
 
-    // Check for (kMinInt / -1).
-    if (instr->hydrogen()->CheckFlag(HValue::kCanOverflow)) {
-      Label left_not_min_int;
-      __ Branch(&left_not_min_int, ne, left, Operand(kMinInt));
-      DeoptimizeIf(eq, instr->environment(), right, Operand(-1));
-      __ bind(&left_not_min_int);
-    }
+  // Check for (kMinInt / -1).
+  if (instr->hydrogen()->CheckFlag(HValue::kCanOverflow)) {
+    Label left_not_min_int;
+    __ Branch(&left_not_min_int, ne, left, Operand(kMinInt));
+    DeoptimizeIf(eq, instr->environment(), right, Operand(-1));
+    __ bind(&left_not_min_int);
+  }
 
-    __ mfhi(remainder);
-    __ mflo(result);
+  __ mfhi(remainder);
+  __ mflo(result);
 
-    // We performed a truncating division. Correct the result if necessary.
-    __ Branch(&done, eq, remainder, Operand(zero_reg), USE_DELAY_SLOT);
-    __ Xor(scratch , remainder, Operand(right));
-    __ Branch(&done, ge, scratch, Operand(zero_reg));
-    __ Subu(result, result, Operand(1));
-    __ bind(&done);
-  }
+  // We performed a truncating division. Correct the result if necessary.
+  __ Branch(&done, eq, remainder, Operand(zero_reg), USE_DELAY_SLOT);
+  __ Xor(scratch , remainder, Operand(right));
+  __ Branch(&done, ge, scratch, Operand(zero_reg));
+  __ Subu(result, result, Operand(1));
+  __ bind(&done);
 }
 
 
index 3146408..d4f4e22 100644 (file)
@@ -1244,14 +1244,20 @@ LInstruction* LChunkBuilder::DoBitwise(HBitwise* instr) {
 }
 
 
+LInstruction* LChunkBuilder::DoDivI(HBinaryOperation* instr) {
+  ASSERT(instr->representation().IsSmiOrInteger32());
+  ASSERT(instr->left()->representation().Equals(instr->representation()));
+  ASSERT(instr->right()->representation().Equals(instr->representation()));
+  LOperand* dividend = UseRegister(instr->left());
+  LOperand* divisor = UseRegister(instr->right());
+  LDivI* div = new(zone()) LDivI(dividend, divisor);
+  return AssignEnvironment(DefineAsRegister(div));
+}
+
+
 LInstruction* LChunkBuilder::DoDiv(HDiv* instr) {
   if (instr->representation().IsSmiOrInteger32()) {
-    ASSERT(instr->left()->representation().Equals(instr->representation()));
-    ASSERT(instr->right()->representation().Equals(instr->representation()));
-    LOperand* dividend = UseRegister(instr->left());
-    LOperand* divisor = UseRegister(instr->right());
-    LDivI* div = new(zone()) LDivI(dividend, divisor);
-    return AssignEnvironment(DefineAsRegister(div));
+    return DoDivI(instr);
   } else if (instr->representation().IsDouble()) {
     return DoArithmeticD(Token::DIV, instr);
   } else {
@@ -1262,10 +1268,8 @@ LInstruction* LChunkBuilder::DoDiv(HDiv* instr) {
 
 bool LChunkBuilder::HasMagicNumberForDivisor(int32_t divisor) {
   uint32_t divisor_abs = abs(divisor);
-  // Dividing by 0, 1, and powers of 2 is easy.
-  // Note that IsPowerOf2(0) returns true;
-  ASSERT(IsPowerOf2(0) == true);
-  if (IsPowerOf2(divisor_abs)) return true;
+  // Dividing by 0 or powers of 2 is easy.
+  if (divisor == 0 || IsPowerOf2(divisor_abs)) return true;
 
   // We have magic numbers for a few specific divisors.
   // Details and proofs can be found in:
@@ -1281,51 +1285,73 @@ bool LChunkBuilder::HasMagicNumberForDivisor(int32_t divisor) {
     CompilerIntrinsics::CountTrailingZeros(divisor_abs);
   DivMagicNumbers magic_numbers =
     DivMagicNumberFor(divisor_abs >> power_of_2_factor);
-  if (magic_numbers.M != InvalidDivMagicNumber.M) return true;
+  return magic_numbers.M != InvalidDivMagicNumber.M;
+}
+
 
-  return false;
+LInstruction* LChunkBuilder::DoFlooringDivByConstI(HMathFloorOfDiv* instr) {
+  LOperand* dividend = UseRegister(instr->left());
+  LOperand* divisor = UseOrConstant(instr->right());
+  LOperand* remainder = TempRegister();
+  LInstruction* result =
+      DefineAsRegister(
+          new(zone()) LFlooringDivByConstI(dividend, divisor, remainder));
+  return AssignEnvironment(result);
 }
 
 
 LInstruction* LChunkBuilder::DoMathFloorOfDiv(HMathFloorOfDiv* instr) {
+  if (instr->right()->IsConstant()) {
+    return DoFlooringDivByConstI(instr);
+  } else {
     HValue* right = instr->right();
     LOperand* dividend = UseRegister(instr->left());
     LOperand* divisor = UseRegisterOrConstant(right);
     LOperand* remainder = TempRegister();
     return AssignEnvironment(DefineAsRegister(
-          new(zone()) LMathFloorOfDiv(dividend, divisor, remainder)));
+        new(zone()) LMathFloorOfDiv(dividend, divisor, remainder)));
+  }
+}
+
+
+LInstruction* LChunkBuilder::DoModByPowerOf2I(HMod* instr) {
+  ASSERT(instr->representation().IsSmiOrInteger32());
+  ASSERT(instr->left()->representation().Equals(instr->representation()));
+  ASSERT(instr->right()->representation().Equals(instr->representation()));
+  LOperand* dividend = UseRegisterAtStart(instr->left());
+  int32_t divisor = instr->right()->GetInteger32Constant();
+  LInstruction* result =
+      DefineSameAsFirst(new(zone()) LModByPowerOf2I(dividend, divisor));
+  bool can_deopt =
+      instr->CheckFlag(HValue::kBailoutOnMinusZero) &&
+      instr->left()->CanBeNegative();
+  return can_deopt ? AssignEnvironment(result) : result;
+}
+
+
+LInstruction* LChunkBuilder::DoModI(HMod* instr) {
+  ASSERT(instr->representation().IsSmiOrInteger32());
+  ASSERT(instr->left()->representation().Equals(instr->representation()));
+  ASSERT(instr->right()->representation().Equals(instr->representation()));
+  LOperand* dividend = UseRegister(instr->left());
+  LOperand* divisor = UseRegister(instr->right());
+  LModI* mod = new(zone()) LModI(dividend,
+                                 divisor);
+  LInstruction* result = DefineAsRegister(mod);
+  bool can_deopt = (instr->right()->CanBeZero() ||
+                    (instr->left()->RangeCanInclude(kMinInt) &&
+                     instr->right()->RangeCanInclude(-1) &&
+                     instr->CheckFlag(HValue::kBailoutOnMinusZero)) ||
+                    (instr->left()->CanBeNegative() &&
+                     instr->CanBeZero() &&
+                     instr->CheckFlag(HValue::kBailoutOnMinusZero)));
+  return can_deopt ? AssignEnvironment(result) : result;
 }
 
 
 LInstruction* LChunkBuilder::DoMod(HMod* instr) {
-  HValue* left = instr->left();
-  HValue* right = instr->right();
   if (instr->representation().IsSmiOrInteger32()) {
-    ASSERT(instr->left()->representation().Equals(instr->representation()));
-    ASSERT(instr->right()->representation().Equals(instr->representation()));
-    if (instr->RightIsPowerOf2()) {
-      ASSERT(!right->CanBeZero());
-      LModI* mod = new(zone()) LModI(UseRegisterAtStart(left),
-                                     UseConstant(right));
-      LInstruction* result = DefineAsRegister(mod);
-      return (left->CanBeNegative() &&
-              instr->CheckFlag(HValue::kBailoutOnMinusZero))
-          ? AssignEnvironment(result)
-          : result;
-    } else {
-      LModI* mod = new(zone()) LModI(UseRegister(left),
-                                     UseRegister(right),
-                                     TempRegister(),
-                                     FixedTemp(f20),
-                                     FixedTemp(f22));
-      LInstruction* result = DefineAsRegister(mod);
-      return (right->CanBeZero() ||
-              (left->RangeCanInclude(kMinInt) &&
-               right->RangeCanInclude(-1)) ||
-              instr->CheckFlag(HValue::kBailoutOnMinusZero))
-          ? AssignEnvironment(result)
-          : result;
-    }
+    return instr->RightIsPowerOf2() ? DoModByPowerOf2I(instr) : DoModI(instr);
   } else if (instr->representation().IsDouble()) {
     return DoArithmeticD(Token::MOD, instr);
   } else {
index ffabe1d..43ce740 100644 (file)
@@ -93,6 +93,7 @@ class LCodeGen;
   V(Drop)                                       \
   V(Dummy)                                      \
   V(DummyUse)                                   \
+  V(FlooringDivByConstI)                        \
   V(ForInCacheArray)                            \
   V(ForInPrepareMap)                            \
   V(FunctionLiteral)                            \
@@ -135,6 +136,7 @@ class LCodeGen;
   V(MathPowHalf)                                \
   V(MathRound)                                  \
   V(MathSqrt)                                   \
+  V(ModByPowerOf2I)                             \
   V(ModI)                                       \
   V(MulI)                                       \
   V(MultiplyAddD)                               \
@@ -616,36 +618,34 @@ class LArgumentsElements V8_FINAL : public LTemplateInstruction<1, 0, 0> {
 };
 
 
+class LModByPowerOf2I V8_FINAL : public LTemplateInstruction<1, 1, 0> {
+ public:
+  LModByPowerOf2I(LOperand* dividend, int32_t divisor) {
+    inputs_[0] = dividend;
+    divisor_ = divisor;
+  }
+
+  LOperand* dividend() { return inputs_[0]; }
+  int32_t divisor() const { return divisor_; }
+
+  DECLARE_CONCRETE_INSTRUCTION(ModByPowerOf2I, "mod-by-power-of-2-i")
+  DECLARE_HYDROGEN_ACCESSOR(Mod)
+
+ private:
+  int32_t divisor_;
+};
+
+
 class LModI V8_FINAL : public LTemplateInstruction<1, 2, 3> {
  public:
-  // Used when the right hand is a constant power of 2.
   LModI(LOperand* left,
         LOperand* right) {
     inputs_[0] = left;
     inputs_[1] = right;
-    temps_[0] = NULL;
-    temps_[1] = NULL;
-    temps_[2] = NULL;
-  }
-
-  // Used for the standard case.
-  LModI(LOperand* left,
-        LOperand* right,
-        LOperand* temp,
-        LOperand* temp2,
-        LOperand* temp3) {
-    inputs_[0] = left;
-    inputs_[1] = right;
-    temps_[0] = temp;
-    temps_[1] = temp2;
-    temps_[2] = temp3;
   }
 
   LOperand* left() { return inputs_[0]; }
   LOperand* right() { return inputs_[1]; }
-  LOperand* temp() { return temps_[0]; }
-  LOperand* temp2() { return temps_[1]; }
-  LOperand* temp3() { return temps_[2]; }
 
   DECLARE_CONCRETE_INSTRUCTION(ModI, "mod-i")
   DECLARE_HYDROGEN_ACCESSOR(Mod)
@@ -663,7 +663,24 @@ class LDivI V8_FINAL : public LTemplateInstruction<1, 2, 0> {
   LOperand* right() { return inputs_[1]; }
 
   DECLARE_CONCRETE_INSTRUCTION(DivI, "div-i")
-  DECLARE_HYDROGEN_ACCESSOR(Div)
+  DECLARE_HYDROGEN_ACCESSOR(BinaryOperation)
+};
+
+
+class LFlooringDivByConstI V8_FINAL : public LTemplateInstruction<1, 2, 1> {
+ public:
+  LFlooringDivByConstI(LOperand* dividend, LOperand* divisor, LOperand* temp) {
+    inputs_[0] = dividend;
+    inputs_[1] = divisor;
+    temps_[0] = temp;
+  }
+
+  LOperand* dividend() { return inputs_[0]; }
+  LOperand* divisor() { return inputs_[1]; }
+  LOperand* temp() { return temps_[0]; }
+
+  DECLARE_CONCRETE_INSTRUCTION(FlooringDivByConstI, "flooring-div-by-const-i")
+  DECLARE_HYDROGEN_ACCESSOR(MathFloorOfDiv)
 };
 
 
@@ -2629,6 +2646,10 @@ class LChunkBuilder V8_FINAL : public LChunkBuilderBase {
   LInstruction* DoMathSqrt(HUnaryMathOperation* instr);
   LInstruction* DoMathPowHalf(HUnaryMathOperation* instr);
   LInstruction* DoMathClz32(HUnaryMathOperation* instr);
+  LInstruction* DoDivI(HBinaryOperation* instr);
+  LInstruction* DoModByPowerOf2I(HMod* instr);
+  LInstruction* DoModI(HMod* instr);
+  LInstruction* DoFlooringDivByConstI(HMathFloorOfDiv* instr);
 
  private:
   enum Status {