JSC should scale the optimization threshold for a code block according to the cost...
authorHojong Han <hojong.han@samsung.com>
Tue, 2 Apr 2013 02:33:17 +0000 (11:33 +0900)
committerGerrit Code Review <gerrit2@kim11>
Tue, 2 Apr 2013 04:22:37 +0000 (13:22 +0900)
[Title] JSC should scale the optimization threshold for a code block according to the cost of compiling it
[Issue#] N/A
[Problem] Speed up for SunSpider benchmark
[Cause] N/A
[Solution] This patch is based on commit 10f249a64e5541f54533c5960fde4392409ce982
           We just take some parts, suitable to tizen, from the original patch above.

Change-Id: I41283308911bec11feed39f2579b946aecbe92ef

Source/JavaScriptCore/bytecode/CodeBlock.cpp
Source/JavaScriptCore/bytecode/CodeBlock.h
Source/JavaScriptCore/bytecode/ExecutionCounter.cpp
Source/JavaScriptCore/bytecompiler/BytecodeGenerator.cpp
Source/JavaScriptCore/runtime/Options.h

index 846f812..1890bf6 100644 (file)
@@ -1775,9 +1775,6 @@ CodeBlock::CodeBlock(ScriptExecutable* ownerExecutable, CodeType codeType, JSGlo
 {
     ASSERT(m_source);
     
-    optimizeAfterWarmUp();
-    jitAfterWarmUp();
-
 #if DUMP_CODE_BLOCK_STATISTICS
     liveCodeBlockSet.add(this);
 #endif
@@ -2824,6 +2821,196 @@ bool FunctionCodeBlock::jitCompileImpl(ExecState* exec)
 }
 #endif
 
+unsigned CodeBlock::reoptimizationRetryCounter() const
+{
+    ASSERT(m_reoptimizationRetryCounter <= Options::reoptimizationRetryCounterMax());
+    return m_reoptimizationRetryCounter;
+}
+
+void CodeBlock::countReoptimization()
+{
+    m_reoptimizationRetryCounter++;
+    if (m_reoptimizationRetryCounter > Options::reoptimizationRetryCounterMax())
+        m_reoptimizationRetryCounter = Options::reoptimizationRetryCounterMax();
+}
+
+double CodeBlock::optimizationThresholdScalingFactor()
+{
+    // This expression arises from doing a least-squares fit of
+    //
+    // F[x_] =: a * Sqrt[x + b] + Abs[c * x] + d
+    //
+    // against the data points:
+    //
+    //    x       F[x_]
+    //    10       0.9          (smallest reasonable code block)
+    //   200       1.0          (typical small-ish code block)
+    //   320       1.2          (something I saw in 3d-cube that I wanted to optimize)
+    //  1268       5.0          (something I saw in 3d-cube that I didn't want to optimize)
+    //  4000       5.5          (random large size, used to cause the function to converge to a shallow curve of some sort)
+    // 10000       6.0          (similar to above)
+    //
+    // I achieve the minimization using the following Mathematica code:
+    //
+    // MyFunctionTemplate[x_, a_, b_, c_, d_] := a*Sqrt[x + b] + Abs[c*x] + d
+    //
+    // samples = {{10, 0.9}, {200, 1}, {320, 1.2}, {1268, 5}, {4000, 5.5}, {10000, 6}}
+    //
+    // solution = 
+    //     Minimize[Plus @@ ((MyFunctionTemplate[#[[1]], a, b, c, d] - #[[2]])^2 & /@ samples),
+    //         {a, b, c, d}][[2]]
+    //
+    // And the code below (to initialize a, b, c, d) is generated by:
+    //
+    // Print["const double " <> ToString[#[[1]]] <> " = " <>
+    //     If[#[[2]] < 0.00001, "0.0", ToString[#[[2]]]] <> ";"] & /@ solution
+    //
+    // We've long known the following to be true:
+    // - Small code blocks are cheap to optimize and so we should do it sooner rather
+    //   than later.
+    // - Large code blocks are expensive to optimize and so we should postpone doing so,
+    //   and sometimes have a large enough threshold that we never optimize them.
+    // - The difference in cost is not totally linear because (a) just invoking the
+    //   DFG incurs some base cost and (b) for large code blocks there is enough slop
+    //   in the correlation between instruction count and the actual compilation cost
+    //   that for those large blocks, the instruction count should not have a strong
+    //   influence on our threshold.
+    //
+    // I knew the goals but I didn't know how to achieve them; so I picked an interesting
+    // example where the heuristics were right (code block in 3d-cube with instruction
+    // count 320, which got compiled early as it should have been) and one where they were
+    // totally wrong (code block in 3d-cube with instruction count 1268, which was expensive
+    // to compile and didn't run often enough to warrant compilation in my opinion), and
+    // then threw in additional data points that represented my own guess of what our
+    // heuristics should do for some round-numbered examples.
+    //
+    // The expression to which I decided to fit the data arose because I started with an
+    // affine function, and then did two things: put the linear part in an Abs to ensure
+    // that the fit didn't end up choosing a negative value of c (which would result in
+    // the function turning over and going negative for large x) and I threw in a Sqrt
+    // term because Sqrt represents my intution that the function should be more sensitive
+    // to small changes in small values of x, but less sensitive when x gets large.
+    
+    // Note that the current fit essentially eliminates the linear portion of the
+    // expression (c == 0.0).
+    const double a = 0.061504;
+    const double b = 1.02406;
+    const double c = 0.0;
+    const double d = 0.825914;
+    
+    double instructionCount = this->instructionCount();
+    
+    ASSERT(instructionCount); // Make sure this is called only after we have an instruction stream; otherwise it'll just return the value of d, which makes no sense.
+    
+    double result = d + a * sqrt(instructionCount + b) + c * instructionCount;
+#if ENABLE(JIT_VERBOSE_OSR)
+    dataLog(*this, ": instruction count is ", instructionCount, ", scaling execution counter by ", result, "\n");
+#endif
+    return result;
+}
+
+static int32_t clipThreshold(double threshold)
+{
+    if (threshold < 1.0)
+        return 1;
+    
+    if (threshold > static_cast<double>(std::numeric_limits<int32_t>::max()))
+        return std::numeric_limits<int32_t>::max();
+    
+    return static_cast<int32_t>(threshold);
+}
+
+int32_t CodeBlock::counterValueForOptimizeAfterWarmUp()
+{
+    return clipThreshold(
+        Options::thresholdForOptimizeAfterWarmUp() *
+        optimizationThresholdScalingFactor() *
+        (1 << reoptimizationRetryCounter()));
+}
+
+int32_t CodeBlock::counterValueForOptimizeAfterLongWarmUp()
+{
+    return clipThreshold(
+        Options::thresholdForOptimizeAfterLongWarmUp() *
+        optimizationThresholdScalingFactor() *
+        (1 << reoptimizationRetryCounter()));
+}
+
+int32_t CodeBlock::counterValueForOptimizeSoon()
+{
+    return clipThreshold(
+        Options::thresholdForOptimizeSoon() *
+        optimizationThresholdScalingFactor() *
+        (1 << reoptimizationRetryCounter()));
+}
+
+bool CodeBlock::checkIfOptimizationThresholdReached()
+{
+    return m_jitExecuteCounter.checkIfThresholdCrossedAndSet(this);
+}
+
+void CodeBlock::optimizeNextInvocation()
+{
+    m_jitExecuteCounter.setNewThreshold(0, this);
+}
+
+void CodeBlock::dontOptimizeAnytimeSoon()
+{
+    m_jitExecuteCounter.deferIndefinitely();
+}
+
+void CodeBlock::optimizeAfterWarmUp()
+{
+    m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterWarmUp(), this);
+}
+
+void CodeBlock::optimizeAfterLongWarmUp()
+{
+    m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterLongWarmUp(), this);
+}
+
+void CodeBlock::optimizeSoon()
+{
+    m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeSoon(), this);
+}
+
+#if ENABLE(JIT)
+uint32_t CodeBlock::adjustedExitCountThreshold(uint32_t desiredThreshold)
+{
+    ASSERT(getJITType() == JITCode::DFGJIT);
+    // Compute this the lame way so we don't saturate. This is called infrequently
+    // enough that this loop won't hurt us.
+    unsigned result = desiredThreshold;
+    for (unsigned n = baselineVersion()->reoptimizationRetryCounter(); n--;) {
+        unsigned newResult = result << 1;
+        if (newResult < result)
+            return std::numeric_limits<uint32_t>::max();
+        result = newResult;
+    }
+    return result;
+}
+
+uint32_t CodeBlock::exitCountThresholdForReoptimization()
+{
+    return adjustedExitCountThreshold(Options::osrExitCountForReoptimization());
+}
+
+uint32_t CodeBlock::exitCountThresholdForReoptimizationFromLoop()
+{
+    return adjustedExitCountThreshold(Options::osrExitCountForReoptimizationFromLoop());
+}
+
+bool CodeBlock::shouldReoptimizeNow()
+{
+    return osrExitCounter() >= exitCountThresholdForReoptimization();
+}
+
+bool CodeBlock::shouldReoptimizeFromLoopNow()
+{
+    return osrExitCounter() >= exitCountThresholdForReoptimizationFromLoop();
+}
+#endif
+
 #if ENABLE(VALUE_PROFILER)
 void CodeBlock::updateAllPredictionsAndCountLiveness(
     OperationInProgress operation, unsigned& numberOfLiveNonArgumentValueProfiles, unsigned& numberOfSamplesInProfiles)
index 08dc49c..541d2c3 100644 (file)
@@ -1009,28 +1009,12 @@ namespace JSC {
         // When we observe a lot of speculation failures, we trigger a
         // reoptimization. But each time, we increase the optimization trigger
         // to avoid thrashing.
-        unsigned reoptimizationRetryCounter() const
-        {
-            ASSERT(m_reoptimizationRetryCounter <= Options::reoptimizationRetryCounterMax());
-            return m_reoptimizationRetryCounter;
-        }
-        
-        void countReoptimization()
-        {
-            m_reoptimizationRetryCounter++;
-            if (m_reoptimizationRetryCounter > Options::reoptimizationRetryCounterMax())
-                m_reoptimizationRetryCounter = Options::reoptimizationRetryCounterMax();
-        }
+        unsigned reoptimizationRetryCounter() const;
+        void countReoptimization();
         
-        int32_t counterValueForOptimizeAfterWarmUp()
-        {
-            return Options::thresholdForOptimizeAfterWarmUp() << reoptimizationRetryCounter();
-        }
-        
-        int32_t counterValueForOptimizeAfterLongWarmUp()
-        {
-            return Options::thresholdForOptimizeAfterLongWarmUp() << reoptimizationRetryCounter();
-        }
+        int32_t counterValueForOptimizeAfterWarmUp();
+        int32_t counterValueForOptimizeAfterLongWarmUp();
+        int32_t counterValueForOptimizeSoon();
         
         int32_t* addressOfJITExecuteCounter()
         {
@@ -1048,28 +1032,19 @@ namespace JSC {
         // Check if the optimization threshold has been reached, and if not,
         // adjust the heuristics accordingly. Returns true if the threshold has
         // been reached.
-        bool checkIfOptimizationThresholdReached()
-        {
-            return m_jitExecuteCounter.checkIfThresholdCrossedAndSet(this);
-        }
+        bool checkIfOptimizationThresholdReached();
         
         // Call this to force the next optimization trigger to fire. This is
         // rarely wise, since optimization triggers are typically more
         // expensive than executing baseline code.
-        void optimizeNextInvocation()
-        {
-            m_jitExecuteCounter.setNewThreshold(0, this);
-        }
+        void optimizeNextInvocation();
         
         // Call this to prevent optimization from happening again. Note that
         // optimization will still happen after roughly 2^29 invocations,
         // so this is really meant to delay that as much as possible. This
         // is called if optimization failed, and we expect it to fail in
         // the future as well.
-        void dontOptimizeAnytimeSoon()
-        {
-            m_jitExecuteCounter.deferIndefinitely();
-        }
+        void dontOptimizeAnytimeSoon();
         
         // Call this to reinitialize the counter to its starting state,
         // forcing a warm-up to happen before the next optimization trigger
@@ -1077,17 +1052,11 @@ namespace JSC {
         // makes sense to call this if an OSR exit occurred. Note that
         // OSR exit code is code generated, so the value of the execute
         // counter that this corresponds to is also available directly.
-        void optimizeAfterWarmUp()
-        {
-            m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterWarmUp(), this);
-        }
+        void optimizeAfterWarmUp();
         
         // Call this to force an optimization trigger to fire only after
         // a lot of warm-up.
-        void optimizeAfterLongWarmUp()
-        {
-            m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterLongWarmUp(), this);
-        }
+        void optimizeAfterLongWarmUp();
         
         // Call this to cause an optimization trigger to fire soon, but
         // not necessarily the next one. This makes sense if optimization
@@ -1107,10 +1076,7 @@ namespace JSC {
         // each return, as that would be superfluous. It only makes sense
         // to trigger optimization if one of those functions becomes hot
         // in the baseline code.
-        void optimizeSoon()
-        {
-            m_jitExecuteCounter.setNewThreshold(Options::thresholdForOptimizeSoon() << reoptimizationRetryCounter(), this);
-        }
+        void optimizeSoon();
         
         uint32_t osrExitCounter() const { return m_osrExitCounter; }
         
@@ -1121,40 +1087,11 @@ namespace JSC {
         static ptrdiff_t offsetOfOSRExitCounter() { return OBJECT_OFFSETOF(CodeBlock, m_osrExitCounter); }
 
 #if ENABLE(JIT)
-        uint32_t adjustedExitCountThreshold(uint32_t desiredThreshold)
-        {
-            ASSERT(getJITType() == JITCode::DFGJIT);
-            // Compute this the lame way so we don't saturate. This is called infrequently
-            // enough that this loop won't hurt us.
-            unsigned result = desiredThreshold;
-            for (unsigned n = baselineVersion()->reoptimizationRetryCounter(); n--;) {
-                unsigned newResult = result << 1;
-                if (newResult < result)
-                    return std::numeric_limits<uint32_t>::max();
-                result = newResult;
-            }
-            return result;
-        }
-        
-        uint32_t exitCountThresholdForReoptimization()
-        {
-            return adjustedExitCountThreshold(Options::osrExitCountForReoptimization());
-        }
-        
-        uint32_t exitCountThresholdForReoptimizationFromLoop()
-        {
-            return adjustedExitCountThreshold(Options::osrExitCountForReoptimizationFromLoop());
-        }
-
-        bool shouldReoptimizeNow()
-        {
-            return osrExitCounter() >= exitCountThresholdForReoptimization();
-        }
-        
-        bool shouldReoptimizeFromLoopNow()
-        {
-            return osrExitCounter() >= exitCountThresholdForReoptimizationFromLoop();
-        }
+        uint32_t adjustedExitCountThreshold(uint32_t desiredThreshold);
+        uint32_t exitCountThresholdForReoptimization();
+        uint32_t exitCountThresholdForReoptimizationFromLoop();
+        bool shouldReoptimizeNow();
+        bool shouldReoptimizeFromLoopNow();
 #endif
 
 #if ENABLE(VALUE_PROFILER)
@@ -1190,6 +1127,8 @@ namespace JSC {
     private:
         friend class DFGCodeBlocks;
         
+        double optimizationThresholdScalingFactor();
+
 #if ENABLE(DFG_JIT)
         void tallyFrequentExitSites();
 #else
index 12a4049..01d5104 100644 (file)
@@ -113,7 +113,8 @@ bool ExecutionCounter::hasCrossedThreshold(CodeBlock* codeBlock) const
     double modifiedThreshold = applyMemoryUsageHeuristics(m_activeThreshold, codeBlock);
     
     return static_cast<double>(m_totalCount) + m_counter >=
-         modifiedThreshold - static_cast<double>(m_activeThreshold) / 2;
+        modifiedThreshold - static_cast<double>(
+            std::min(m_activeThreshold, Options::maximumExecutionCountsBetweenCheckpoints())) / 2;
 }
 
 bool ExecutionCounter::setThreshold(CodeBlock* codeBlock)
index 82c10f0..ecd5623 100644 (file)
@@ -208,6 +208,9 @@ JSObject* BytecodeGenerator::generate()
     
     m_codeBlock->instructions() = RefCountedArray<Instruction>(m_instructions);
 
+    m_codeBlock->optimizeAfterWarmUp();
+    m_codeBlock->jitAfterWarmUp();
+
     if (s_dumpsGeneratedCode)
         m_codeBlock->dump(m_scopeChain->globalObject->globalExec());
 
index ab3f34b..af15eb0 100644 (file)
@@ -82,7 +82,7 @@ namespace JSC {
     v(int32, thresholdForJITSoon, 100) \
     \
     v(int32, thresholdForOptimizeAfterWarmUp, 1000) \
-    v(int32, thresholdForOptimizeAfterLongWarmUp, 5000) \
+    v(int32, thresholdForOptimizeAfterLongWarmUp, 1000) \
     v(int32, thresholdForOptimizeSoon, 1000) \
     \
     v(int32, executionCounterIncrementForLoop, 1) \