From 90e4d594ba080dd830da51f2e7760229333deab4 Mon Sep 17 00:00:00 2001
From: Hojong Han
Date: Tue, 2 Apr 2013 11:33:17 +0900
Subject: [PATCH] JSC should scale the optimization threshold for a code block
 according to the cost of compiling it

[Title] JSC should scale the optimization threshold for a code block
 according to the cost of compiling it
[Issue#] N/A
[Problem] Speed up for SunSpider benchmark
[Cause] N/A
[Solution] This patch is based on commit 10f249a64e5541f54533c5960fde4392409ce982.
 We take only the parts of the original patch that are suitable for Tizen.

Change-Id: I41283308911bec11feed39f2579b946aecbe92ef
---
 Source/JavaScriptCore/bytecode/CodeBlock.cpp       | 193 ++++++++++++++++++++-
 Source/JavaScriptCore/bytecode/CodeBlock.h         |  97 ++---------
 .../JavaScriptCore/bytecode/ExecutionCounter.cpp   |   3 +-
 .../bytecompiler/BytecodeGenerator.cpp             |   3 +
 Source/JavaScriptCore/runtime/Options.h            |   2 +-
 5 files changed, 214 insertions(+), 84 deletions(-)

diff --git a/Source/JavaScriptCore/bytecode/CodeBlock.cpp b/Source/JavaScriptCore/bytecode/CodeBlock.cpp
index 846f812..1890bf6 100644
--- a/Source/JavaScriptCore/bytecode/CodeBlock.cpp
+++ b/Source/JavaScriptCore/bytecode/CodeBlock.cpp
@@ -1775,9 +1775,6 @@ CodeBlock::CodeBlock(ScriptExecutable* ownerExecutable, CodeType codeType, JSGlo
 {
     ASSERT(m_source);
 
-    optimizeAfterWarmUp();
-    jitAfterWarmUp();
-
 #if DUMP_CODE_BLOCK_STATISTICS
     liveCodeBlockSet.add(this);
 #endif
@@ -2824,6 +2821,196 @@ bool FunctionCodeBlock::jitCompileImpl(ExecState* exec)
 }
 #endif
 
+unsigned CodeBlock::reoptimizationRetryCounter() const
+{
+    ASSERT(m_reoptimizationRetryCounter <= Options::reoptimizationRetryCounterMax());
+    return m_reoptimizationRetryCounter;
+}
+
+void CodeBlock::countReoptimization()
+{
+    m_reoptimizationRetryCounter++;
+    if (m_reoptimizationRetryCounter > Options::reoptimizationRetryCounterMax())
+        m_reoptimizationRetryCounter = Options::reoptimizationRetryCounterMax();
+}
+
+double CodeBlock::optimizationThresholdScalingFactor()
+{
+    // This expression arises from doing a least-squares fit of
+    //
+    // F[x_] =: a * Sqrt[x + b] + Abs[c * x] + d
+    //
+    // against the data points:
+    //
+    //       x       F[x_]
+    //      10       0.9    (smallest reasonable code block)
+    //     200       1.0    (typical small-ish code block)
+    //     320       1.2    (something I saw in 3d-cube that I wanted to optimize)
+    //    1268       5.0    (something I saw in 3d-cube that I didn't want to optimize)
+    //    4000       5.5    (random large size, used to cause the function to converge to a shallow curve of some sort)
+    //   10000       6.0    (similar to above)
+    //
+    // I achieve the minimization using the following Mathematica code:
+    //
+    // MyFunctionTemplate[x_, a_, b_, c_, d_] := a*Sqrt[x + b] + Abs[c*x] + d
+    //
+    // samples = {{10, 0.9}, {200, 1}, {320, 1.2}, {1268, 5}, {4000, 5.5}, {10000, 6}}
+    //
+    // solution =
+    //     Minimize[Plus @@ ((MyFunctionTemplate[#[[1]], a, b, c, d] - #[[2]])^2 & /@ samples),
+    //         {a, b, c, d}][[2]]
+    //
+    // And the code below (to initialize a, b, c, d) is generated by:
+    //
+    // Print["const double " <> ToString[#[[1]]] <> " = " <>
+    //     If[#[[2]] < 0.00001, "0.0", ToString[#[[2]]]] <> ";"] & /@ solution
+    //
+    // We've long known the following to be true:
+    // - Small code blocks are cheap to optimize and so we should do it sooner rather
+    //   than later.
+    // - Large code blocks are expensive to optimize and so we should postpone doing so,
+    //   and sometimes have a large enough threshold that we never optimize them.
+    // - The difference in cost is not totally linear because (a) just invoking the
+    //   DFG incurs some base cost and (b) for large code blocks there is enough slop
+    //   in the correlation between instruction count and the actual compilation cost
+    //   that for those large blocks, the instruction count should not have a strong
+    //   influence on our threshold.
+    //
+    // I knew the goals but I didn't know how to achieve them; so I picked an interesting
+    // example where the heuristics were right (code block in 3d-cube with instruction
+    // count 320, which got compiled early as it should have been) and one where they were
+    // totally wrong (code block in 3d-cube with instruction count 1268, which was expensive
+    // to compile and didn't run often enough to warrant compilation in my opinion), and
+    // then threw in additional data points that represented my own guess of what our
+    // heuristics should do for some round-numbered examples.
+    //
+    // The expression to which I decided to fit the data arose because I started with an
+    // affine function, and then did two things: put the linear part in an Abs to ensure
+    // that the fit didn't end up choosing a negative value of c (which would result in
+    // the function turning over and going negative for large x) and I threw in a Sqrt
+    // term because Sqrt represents my intuition that the function should be more sensitive
+    // to small changes in small values of x, but less sensitive when x gets large.
+
+    // Note that the current fit essentially eliminates the linear portion of the
+    // expression (c == 0.0).
+    const double a = 0.061504;
+    const double b = 1.02406;
+    const double c = 0.0;
+    const double d = 0.825914;
+
+    double instructionCount = this->instructionCount();
+
+    ASSERT(instructionCount); // Make sure this is called only after we have an instruction stream; otherwise it'll just return the value of d, which makes no sense.
+
+    double result = d + a * sqrt(instructionCount + b) + c * instructionCount;
+#if ENABLE(JIT_VERBOSE_OSR)
+    dataLog(*this, ": instruction count is ", instructionCount, ", scaling execution counter by ", result, "\n");
+#endif
+    return result;
+}
+
+static int32_t clipThreshold(double threshold)
+{
+    if (threshold < 1.0)
+        return 1;
+
+    if (threshold > static_cast<double>(std::numeric_limits<int32_t>::max()))
+        return std::numeric_limits<int32_t>::max();
+
+    return static_cast<int32_t>(threshold);
+}
+
+int32_t CodeBlock::counterValueForOptimizeAfterWarmUp()
+{
+    return clipThreshold(
+        Options::thresholdForOptimizeAfterWarmUp() *
+        optimizationThresholdScalingFactor() *
+        (1 << reoptimizationRetryCounter()));
+}
+
+int32_t CodeBlock::counterValueForOptimizeAfterLongWarmUp()
+{
+    return clipThreshold(
+        Options::thresholdForOptimizeAfterLongWarmUp() *
+        optimizationThresholdScalingFactor() *
+        (1 << reoptimizationRetryCounter()));
+}
+
+int32_t CodeBlock::counterValueForOptimizeSoon()
+{
+    return clipThreshold(
+        Options::thresholdForOptimizeSoon() *
+        optimizationThresholdScalingFactor() *
+        (1 << reoptimizationRetryCounter()));
+}
+
+bool CodeBlock::checkIfOptimizationThresholdReached()
+{
+    return m_jitExecuteCounter.checkIfThresholdCrossedAndSet(this);
+}
+
+void CodeBlock::optimizeNextInvocation()
+{
+    m_jitExecuteCounter.setNewThreshold(0, this);
+}
+
+void CodeBlock::dontOptimizeAnytimeSoon()
+{
+    m_jitExecuteCounter.deferIndefinitely();
+}
+
+void CodeBlock::optimizeAfterWarmUp()
+{
+    m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterWarmUp(), this);
+}
+
+void CodeBlock::optimizeAfterLongWarmUp()
+{
+    m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterLongWarmUp(), this);
+}
+
+void CodeBlock::optimizeSoon()
+{
+    m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeSoon(), this);
+}
+
+#if ENABLE(JIT)
+uint32_t CodeBlock::adjustedExitCountThreshold(uint32_t desiredThreshold)
+{
+    ASSERT(getJITType() == JITCode::DFGJIT);
+    // Compute this the lame way so we don't saturate. This is called infrequently
+    // enough that this loop won't hurt us.
+    unsigned result = desiredThreshold;
+    for (unsigned n = baselineVersion()->reoptimizationRetryCounter(); n--;) {
+        unsigned newResult = result << 1;
+        if (newResult < result)
+            return std::numeric_limits<uint32_t>::max();
+        result = newResult;
+    }
+    return result;
+}
+
+uint32_t CodeBlock::exitCountThresholdForReoptimization()
+{
+    return adjustedExitCountThreshold(Options::osrExitCountForReoptimization());
+}
+
+uint32_t CodeBlock::exitCountThresholdForReoptimizationFromLoop()
+{
+    return adjustedExitCountThreshold(Options::osrExitCountForReoptimizationFromLoop());
+}
+
+bool CodeBlock::shouldReoptimizeNow()
+{
+    return osrExitCounter() >= exitCountThresholdForReoptimization();
+}
+
+bool CodeBlock::shouldReoptimizeFromLoopNow()
+{
+    return osrExitCounter() >= exitCountThresholdForReoptimizationFromLoop();
+}
+#endif
+
 #if ENABLE(VALUE_PROFILER)
 void CodeBlock::updateAllPredictionsAndCountLiveness(
     OperationInProgress operation, unsigned& numberOfLiveNonArgumentValueProfiles, unsigned& numberOfSamplesInProfiles)
diff --git a/Source/JavaScriptCore/bytecode/CodeBlock.h b/Source/JavaScriptCore/bytecode/CodeBlock.h
index 08dc49c..541d2c3 100644
--- a/Source/JavaScriptCore/bytecode/CodeBlock.h
+++ b/Source/JavaScriptCore/bytecode/CodeBlock.h
@@ -1009,28 +1009,12 @@ namespace JSC {
         // When we observe a lot of speculation failures, we trigger a
         // reoptimization. But each time, we increase the optimization trigger
         // to avoid thrashing.
-        unsigned reoptimizationRetryCounter() const
-        {
-            ASSERT(m_reoptimizationRetryCounter <= Options::reoptimizationRetryCounterMax());
-            return m_reoptimizationRetryCounter;
-        }
-
-        void countReoptimization()
-        {
-            m_reoptimizationRetryCounter++;
-            if (m_reoptimizationRetryCounter > Options::reoptimizationRetryCounterMax())
-                m_reoptimizationRetryCounter = Options::reoptimizationRetryCounterMax();
-        }
+        unsigned reoptimizationRetryCounter() const;
+        void countReoptimization();
 
-        int32_t counterValueForOptimizeAfterWarmUp()
-        {
-            return Options::thresholdForOptimizeAfterWarmUp() << reoptimizationRetryCounter();
-        }
-
-        int32_t counterValueForOptimizeAfterLongWarmUp()
-        {
-            return Options::thresholdForOptimizeAfterLongWarmUp() << reoptimizationRetryCounter();
-        }
+        int32_t counterValueForOptimizeAfterWarmUp();
+        int32_t counterValueForOptimizeAfterLongWarmUp();
+        int32_t counterValueForOptimizeSoon();
 
         int32_t* addressOfJITExecuteCounter()
         {
@@ -1048,28 +1032,19 @@
         // Check if the optimization threshold has been reached, and if not,
         // adjust the heuristics accordingly. Returns true if the threshold has
         // been reached.
-        bool checkIfOptimizationThresholdReached()
-        {
-            return m_jitExecuteCounter.checkIfThresholdCrossedAndSet(this);
-        }
+        bool checkIfOptimizationThresholdReached();
 
         // Call this to force the next optimization trigger to fire. This is
         // rarely wise, since optimization triggers are typically more
         // expensive than executing baseline code.
-        void optimizeNextInvocation()
-        {
-            m_jitExecuteCounter.setNewThreshold(0, this);
-        }
+        void optimizeNextInvocation();
 
         // Call this to prevent optimization from happening again. Note that
         // optimization will still happen after roughly 2^29 invocations,
         // so this is really meant to delay that as much as possible. This
         // is called if optimization failed, and we expect it to fail in
         // the future as well.
-        void dontOptimizeAnytimeSoon()
-        {
-            m_jitExecuteCounter.deferIndefinitely();
-        }
+        void dontOptimizeAnytimeSoon();
 
         // Call this to reinitialize the counter to its starting state,
         // forcing a warm-up to happen before the next optimization trigger
@@ -1077,17 +1052,11 @@
         // makes sense to call this if an OSR exit occurred. Note that
         // OSR exit code is code generated, so the value of the execute
        // counter that this corresponds to is also available directly.
-        void optimizeAfterWarmUp()
-        {
-            m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterWarmUp(), this);
-        }
+        void optimizeAfterWarmUp();
 
         // Call this to force an optimization trigger to fire only after
         // a lot of warm-up.
-        void optimizeAfterLongWarmUp()
-        {
-            m_jitExecuteCounter.setNewThreshold(counterValueForOptimizeAfterLongWarmUp(), this);
-        }
+        void optimizeAfterLongWarmUp();
 
         // Call this to cause an optimization trigger to fire soon, but
         // not necessarily the next one. This makes sense if optimization
@@ -1107,10 +1076,7 @@
         // each return, as that would be superfluous. It only makes sense
         // to trigger optimization if one of those functions becomes hot
         // in the baseline code.
-        void optimizeSoon()
-        {
-            m_jitExecuteCounter.setNewThreshold(Options::thresholdForOptimizeSoon() << reoptimizationRetryCounter(), this);
-        }
+        void optimizeSoon();
 
         uint32_t osrExitCounter() const { return m_osrExitCounter; }
 
@@ -1121,40 +1087,11 @@
         static ptrdiff_t offsetOfOSRExitCounter() { return OBJECT_OFFSETOF(CodeBlock, m_osrExitCounter); }
 
 #if ENABLE(JIT)
-        uint32_t adjustedExitCountThreshold(uint32_t desiredThreshold)
-        {
-            ASSERT(getJITType() == JITCode::DFGJIT);
-            // Compute this the lame way so we don't saturate. This is called infrequently
-            // enough that this loop won't hurt us.
-            unsigned result = desiredThreshold;
-            for (unsigned n = baselineVersion()->reoptimizationRetryCounter(); n--;) {
-                unsigned newResult = result << 1;
-                if (newResult < result)
-                    return std::numeric_limits<uint32_t>::max();
-                result = newResult;
-            }
-            return result;
-        }
-
-        uint32_t exitCountThresholdForReoptimization()
-        {
-            return adjustedExitCountThreshold(Options::osrExitCountForReoptimization());
-        }
-
-        uint32_t exitCountThresholdForReoptimizationFromLoop()
-        {
-            return adjustedExitCountThreshold(Options::osrExitCountForReoptimizationFromLoop());
-        }
-
-        bool shouldReoptimizeNow()
-        {
-            return osrExitCounter() >= exitCountThresholdForReoptimization();
-        }
-
-        bool shouldReoptimizeFromLoopNow()
-        {
-            return osrExitCounter() >= exitCountThresholdForReoptimizationFromLoop();
-        }
+        uint32_t adjustedExitCountThreshold(uint32_t desiredThreshold);
+        uint32_t exitCountThresholdForReoptimization();
+        uint32_t exitCountThresholdForReoptimizationFromLoop();
+        bool shouldReoptimizeNow();
+        bool shouldReoptimizeFromLoopNow();
 #endif
 
 #if ENABLE(VALUE_PROFILER)
@@ -1190,6 +1127,8 @@
     private:
         friend class DFGCodeBlocks;
 
+        double optimizationThresholdScalingFactor();
+
 #if ENABLE(DFG_JIT)
         void tallyFrequentExitSites();
 #else
diff --git a/Source/JavaScriptCore/bytecode/ExecutionCounter.cpp b/Source/JavaScriptCore/bytecode/ExecutionCounter.cpp
index 12a4049..01d5104 100644
--- a/Source/JavaScriptCore/bytecode/ExecutionCounter.cpp
+++ b/Source/JavaScriptCore/bytecode/ExecutionCounter.cpp
@@ -113,7 +113,8 @@ bool ExecutionCounter::hasCrossedThreshold(CodeBlock* codeBlock) const
     double modifiedThreshold = applyMemoryUsageHeuristics(m_activeThreshold, codeBlock);
 
     return static_cast<double>(m_totalCount) + m_counter >=
-        modifiedThreshold - static_cast<double>(m_activeThreshold) / 2;
+        modifiedThreshold - static_cast<double>(
+            std::min(m_activeThreshold, Options::maximumExecutionCountsBetweenCheckpoints())) / 2;
 }
 
 bool ExecutionCounter::setThreshold(CodeBlock* codeBlock)
diff --git a/Source/JavaScriptCore/bytecompiler/BytecodeGenerator.cpp b/Source/JavaScriptCore/bytecompiler/BytecodeGenerator.cpp
index 82c10f0..ecd5623 100644
--- a/Source/JavaScriptCore/bytecompiler/BytecodeGenerator.cpp
+++ b/Source/JavaScriptCore/bytecompiler/BytecodeGenerator.cpp
@@ -208,6 +208,9 @@ JSObject* BytecodeGenerator::generate()
 
     m_codeBlock->instructions() = RefCountedArray<Instruction>(m_instructions);
 
+    m_codeBlock->optimizeAfterWarmUp();
+    m_codeBlock->jitAfterWarmUp();
+
     if (s_dumpsGeneratedCode)
         m_codeBlock->dump(m_scopeChain->globalObject->globalExec());
 
diff --git a/Source/JavaScriptCore/runtime/Options.h b/Source/JavaScriptCore/runtime/Options.h
index ab3f34b..af15eb0 100644
--- a/Source/JavaScriptCore/runtime/Options.h
+++ b/Source/JavaScriptCore/runtime/Options.h
@@ -82,7 +82,7 @@ namespace JSC {
     v(int32, thresholdForJITSoon, 100) \
     \
     v(int32, thresholdForOptimizeAfterWarmUp, 1000) \
-    v(int32, thresholdForOptimizeAfterLongWarmUp, 5000) \
+    v(int32, thresholdForOptimizeAfterLongWarmUp, 1000) \
     v(int32, thresholdForOptimizeSoon, 1000) \
     \
    v(int32, executionCounterIncrementForLoop, 1) \
-- 
2.7.4
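
For illustration only (not part of the patch above): a minimal standalone sketch of how the fitted scaling
function from optimizationThresholdScalingFactor() stretches the optimize-after-warm-up trigger. It assumes
the default thresholdForOptimizeAfterWarmUp of 1000 from Options.h, ignores the reoptimization retry
doubling, and uses arbitrary sample instruction counts.

// Illustration only -- reproduces the fitted expression d + a*sqrt(x + b) + c*x
// from the patch and applies it to the default warm-up threshold of 1000.
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    // Fitted constants taken from CodeBlock::optimizationThresholdScalingFactor().
    const double a = 0.061504;
    const double b = 1.02406;
    const double c = 0.0;
    const double d = 0.825914;
    const int32_t thresholdForOptimizeAfterWarmUp = 1000; // default from Options.h

    // Arbitrary sample instruction counts, chosen only to show the trend.
    const double sampleInstructionCounts[] = { 10, 200, 320, 1268, 4000, 10000 };
    for (double instructionCount : sampleInstructionCounts) {
        double factor = d + a * std::sqrt(instructionCount + b) + c * instructionCount;
        std::printf("instructions=%6.0f  factor=%.3f  scaled threshold=%.0f\n",
            instructionCount, factor, thresholdForOptimizeAfterWarmUp * factor);
    }
    return 0;
}

Larger code blocks therefore need proportionally more baseline executions before the DFG is invoked, which
is the behavior the patch is after: cheap-to-compile blocks optimize early, expensive ones are deferred.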