Fix x86 steady state tiered compilation performance (#17476)
author Noah Falk <noahfalk@users.noreply.github.com>
Wed, 11 Apr 2018 03:35:33 +0000 (20:35 -0700)
committer GitHub <noreply@github.com>
Wed, 11 Apr 2018 03:35:33 +0000 (20:35 -0700)
* Fix x86 steady state tiered compilation performance

Also included: a few tiered-compilation-only test hooks and a small logging fix for JitBench.

Tiered compilation wasn't correctly implementing the MayHavePrecode and RequiresStableEntryPoint policy functions. On x64 this was a non-issue, but due to compact entrypoints on x86 it led to methods allocating both FuncPtrStubs and Precodes. The FuncPtrStubs would never get backpatched, which caused never-ending invocations of the Prestub for some methods. Although such code still runs correctly, it is much slower than it needs to be. On MusicStore x86 I am seeing a 20% improvement in steady-state RPS after this fix, bringing us in line with what I've seen on x64.

src/inc/clrconfigvalues.h
src/vm/eeconfig.cpp
src/vm/eeconfig.h
src/vm/method.cpp
src/vm/method.hpp
src/vm/prestub.cpp
src/vm/tieredcompilation.cpp
tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs

index 2e66f12..12a2c0f 100644 (file)
@@ -654,6 +654,9 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 0,
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LEGACY_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Deprecated - Use COMPLUS_TieredCompilation")
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold, W("TieredCompilation_Tier1CallCountThreshold"), 30, "Number of times a method must be called after which it is promoted to tier 1.")
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "Delay in milliseconds since process startup or the last tier 0 JIT before call counting begins for tier 1 promotion.")
+
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Test_CallCounting, W("TieredCompilation_Test_CallCounting"), 1, "Enabled by default (only activates when TieredCompilation is also enabled). If disabled immediately backpatches prestub, and likely prevents any tier1 promotion")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Test_OptimizeTier0, W("TieredCompilation_Test_OptimizeTier0"), 0, "Use optimized codegen (normally used by tier1) in tier0")
 #endif
 
 
index 1b44367..7782e69 100644 (file)
@@ -375,6 +375,8 @@ HRESULT EEConfig::Init()
 
 #if defined(FEATURE_TIERED_COMPILATION)
     fTieredCompilation = false;
+    fTieredCompilation_CallCounting = false;
+    fTieredCompilation_OptimizeTier0 = false;
     tieredCompilation_tier1CallCountThreshold = 1;
     tieredCompilation_tier1CallCountingDelayMs = 0;
 #endif
@@ -1242,6 +1244,9 @@ HRESULT EEConfig::sync()
         //this older name is deprecated, but still accepted for a time. Preserving it is a very small overhead not to needlessly break things.
         CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_LEGACY_TieredCompilation) != 0;
 
+    fTieredCompilation_CallCounting = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Test_CallCounting) != 0;
+    fTieredCompilation_OptimizeTier0 = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Test_OptimizeTier0) != 0;
+
     tieredCompilation_tier1CallCountThreshold =
         CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold);
     if (tieredCompilation_tier1CallCountThreshold < 1)
index 5c88f42..e30c0e2 100644 (file)
@@ -285,6 +285,8 @@ public:
     // Tiered Compilation config
 #if defined(FEATURE_TIERED_COMPILATION)
     bool          TieredCompilation(void)           const {LIMITED_METHOD_CONTRACT;  return fTieredCompilation; }
+    bool          TieredCompilation_CallCounting()  const {LIMITED_METHOD_CONTRACT;  return fTieredCompilation_CallCounting; }
+    bool          TieredCompilation_OptimizeTier0() const {LIMITED_METHOD_CONTRACT; return fTieredCompilation_OptimizeTier0; }
     DWORD         TieredCompilation_Tier1CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountThreshold; }
     DWORD         TieredCompilation_Tier1CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountingDelayMs; }
 #endif
@@ -1109,6 +1111,8 @@ private: //----------------------------------------------------------------
 
 #if defined(FEATURE_TIERED_COMPILATION)
     bool fTieredCompilation;
+    bool fTieredCompilation_CallCounting;
+    bool fTieredCompilation_OptimizeTier0;
     DWORD tieredCompilation_tier1CallCountThreshold;
     DWORD tieredCompilation_tier1CallCountingDelayMs;
 #endif
index d323ef0..e1bd021 100644 (file)
@@ -2414,7 +2414,11 @@ BOOL MethodDesc::RequiresMethodDescCallingConvention(BOOL fEstimateForChunk /*=F
 BOOL MethodDesc::RequiresStableEntryPoint(BOOL fEstimateForChunk /*=FALSE*/)
 {
     LIMITED_METHOD_CONTRACT;
-
+    
+    // Create precodes for versionable methods
+    if (IsVersionableWithPrecode())
+        return TRUE;
+    
     // Create precodes for edit and continue to make methods updateable
     if (IsEnCMethod() || IsEnCAddedMethod())
         return TRUE;
index f4f2a70..c1316d0 100644 (file)
@@ -284,7 +284,7 @@ public:
         }
         CONTRACTL_END
 
-        return !MayHaveNativeCode() || IsRemotingInterceptedViaPrestub();
+        return !MayHaveNativeCode() || IsRemotingInterceptedViaPrestub() || IsVersionableWithPrecode();
     }
 
     void InterlockedUpdateFlags2(BYTE bMask, BOOL fSet);
index 5f37ff9..507f8d3 100644 (file)
@@ -1736,15 +1736,20 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
     // When the TieredCompilationManager has received enough call notifications
     // for this method only then do we back-patch it.
     BOOL fCanBackpatchPrestub = TRUE;
+    BOOL fEligibleForCallCounting = FALSE;
 #ifdef FEATURE_TIERED_COMPILATION
     TieredCompilationManager* pTieredCompilationManager = nullptr;
     BOOL fEligibleForTieredCompilation = IsEligibleForTieredCompilation();
     BOOL fWasPromotedToTier1 = FALSE;
     if (fEligibleForTieredCompilation)
     {
-        pTieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
-        CallCounter * pCallCounter = GetCallCounter();
-        pCallCounter->OnMethodCalled(this, pTieredCompilationManager, &fCanBackpatchPrestub, &fWasPromotedToTier1);
+        fEligibleForCallCounting = g_pConfig->TieredCompilation_CallCounting();
+        if (fEligibleForCallCounting)
+        {
+            pTieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
+            CallCounter * pCallCounter = GetCallCounter();
+            pCallCounter->OnMethodCalled(this, pTieredCompilationManager, &fCanBackpatchPrestub, &fWasPromotedToTier1);
+        }
     }
 #endif
 
@@ -1757,7 +1762,7 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
     {
         pCode = GetCodeVersionManager()->PublishVersionableCodeIfNecessary(this, fCanBackpatchPrestub);
 
-        if (pTieredCompilationManager != nullptr && fCanBackpatchPrestub && pCode != NULL && !fWasPromotedToTier1)
+        if (pTieredCompilationManager != nullptr && fEligibleForCallCounting && fCanBackpatchPrestub && pCode != NULL && !fWasPromotedToTier1)
         {
             pTieredCompilationManager->OnMethodCallCountingStoppedWithoutTier1Promotion(this);
         }
index f139dde..b87d01a 100644 (file)
@@ -611,7 +611,8 @@ CORJIT_FLAGS TieredCompilationManager::GetJitFlags(NativeCodeVersion nativeCodeV
         return flags;
     }
     
-    if (nativeCodeVersion.GetOptimizationTier() == NativeCodeVersion::OptimizationTier0)
+    if (nativeCodeVersion.GetOptimizationTier() == NativeCodeVersion::OptimizationTier0 &&
+        !g_pConfig->TieredCompilation_OptimizeTier0())
     {
         flags.Set(CORJIT_FLAGS::CORJIT_FLAG_TIER0);
     }
index 85185b4..13554c8 100644 (file)
@@ -71,15 +71,13 @@ namespace JitBench
                     startInfo.WorkingDirectory = WorkingDirPath;
                     startInfo.RedirectStandardError = true;
                     startInfo.RedirectStandardOutput = true;
-                    foreach (KeyValuePair<string, string> kv in config.EnvironmentVariables)
+                    IEnumerable<KeyValuePair<string, string>> extraEnvVars = config.EnvironmentVariables.Concat(EnvironmentVariables).Append(new KeyValuePair<string, string>("DOTNET_MULTILEVEL_LOOKUP", "0"));
+                    foreach (KeyValuePair<string, string> kv in extraEnvVars)
                     {
                         startInfo.Environment[kv.Key] = kv.Value;
                     }
-                    foreach (KeyValuePair<string, string> kv in EnvironmentVariables)
-                    {
-                        startInfo.Environment[kv.Key] = kv.Value;
-                    }
-                    startInfo.Environment["DOTNET_MULTILEVEL_LOOKUP"] = "0";
+                    output.WriteLine("XUnitPerfHarness doesn't log env vars it uses to run processes. To workaround, logging them here:");
+                    output.WriteLine(string.Join(", ", extraEnvVars.Select(kv => kv.Key + "=" + kv.Value)));
 
                     BenchmarkRunResult result = new BenchmarkRunResult(this, config);
                     StringBuilder stderr = new StringBuilder();