JIT: Unspill normalize-on-load variables using exact small type (#90318)
authorJakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
Mon, 14 Aug 2023 17:20:11 +0000 (19:20 +0200)
committerGitHub <noreply@github.com>
Mon, 14 Aug 2023 17:20:11 +0000 (19:20 +0200)
The existing logic would sometimes unspill using the type of the local
that is being unspilled. This type is often wider than the exact small
type in the LclVarDsc, since NOL locals are normally expanded as
CAST(<small type>, LCL_VAR<int>).

This causes problems since morph will in some cases avoid inserting
normalization for NOL locals when it has a subrange assertion available.
This optimization relies on the backend always ensuring that the local
will be normalized as part of unspilling and args homing.

Size-wise regressions are expected on xarch since the encoding of the
normalizing load is larger. However, as we have seen before, using wide
loads can cause significant store-forwarding stalls which can have large
negative impact on performance, so overall there is an expected perf
benefit of using the small loads (in addition to the correctness fix).

Fix #90219

src/coreclr/jit/codegenlinear.cpp
src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs [new file with mode: 0644]
src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj [new file with mode: 0644]
src/tests/issues.targets

index 4c512a4..b0048c9 100644 (file)
@@ -1211,29 +1211,55 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
             // Reset spilled flag, since we are going to load a local variable from its home location.
             unspillTree->gtFlags &= ~GTF_SPILLED;
 
-            GenTreeLclVar* lcl       = unspillTree->AsLclVar();
-            LclVarDsc*     varDsc    = compiler->lvaGetDesc(lcl);
-            var_types      spillType = varDsc->GetRegisterType(lcl);
-            assert(spillType != TYP_UNDEF);
+            GenTreeLclVar* lcl         = unspillTree->AsLclVar();
+            LclVarDsc*     varDsc      = compiler->lvaGetDesc(lcl);
+            var_types      unspillType = varDsc->GetRegisterType(lcl);
+            assert(unspillType != TYP_UNDEF);
 
 // TODO-Cleanup: The following code could probably be further merged and cleaned up.
 #if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-            // Load local variable from its home location.
-            // Never allow truncating the locals here, otherwise a subsequent
-            // use of the local with a wider type would see the truncated
-            // value. We do allow wider loads as those can be efficient even
-            // when unaligned and might be smaller encoding wise (on xarch).
-            var_types lclLoadType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetStackSlotHomeType();
-            assert(lclLoadType != TYP_UNDEF);
-            if (genTypeSize(spillType) < genTypeSize(lclLoadType))
+
+            // Pick type to reload register from stack with. Note that in
+            // general, the type of 'lcl' does not have any relation to the
+            // type of 'varDsc':
+            //
+            // * For normalize-on-load (NOL) locals it is wider under normal
+            // circumstances, where morph has added a cast on top. In some
+            // cases it is the same, when morph has used a subrange assertion
+            // to avoid normalizing.
+            //
+            // * For all locals it can be narrower in some cases, when
+            // lowering optimizes to use a smaller typed `cmp` (e.g. 32-bit cmp
+            // for 64-bit local, or 8-bit cmp for 16-bit local).
+            //
+            // * For byrefs it can differ in GC-ness (TYP_I_IMPL vs TYP_BYREF).
+            //
+            // In the NOL case the potential use of subrange assertions means
+            // we always have to normalize, even if 'lcl' is wide; we could
+            // have a GTF_SPILLED LCL_VAR<int>(NOL local) with a future
+            // LCL_VAR<ushort>(same NOL local), where the latter local then
+            // relies on the normalization to have happened here as part of
+            // unspilling.
+            //
+            if (varDsc->lvNormalizeOnLoad())
             {
-                spillType = lclLoadType;
+                unspillType = varDsc->TypeGet();
+            }
+            else
+            {
+                // Potentially narrower -- see if we should widen.
+                var_types lclLoadType = varDsc->GetStackSlotHomeType();
+                assert(lclLoadType != TYP_UNDEF);
+                if (genTypeSize(unspillType) < genTypeSize(lclLoadType))
+                {
+                    unspillType = lclLoadType;
+                }
             }
 
 #if defined(TARGET_LOONGARCH64)
             if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum()))
             {
-                spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
+                unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
             }
 #endif
 #elif defined(TARGET_ARM)
@@ -1241,9 +1267,10 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
 #else
             NYI("Unspilling not implemented for this target architecture.");
 #endif
+
             bool reSpill   = ((unspillTree->gtFlags & GTF_SPILL) != 0);
             bool isLastUse = lcl->IsLastUse(0);
-            genUnspillLocal(lcl->GetLclNum(), spillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);
+            genUnspillLocal(lcl->GetLclNum(), unspillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);
         }
         else if (unspillTree->IsMultiRegLclVar())
         {
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs b/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs
new file mode 100644 (file)
index 0000000..b314f36
--- /dev/null
@@ -0,0 +1,110 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+
+// Generated by Fuzzlyn v1.5 on 2023-08-05 16:08:41
+// Run on X86 Windows
+// Seed: 7610693270284065118
+// Reduced from 12.0 KiB to 2.2 KiB in 00:01:06
+// Debug: Outputs 1
+// Release: Outputs 2
+public interface I0
+{
+}
+
+public class C0 : I0
+{
+    public ulong F0;
+    public uint F4;
+    public C0(ulong f0, uint f4)
+    {
+        F0 = f0;
+        F4 = f4;
+    }
+}
+
+public class Runtime_90219
+{
+    public static IRuntime s_rt;
+    public static I0 s_1;
+    public static uint s_2;
+    public static byte[] s_4 = new byte[]{0};
+    public static byte Result;
+
+    [Fact]
+    public static int TestEntryPoint()
+    {
+        CollectibleALC alc = new CollectibleALC();
+        System.Reflection.Assembly asm = alc.LoadFromAssemblyPath(System.Reflection.Assembly.GetExecutingAssembly().Location);
+        System.Reflection.MethodInfo mi = asm.GetType(typeof(Runtime_90219).FullName).GetMethod(nameof(MainInner));
+        System.Type runtimeTy = asm.GetType(typeof(Runtime).FullName);
+        return (int)mi.Invoke(null, new object[]{System.Activator.CreateInstance(runtimeTy)});
+    }
+
+    public static int MainInner(IRuntime rt)
+    {
+        s_rt = rt;
+        var vr8 = new C0(0, 0);
+        var vr12 = s_4[0];
+        vr8.F0 *= M2(vr12);
+        long[] vr9 = new long[]{0};
+        bool vr10 = (int)M2(0) <= vr9[0];
+        return Result == 1 ? 100 : 101;
+    }
+
+    public static ulong M2(byte arg0)
+    {
+        for (int var0 = 0; var0 < 2; var0++)
+        {
+            var vr1 = new C0(0, 0);
+            var vr3 = new C0(0, 0);
+            uint vr14 = s_2;
+            s_rt.WriteLine("c_0", vr14);
+            uint vr15 = vr3.F4;
+            s_2 = M3(vr1, vr15);
+            s_1 = s_1;
+            s_1 = new C0(0, 0);
+            s_rt.WriteLine("c_6", var0);
+        }
+
+        var vr5 = new C0(0, 1);
+        uint vr18 = vr5.F4;
+        arg0 = (byte)vr18;
+        var vr7 = new C0(0, 1838341904U);
+        if ((arg0 >= (byte)M3(vr7, 0)))
+        {
+            arg0 <<= 1;
+        }
+
+        s_rt.WriteLine("c_7", arg0);
+        return 0;
+    }
+
+    public static uint M3(C0 argThis, uint arg0)
+    {
+        s_rt.WriteLine("c_0", arg0);
+        return argThis.F4;
+    }
+}
+
+public interface IRuntime
+{
+    void WriteLine<T>(string site, T value);
+}
+
+public class Runtime : IRuntime
+{
+    public void WriteLine<T>(string site, T value)
+    {
+        if (typeof(T) == typeof(byte))
+            Runtime_90219.Result = (byte)(object)value;
+    }
+}
+
+public class CollectibleALC : System.Runtime.Loader.AssemblyLoadContext
+{
+    public CollectibleALC(): base(true)
+    {
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj
new file mode 100644 (file)
index 0000000..de6d5e0
--- /dev/null
@@ -0,0 +1,8 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
index 6e88c46..754af1a 100644 (file)
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_76273/Runtime_76273/**">
             <Issue>https://github.com/dotnet/runtimelab/issues/155: Assembly.Load</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/**">
+            <Issue>https://github.com/dotnet/runtimelab/issues/155: Assembly.Load</Issue>
+        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/reflection\DefaultInterfaceMethods\Emit\*">
             <Issue>https://github.com/dotnet/runtimelab/issues/155: Reflection.Emit</Issue>
         </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/simpleruntimeeventvalidation/**">
             <Issue>https://github.com/dotnet/runtime/issues/88499</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/*">
+            <Issue>https://github.com/dotnet/runtime/issues/90374</Issue>
+        </ExcludeList>
     </ItemGroup>
 
     <!-- Known failures for mono runtime on Windows -->
             <Issue>https://github.com/dotnet/runtime/issues/48914</Issue>
         </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_76273/Runtime_76273/**">
-            <Issue>Fuzzlyn</Issue>
+            <Issue>https://github.com/dotnet/runtime/issues/90372</Issue>
         </ExcludeList>
         <ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base/Pause*/**">
             <Issue>https://github.com/dotnet/runtime/issues/73454;https://github.com/dotnet/runtime/pull/61707#issuecomment-973122341</Issue>
         <ExcludeList Include="$(XunitTestBinBase)/Loader/StartupHooks/**">
             <Issue>Loads an assembly from file</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/*">
+            <Issue>Loads an assembly from file</Issue>
+        </ExcludeList>
     </ItemGroup>
 
     <ItemGroup Condition="'$(TargetArchitecture)' == 'wasm'">