From 9c3134e445412ecfac84cdd3239dea8b04a36c40 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Michal=20Strehovsk=C3=BD?= Date: Mon, 5 Dec 2022 19:53:06 +0900 Subject: [PATCH] Speed up data dehydration (#79209) Now that we switched everything we wanted to switch to dehydrated data, I redid startup measurements. I used a 30 MB app that doesn't actually do anything useful with those megabytes and just exits (to get a lot of dehydrated data into the startup path). On Windows: (The measurements were frustratingly noisy within a range that spans 10 ms.) Without dehydration, the app exits in ~27 ms. With dehydration, the app exits in ~37 ms. With the fixes in this PR and dehydration, it exits in ~34 ms. On Linux, the difference between not dehydrated and dehydrated is ~8 ms -> ~12 ms. I didn't re-measure with this PR. Possibly shaved off a millisecond. --- .../CompilerHelpers/StartupCodeHelpers.cs | 41 +++++++++++++++++-- .../Common/Internal/Runtime/DehydratedData.cs | 2 + 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs index 49a5039b373..1ff01b55101 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/CompilerHelpers/StartupCodeHelpers.cs @@ -262,9 +262,44 @@ namespace Internal.Runtime.CompilerHelpers switch (command) { case DehydratedDataCommand.Copy: - // TODO: can we do any kind of memcpy here? - for (; payload > 0; payload--) - *pDest++ = *pCurrent++; + Debug.Assert(payload != 0); + if (payload < 4) + { + *pDest = *pCurrent; + if (payload > 1) + *(short*)(pDest + payload - 2) = *(short*)(pCurrent + payload - 2); + } + else if (payload < 8) + { + *(int*)pDest = *(int*)pCurrent; + *(int*)(pDest + payload - 4) = *(int*)(pCurrent + payload - 4); + } + else if (payload <= 16) + { +#if TARGET_64BIT + *(long*)pDest = *(long*)pCurrent; + *(long*)(pDest + payload - 8) = *(long*)(pCurrent + payload - 8); +#else + *(int*)pDest = *(int*)pCurrent; + *(int*)(pDest + 4) = *(int*)(pCurrent + 4); + *(int*)(pDest + payload - 8) = *(int*)(pCurrent + payload - 8); + *(int*)(pDest + payload - 4) = *(int*)(pCurrent + payload - 4); +#endif + } + else + { + // At the time of writing this, 90% of DehydratedDataCommand.Copy cases + // would fall into the above specialized cases. 10% fall back to memmove. + memmove(pDest, pCurrent, (nuint)payload); + + // Not a DllImport - we don't need a GC transition since this is early startup + [MethodImplAttribute(MethodImplOptions.InternalCall)] + [RuntimeImport("*", "memmove")] + static extern unsafe void* memmove(byte* dmem, byte* smem, nuint size); + } + + pDest += payload; + pCurrent += payload; break; case DehydratedDataCommand.ZeroFill: pDest += payload; diff --git a/src/coreclr/tools/Common/Internal/Runtime/DehydratedData.cs b/src/coreclr/tools/Common/Internal/Runtime/DehydratedData.cs index dee7c2cfdfd..f640a7b504f 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/DehydratedData.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/DehydratedData.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Runtime.CompilerServices; using Debug = System.Diagnostics.Debug; @@ -59,6 +60,7 @@ namespace Internal.Runtime return 1 + numExtraBytes; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe byte* Decode(byte* pB, out int command, out int payload) { byte b = *pB; -- 2.34.1