From 5e82446fb1ce58d1ceb01ef04d53e01f42e5e4fa Mon Sep 17 00:00:00 2001 From: mikedn Date: Thu, 1 Mar 2018 17:34:12 +0200 Subject: [PATCH] Improve MemoryMarshal.Cast (#16659) Avoid unnecessary checked and signed arithmetic. Handle special cases such as casts between same-size types and from byte-sized types, which the JIT is currently unable to optimize. --- .../InteropServices/MemoryMarshal.Fast.cs | 62 ++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/mscorlib/shared/System/Runtime/InteropServices/MemoryMarshal.Fast.cs b/src/mscorlib/shared/System/Runtime/InteropServices/MemoryMarshal.Fast.cs index 01c98eeb0e..5d2a4447d3 100644 --- a/src/mscorlib/shared/System/Runtime/InteropServices/MemoryMarshal.Fast.cs +++ b/src/mscorlib/shared/System/Runtime/InteropServices/MemoryMarshal.Fast.cs @@ -60,9 +60,38 @@ namespace System.Runtime.InteropServices if (RuntimeHelpers.IsReferenceOrContainsReferences()) ThrowHelper.ThrowInvalidTypeWithPointersNotSupported(typeof(TTo)); + // Use unsigned integers - unsigned division by constant (especially by power of 2) + // and checked casts are faster and smaller. + uint fromSize = (uint)Unsafe.SizeOf(); + uint toSize = (uint)Unsafe.SizeOf(); + uint fromLength = (uint)source.Length; + int toLength; + if (fromSize == toSize) + { + // Special case for same size types - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize` + // should be optimized to just `length` but the JIT doesn't do that today. + toLength = (int)fromLength; + } + else if (fromSize == 1) + { + // Special case for byte sized TFrom - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize` + // becomes `(ulong)fromLength / (ulong)toSize` but the JIT can't narrow it down to `int` + // and can't eliminate the checked cast. This also avoids a 32 bit specific issue, + // the JIT can't eliminate long multiply by 1. 
+ toLength = (int)(fromLength / toSize); + } + else + { + // Ensure that casts are done in such a way that the JIT is able to "see" + // the uint->ulong casts and the multiply together so that on 32 bit targets + // 32x32to64 multiplication is used. + ulong toLengthUInt64 = (ulong)fromLength * (ulong)fromSize / (ulong)toSize; + toLength = checked((int)toLengthUInt64); + } + return new Span( ref Unsafe.As(ref source._pointer.Value), - checked((int)((long)source.Length * Unsafe.SizeOf() / Unsafe.SizeOf()))); + toLength); } /// @@ -86,9 +115,38 @@ namespace System.Runtime.InteropServices if (RuntimeHelpers.IsReferenceOrContainsReferences()) ThrowHelper.ThrowInvalidTypeWithPointersNotSupported(typeof(TTo)); + // Use unsigned integers - unsigned division by constant (especially by power of 2) + // and checked casts are faster and smaller. + uint fromSize = (uint)Unsafe.SizeOf(); + uint toSize = (uint)Unsafe.SizeOf(); + uint fromLength = (uint)source.Length; + int toLength; + if (fromSize == toSize) + { + // Special case for same size types - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize` + // should be optimized to just `length` but the JIT doesn't do that today. + toLength = (int)fromLength; + } + else if (fromSize == 1) + { + // Special case for byte sized TFrom - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize` + // becomes `(ulong)fromLength / (ulong)toSize` but the JIT can't narrow it down to `int` + // and can't eliminate the checked cast. This also avoids a 32 bit specific issue, + // the JIT can't eliminate long multiply by 1. + toLength = (int)(fromLength / toSize); + } + else + { + // Ensure that casts are done in such a way that the JIT is able to "see" + // the uint->ulong casts and the multiply together so that on 32 bit targets + // 32x32to64 multiplication is used. 
+ ulong toLengthUInt64 = (ulong)fromLength * (ulong)fromSize / (ulong)toSize; + toLength = checked((int)toLengthUInt64); + } + return new ReadOnlySpan( ref Unsafe.As(ref MemoryMarshal.GetReference(source)), - checked((int)((long)source.Length * Unsafe.SizeOf() / Unsafe.SizeOf()))); + toLength); } /// -- 2.34.1