Improve MemoryMarshal.Cast (#16659)
authormikedn <onemihaid@hotmail.com>
Thu, 1 Mar 2018 15:34:12 +0000 (17:34 +0200)
committerAndy Ayers <andya@microsoft.com>
Thu, 1 Mar 2018 15:34:12 +0000 (07:34 -0800)
Avoid unnecessary checked and signed arithmetic. Handle special cases such as cast between same size types and from byte sized types, the JIT is unable to optimize these cases currently.

src/mscorlib/shared/System/Runtime/InteropServices/MemoryMarshal.Fast.cs

index 01c98eeb0ec15baf395d06d5a7ed8f24eaf94fc9..5d2a4447d3ded2b82f87d159c789259998d1ac87 100644 (file)
@@ -60,9 +60,38 @@ namespace System.Runtime.InteropServices
             if (RuntimeHelpers.IsReferenceOrContainsReferences<TTo>())
                 ThrowHelper.ThrowInvalidTypeWithPointersNotSupported(typeof(TTo));
 
+            // Use unsigned integers - unsigned division by constant (especially by power of 2)
+            // and checked casts are faster and smaller.
+            uint fromSize = (uint)Unsafe.SizeOf<TFrom>();
+            uint toSize = (uint)Unsafe.SizeOf<TTo>();
+            uint fromLength = (uint)source.Length;
+            int toLength;
+            if (fromSize == toSize)
+            {
+                // Special case for same size types - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize`
+                // should be optimized to just `length` but the JIT doesn't do that today.
+                toLength = (int)fromLength;
+            }
+            else if (fromSize == 1)
+            {
+                // Special case for byte sized TFrom - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize`
+                // becomes `(ulong)fromLength / (ulong)toSize` but the JIT can't narrow it down to `int`
+                // and can't eliminate the checked cast. This also avoids a 32 bit specific issue,
+                // the JIT can't eliminate long multiply by 1.
+                toLength = (int)(fromLength / toSize);
+            }
+            else
+            {
+                // Ensure that casts are done in such a way that the JIT is able to "see"
+                // the uint->ulong casts and the multiply together so that on 32 bit targets
+                // 32x32to64 multiplication is used.
+                ulong toLengthUInt64 = (ulong)fromLength * (ulong)fromSize / (ulong)toSize;
+                toLength = checked((int)toLengthUInt64);
+            }
+
             return new Span<TTo>(
                 ref Unsafe.As<TFrom, TTo>(ref source._pointer.Value),
-                checked((int)((long)source.Length * Unsafe.SizeOf<TFrom>() / Unsafe.SizeOf<TTo>())));
+                toLength);
         }
 
         /// <summary>
@@ -86,9 +115,38 @@ namespace System.Runtime.InteropServices
             if (RuntimeHelpers.IsReferenceOrContainsReferences<TTo>())
                 ThrowHelper.ThrowInvalidTypeWithPointersNotSupported(typeof(TTo));
 
+            // Use unsigned integers - unsigned division by constant (especially by power of 2)
+            // and checked casts are faster and smaller.
+            uint fromSize = (uint)Unsafe.SizeOf<TFrom>();
+            uint toSize = (uint)Unsafe.SizeOf<TTo>();
+            uint fromLength = (uint)source.Length;
+            int toLength;
+            if (fromSize == toSize)
+            {
+                // Special case for same size types - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize`
+                // should be optimized to just `length` but the JIT doesn't do that today.
+                toLength = (int)fromLength;
+            }
+            else if (fromSize == 1)
+            {
+                // Special case for byte sized TFrom - `(ulong)fromLength * (ulong)fromSize / (ulong)toSize`
+                // becomes `(ulong)fromLength / (ulong)toSize` but the JIT can't narrow it down to `int`
+                // and can't eliminate the checked cast. This also avoids a 32 bit specific issue,
+                // the JIT can't eliminate long multiply by 1.
+                toLength = (int)(fromLength / toSize);
+            }
+            else
+            {
+                // Ensure that casts are done in such a way that the JIT is able to "see"
+                // the uint->ulong casts and the multiply together so that on 32 bit targets
+                // 32x32to64 multiplication is used.
+                ulong toLengthUInt64 = (ulong)fromLength * (ulong)fromSize / (ulong)toSize;
+                toLength = checked((int)toLengthUInt64);
+            }
+
             return new ReadOnlySpan<TTo>(
                 ref Unsafe.As<TFrom, TTo>(ref MemoryMarshal.GetReference(source)),
-                checked((int)((long)source.Length * Unsafe.SizeOf<TFrom>() / Unsafe.SizeOf<TTo>())));
+                toLength);
         }
 
         /// <summary>