Rewrite how Matrix3x2 and Matrix4x4 are implemented (#80091)
authorTanner Gooding <tagoo@outlook.com>
Sat, 7 Jan 2023 00:45:06 +0000 (16:45 -0800)
committerGitHub <noreply@github.com>
Sat, 7 Jan 2023 00:45:06 +0000 (16:45 -0800)
* Rewrite how Matrix3x2 and Matrix4x4 are implemented

* Fix a bug in lowerxarch related to merging Sse41.Insert chains

src/coreclr/jit/lowerxarch.cpp
src/libraries/System.Numerics.Vectors/tests/Matrix3x2Tests.cs
src/libraries/System.Numerics.Vectors/tests/Matrix4x4Tests.cs
src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix3x2.Impl.cs [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix3x2.cs
src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.Impl.cs [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.cs
src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs

index 9fa3cf3..58decd3 100644 (file)
@@ -2809,6 +2809,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
 
             if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
             {
+                assert(argCnt <= 4);
+                GenTree* insertedNodes[4];
+
                 for (N = 1; N < argCnt - 1; N++)
                 {
                     // We will be constructing the following parts:
@@ -2837,10 +2840,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                     idx = comp->gtNewIconNode(N << 4, TYP_INT);
                     BlockRange().InsertAfter(tmp2, idx);
 
-                    tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType,
+                    tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType,
                                                           simdSize);
-                    BlockRange().InsertAfter(idx, tmp1);
-                    LowerNode(tmp1);
+                    BlockRange().InsertAfter(idx, tmp3);
+
+                    insertedNodes[N] = tmp3;
+                    tmp1             = tmp3;
                 }
 
                 // We will be constructing the following parts:
@@ -2868,6 +2873,18 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
                 BlockRange().InsertAfter(tmp2, idx);
 
                 node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, tmp2, idx);
+
+                for (N = 1; N < argCnt - 1; N++)
+                {
+                    // LowerNode for NI_SSE41_Insert specially handles zeros, constants, and certain mask values
+                    // to do the minimal number of operations and may merge together two neighboring inserts that
+                    // don't have any side effects between them. Because of this and because of the interdependence
+                    // of the inserts we've created above, we need to wait to lower the generated inserts until after
+                    // we've completed the chain.
+
+                    GenTree* insertedNode = insertedNodes[N];
+                    LowerNode(insertedNode);
+                }
                 break;
             }
 
index f4144ad..4374cb7 100644 (file)
@@ -568,12 +568,15 @@ namespace System.Numerics.Tests
         public void Matrix3x2GetHashCodeTest()
         {
             Matrix3x2 target = GenerateIncrementalMatrixNumber();
-            int expected = HashCode.Combine(target.M11, target.M12,
-                                     target.M21, target.M22,
-                                     target.M31, target.M32);
-            int actual;
 
-            actual = target.GetHashCode();
+            int expected = HashCode.Combine(
+                new Vector2(target.M11, target.M12),
+                new Vector2(target.M21, target.M22),
+                new Vector2(target.M31, target.M32)
+            );
+
+            int actual = target.GetHashCode();
+
             Assert.Equal(expected, actual);
         }
 
index 98bf54d..0aacc92 100644 (file)
@@ -1690,29 +1690,13 @@ namespace System.Numerics.Tests
         {
             Matrix4x4 target = GenerateIncrementalMatrixNumber();
 
-            HashCode hash = default;
+            int expected = HashCode.Combine(
+                new Vector4(target.M11, target.M12, target.M13, target.M14),
+                new Vector4(target.M21, target.M22, target.M23, target.M24),
+                new Vector4(target.M31, target.M32, target.M33, target.M34),
+                new Vector4(target.M41, target.M42, target.M43, target.M44)
+            );
 
-            hash.Add(target.M11);
-            hash.Add(target.M12);
-            hash.Add(target.M13);
-            hash.Add(target.M14);
-
-            hash.Add(target.M21);
-            hash.Add(target.M22);
-            hash.Add(target.M23);
-            hash.Add(target.M24);
-
-            hash.Add(target.M31);
-            hash.Add(target.M32);
-            hash.Add(target.M33);
-            hash.Add(target.M34);
-
-            hash.Add(target.M41);
-            hash.Add(target.M42);
-            hash.Add(target.M43);
-            hash.Add(target.M44);
-
-            int expected = hash.ToHashCode();
             int actual = target.GetHashCode();
 
             Assert.Equal(expected, actual);
index 58fe300..63fd459 100644 (file)
     <Compile Include="$(MSBuildThisFileDirectory)System\Number.Parsing.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\BitOperations.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Matrix3x2.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Matrix3x2.Impl.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Matrix4x4.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Matrix4x4.Impl.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Plane.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Quaternion.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\TotalOrderIeee754Comparer.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\IUnaryPlusOperators.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Numerics\IUnsignedNumber.cs" />
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix3x2.Impl.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix3x2.Impl.cs
new file mode 100644 (file)
index 0000000..4fb270b
--- /dev/null
@@ -0,0 +1,522 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace System.Numerics
+{
+    public partial struct Matrix3x2
+    {
+        // See Matrix3x2.cs for an explanation of why this file/type exists
+        //
+        // Note that we use some particular patterns below, such as defining a result
+        // and assigning the fields directly rather than using the object initializer
+        // syntax. We do this because it saves roughly 8-bytes of IL per method which
+        // in turn helps improve inlining chances.
+
+        internal const uint RowCount = 3;
+        internal const uint ColumnCount = 2;
+
+        [UnscopedRef]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal ref Impl AsImpl() => ref Unsafe.As<Matrix3x2, Impl>(ref this);
+
+        [UnscopedRef]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal readonly ref readonly Impl AsROImpl() => ref Unsafe.As<Matrix3x2, Impl>(ref Unsafe.AsRef(in this));
+
+        internal struct Impl : IEquatable<Impl>
+        {
+            [UnscopedRef]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public ref Matrix3x2 AsM3x2() => ref Unsafe.As<Impl, Matrix3x2>(ref this);
+
+            private const float RotationEpsilon = 0.001f * MathF.PI / 180f;     // 0.1% of a degree
+
+            public Vector2 X;
+            public Vector2 Y;
+            public Vector2 Z;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Init(float m11, float m12,
+                             float m21, float m22,
+                             float m31, float m32)
+            {
+                X = new Vector2(m11, m12);
+                Y = new Vector2(m21, m22);
+                Z = new Vector2(m31, m32);
+            }
+
+            public static Impl Identity
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                get
+                {
+                    Impl result;
+
+                    result.X = Vector2.UnitX;
+                    result.Y = Vector2.UnitY;
+                    result.Z = Vector2.Zero;
+
+                    return result;
+                }
+            }
+
+            public float this[int row, int column]
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                readonly get
+                {
+                    if ((uint)row >= RowCount)
+                    {
+                        ThrowHelper.ThrowArgumentOutOfRangeException();
+                    }
+
+                    return Unsafe.Add(ref Unsafe.AsRef(in this.X), row)[column];
+                }
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                set
+                {
+                    if ((uint)row >= RowCount)
+                    {
+                        ThrowHelper.ThrowArgumentOutOfRangeException();
+                    }
+                    Unsafe.Add(ref this.X, row)[column] = value;
+                }
+            }
+
+            public readonly bool IsIdentity
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                get
+                {
+                    return (X == Vector2.UnitX)
+                        && (Y == Vector2.UnitY)
+                        && (Z == Vector2.Zero);
+                }
+            }
+
+            public Vector2 Translation
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                readonly get
+                {
+                    return Z;
+                }
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                set
+                {
+                    Z = value;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator +(in Impl left, in Impl right)
+            {
+                Impl result;
+
+                result.X = left.X + right.X;
+                result.Y = left.Y + right.Y;
+                result.Z = left.Z + right.Z;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static bool operator ==(in Impl left, in Impl right)
+            {
+                return (left.X == right.X)
+                    && (left.Y == right.Y)
+                    && (left.Z == right.Z);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static bool operator !=(in Impl left, in Impl right)
+            {
+                return (left.X != right.X)
+                    || (left.Y != right.Y)
+                    || (left.Z != right.Z);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator *(in Impl left, in Impl right)
+            {
+                Impl result;
+
+                result.X = new Vector2(
+                    left.X.X * right.X.X + left.X.Y * right.Y.X,
+                    left.X.X * right.X.Y + left.X.Y * right.Y.Y
+                );
+                result.Y = new Vector2(
+                    left.Y.X * right.X.X + left.Y.Y * right.Y.X,
+                    left.Y.X * right.X.Y + left.Y.Y * right.Y.Y
+                );
+                result.Z = new Vector2(
+                    left.Z.X * right.X.X + left.Z.Y * right.Y.X + right.Z.X,
+                    left.Z.X * right.X.Y + left.Z.Y * right.Y.Y + right.Z.Y
+                );
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator *(in Impl left, float right)
+            {
+                Impl result;
+
+                result.X = left.X * right;
+                result.Y = left.Y * right;
+                result.Z = left.Z * right;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator -(in Impl left, in Impl right)
+            {
+                Impl result;
+
+                result.X = left.X - right.X;
+                result.Y = left.Y - right.Y;
+                result.Z = left.Z - right.Z;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator -(in Impl value)
+            {
+                Impl result;
+
+                result.X = -value.X;
+                result.Y = -value.Y;
+                result.Z = -value.Z;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotation(float radians)
+            {
+                radians = MathF.IEEERemainder(radians, MathF.PI * 2);
+
+                float c;
+                float s;
+
+                if (radians > -RotationEpsilon && radians < RotationEpsilon)
+                {
+                    // Exact case for zero rotation.
+                    c = 1;
+                    s = 0;
+                }
+                else if (radians > MathF.PI / 2 - RotationEpsilon && radians < MathF.PI / 2 + RotationEpsilon)
+                {
+                    // Exact case for 90 degree rotation.
+                    c = 0;
+                    s = 1;
+                }
+                else if (radians < -MathF.PI + RotationEpsilon || radians > MathF.PI - RotationEpsilon)
+                {
+                    // Exact case for 180 degree rotation.
+                    c = -1;
+                    s = 0;
+                }
+                else if (radians > -MathF.PI / 2 - RotationEpsilon && radians < -MathF.PI / 2 + RotationEpsilon)
+                {
+                    // Exact case for 270 degree rotation.
+                    c = 0;
+                    s = -1;
+                }
+                else
+                {
+                    // Arbitrary rotation.
+                    c = MathF.Cos(radians);
+                    s = MathF.Sin(radians);
+                }
+
+                // [  c  s ]
+                // [ -s  c ]
+                // [  0  0 ]
+
+                Impl result;
+
+                result.X = new Vector2( c, s);
+                result.Y = new Vector2(-s, c);
+                result.Z = Vector2.Zero;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotation(float radians, Vector2 centerPoint)
+            {
+                radians = MathF.IEEERemainder(radians, MathF.PI * 2);
+
+                float c, s;
+
+                if (radians > -RotationEpsilon && radians < RotationEpsilon)
+                {
+                    // Exact case for zero rotation.
+                    c = 1;
+                    s = 0;
+                }
+                else if (radians > MathF.PI / 2 - RotationEpsilon && radians < MathF.PI / 2 + RotationEpsilon)
+                {
+                    // Exact case for 90 degree rotation.
+                    c = 0;
+                    s = 1;
+                }
+                else if (radians < -MathF.PI + RotationEpsilon || radians > MathF.PI - RotationEpsilon)
+                {
+                    // Exact case for 180 degree rotation.
+                    c = -1;
+                    s = 0;
+                }
+                else if (radians > -MathF.PI / 2 - RotationEpsilon && radians < -MathF.PI / 2 + RotationEpsilon)
+                {
+                    // Exact case for 270 degree rotation.
+                    c = 0;
+                    s = -1;
+                }
+                else
+                {
+                    // Arbitrary rotation.
+                    c = MathF.Cos(radians);
+                    s = MathF.Sin(radians);
+                }
+
+                float x = centerPoint.X * (1 - c) + centerPoint.Y * s;
+                float y = centerPoint.Y * (1 - c) - centerPoint.X * s;
+
+                // [  c  s ]
+                // [ -s  c ]
+                // [  x  y ]
+
+                Impl result;
+
+                result.X = new Vector2( c, s);
+                result.Y = new Vector2(-s, c);
+                result.Z = new Vector2( x, y);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(Vector2 scales)
+            {
+                Impl result;
+
+                result.X = new Vector2(scales.X, 0);
+                result.Y = new Vector2(0, scales.Y);
+                result.Z = Vector2.Zero;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scaleX, float scaleY)
+            {
+                Impl result;
+
+                result.X = new Vector2(scaleX, 0);
+                result.Y = new Vector2(0, scaleY);
+                result.Z = Vector2.Zero;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scaleX, float scaleY, Vector2 centerPoint)
+            {
+                Impl result;
+
+                result.X = new Vector2(scaleX, 0);
+                result.Y = new Vector2(0, scaleY);
+                result.Z = centerPoint * (Vector2.One - new Vector2(scaleX, scaleY));
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(Vector2 scales, Vector2 centerPoint)
+            {
+                Impl result;
+
+                result.X = new Vector2(scales.X, 0);
+                result.Y = new Vector2(0, scales.Y);
+                result.Z = centerPoint * (Vector2.One - scales);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scale)
+            {
+                Impl result;
+
+                result.X = new Vector2(scale, 0);
+                result.Y = new Vector2(0, scale);
+                result.Z = Vector2.Zero;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scale, Vector2 centerPoint)
+            {
+                Impl result;
+
+                result.X = new Vector2(scale, 0);
+                result.Y = new Vector2(0, scale);
+                result.Z = centerPoint * (Vector2.One - new Vector2(scale));
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateSkew(float radiansX, float radiansY)
+            {
+                Impl result;
+
+                result.X = new Vector2(1, MathF.Tan(radiansY));
+                result.Y = new Vector2(MathF.Tan(radiansX), 1);
+                result.Z = Vector2.Zero;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateSkew(float radiansX, float radiansY, Vector2 centerPoint)
+            {
+                float xTan = MathF.Tan(radiansX);
+                float yTan = MathF.Tan(radiansY);
+
+                float tx = -centerPoint.Y * xTan;
+                float ty = -centerPoint.X * yTan;
+
+                Impl result;
+
+                result.X = new Vector2(1, yTan);
+                result.Y = new Vector2(xTan, 1);
+                result.Z = new Vector2(tx, ty);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateTranslation(Vector2 position)
+            {
+                Impl result;
+
+                result.X = Vector2.UnitX;
+                result.Y = Vector2.UnitY;
+                result.Z = position;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateTranslation(float positionX, float positionY)
+            {
+                Impl result;
+
+                result.X = Vector2.UnitX;
+                result.Y = Vector2.UnitY;
+                result.Z = new Vector2(positionX, positionY);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static bool Invert(in Impl matrix, out Impl result)
+            {
+                float det = (matrix.X.X * matrix.Y.Y) - (matrix.Y.X * matrix.X.Y);
+
+                if (MathF.Abs(det) < float.Epsilon)
+                {
+                    Vector2 vNaN = new Vector2(float.NaN);
+
+                    result.X = vNaN;
+                    result.Y = vNaN;
+                    result.Z = vNaN;
+
+                    return false;
+                }
+
+                float invDet = 1.0f / det;
+
+                result.X = new Vector2(
+                    +matrix.Y.Y * invDet,
+                    -matrix.X.Y * invDet
+                );
+                result.Y = new Vector2(
+                    -matrix.Y.X * invDet,
+                    +matrix.X.X * invDet
+                );
+                result.Z = new Vector2(
+                    (matrix.Y.X * matrix.Z.Y - matrix.Z.X * matrix.Y.Y) * invDet,
+                    (matrix.Z.X * matrix.X.Y - matrix.X.X * matrix.Z.Y) * invDet
+                );
+
+                return true;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl Lerp(in Impl left, in Impl right, float amount)
+            {
+                Impl result;
+
+                result.X = Vector2.Lerp(left.X, right.X, amount);
+                result.Y = Vector2.Lerp(left.Y, right.Y, amount);
+                result.Z = Vector2.Lerp(left.Z, right.Z, amount);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override readonly bool Equals([NotNullWhen(true)] object? obj)
+                => (obj is Matrix3x2 other) && Equals(in other.AsImpl());
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public readonly bool Equals(in Impl other)
+            {
+                // This function needs to account for floating-point equality around NaN
+                // and so must behave equivalently to the underlying float/double.Equals
+
+                return X.Equals(other.X)
+                    && Y.Equals(other.Y)
+                    && Z.Equals(other.Z);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public readonly float GetDeterminant()
+            {
+                // There isn't actually any such thing as a determinant for a non-square matrix,
+                // but this 3x2 type is really just an optimization of a 3x3 where we happen to
+                // know the rightmost column is always (0, 0, 1). So we expand to 3x3 format:
+                //
+                //  [ X.X, X.Y, 0 ]
+                //  [ Y.X, Y.Y, 0 ]
+                //  [ Z.X, Z.Y, 1 ]
+                //
+                // Sum the diagonal products:
+                //  (X.X * Y.Y * 1) + (X.Y * 0 * Z.X) + (0 * Y.X * Z.Y)
+                //
+                // Subtract the opposite diagonal products:
+                //  (Z.X * Y.Y * 0) + (Z.Y * 0 * X.X) + (1 * Y.X * X.Y)
+                //
+                // Collapse out the constants and oh look, this is just a 2x2 determinant!
+
+                return (X.X * Y.Y) - (Y.X * X.Y);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override readonly int GetHashCode() => HashCode.Combine(X, Y, Z);
+
+            bool IEquatable<Impl>.Equals(Impl other) => Equals(in other);
+        }
+    }
+}
index f939ec9..49dac97 100644 (file)
@@ -3,7 +3,6 @@
 
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
-using System.Runtime.Intrinsics;
 
 namespace System.Numerics
 {
@@ -12,15 +11,17 @@ namespace System.Numerics
     /// [!INCLUDE[vectors-are-rows-paragraph](~/includes/system-numerics-vectors-are-rows.md)]
     /// ]]></format></remarks>
     [Intrinsic]
-    public struct Matrix3x2 : IEquatable<Matrix3x2>
+    public partial struct Matrix3x2 : IEquatable<Matrix3x2>
     {
-        private const float RotationEpsilon = 0.001f * MathF.PI / 180f;     // 0.1% of a degree
-
-        private static readonly Matrix3x2 _identity = new Matrix3x2(
-            1f, 0f,
-            0f, 1f,
-            0f, 0f
-        );
+        // In an ideal world, we'd have 3x Vector2 fields. However, Matrix3x2 was shipped with
+        // 6x public float fields and as such we cannot change the "backing" fields without it being
+        // a breaking change. Likewise, we cannot switch to using something like ExplicitLayout
+        // without it pessimizing other parts of the JIT and still preventing things like field promotion.
+        //
+        // This nested Impl struct works around this problem by relying on the JIT treating same sizeof
+        // value type bitcasts as a no-op. Effectively the entire implementation is here in this type
+        // and the public facing Matrix3x2 just defers to it with simple reinterpret casts inserted
+        // at the relevant points.
 
         /// <summary>The first element of the first row.</summary>
         public float M11;
@@ -51,21 +52,21 @@ namespace System.Numerics
                          float m21, float m22,
                          float m31, float m32)
         {
-            M11 = m11;
-            M12 = m12;
-
-            M21 = m21;
-            M22 = m22;
+            Unsafe.SkipInit(out this);
 
-            M31 = m31;
-            M32 = m32;
+            AsImpl().Init(
+                m11, m12,
+                m21, m22,
+                m31, m32
+            );
         }
 
         /// <summary>Gets the multiplicative identity matrix.</summary>
         /// <value>The multiplicative identify matrix.</value>
         public static Matrix3x2 Identity
         {
-            get => _identity;
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get => Impl.Identity.AsM3x2();
         }
 
         /// <summary>Gets or sets the element at the specified indices.</summary>
@@ -77,45 +78,32 @@ namespace System.Numerics
         /// -or-
         /// <paramref name="column" /> was less than zero or greater than the number of columns.
         /// </exception>
-        public unsafe float this[int row, int column]
+        public float this[int row, int column]
         {
-            get
-            {
-                if ((uint)row >= 3)
-                    ThrowHelper.ThrowArgumentOutOfRangeException();
-
-                Vector2 vrow = Unsafe.Add(ref Unsafe.As<float, Vector2>(ref M11), row);
-                return vrow[column];
-            }
-            set
-            {
-                if ((uint)row >= 3)
-                    ThrowHelper.ThrowArgumentOutOfRangeException();
-
-                ref Vector2 vrow = ref Unsafe.Add(ref Unsafe.As<float, Vector2>(ref M11), row);
-                var tmp = Vector2.WithElement(vrow, column, value);
-                vrow = tmp;
-            }
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            readonly get => AsROImpl()[row, column];
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            set => AsImpl()[row, column] = value;
         }
 
         /// <summary>Gets a value that indicates whether the current matrix is the identity matrix.</summary>
         /// <value><see langword="true" /> if the current matrix is the identity matrix; otherwise, <see langword="false" />.</value>
         public readonly bool IsIdentity
         {
-            get => this == Identity;
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get => AsROImpl().IsIdentity;
         }
 
         /// <summary>Gets or sets the translation component of this matrix.</summary>
         /// <value>The translation component of the current instance.</value>
         public Vector2 Translation
         {
-            readonly get => new Vector2(M31, M32);
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            readonly get => AsROImpl().Translation;
 
-            set
-            {
-                M31 = value.X;
-                M32 = value.Y;
-            }
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            set => AsImpl().Translation = value;
         }
 
         /// <summary>Adds each element in one matrix with its corresponding element in a second matrix.</summary>
@@ -123,46 +111,26 @@ namespace System.Numerics
         /// <param name="value2">The second matrix.</param>
         /// <returns>The matrix that contains the summed values.</returns>
         /// <remarks>The <see cref="System.Numerics.Matrix3x2.op_Addition" /> method defines the operation of the addition operator for <see cref="System.Numerics.Matrix3x2" /> objects.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 operator +(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            Matrix3x2 m;
-
-            m.M11 = value1.M11 + value2.M11;
-            m.M12 = value1.M12 + value2.M12;
-
-            m.M21 = value1.M21 + value2.M21;
-            m.M22 = value1.M22 + value2.M22;
-
-            m.M31 = value1.M31 + value2.M31;
-            m.M32 = value1.M32 + value2.M32;
-
-            return m;
-        }
+            => (value1.AsImpl() + value2.AsImpl()).AsM3x2();
 
         /// <summary>Returns a value that indicates whether the specified matrices are equal.</summary>
         /// <param name="value1">The first matrix to compare.</param>
         /// <param name="value2">The second matrix to compare.</param>
         /// <returns><see langword="true" /> if <paramref name="value1" /> and <paramref name="value2" /> are equal; otherwise, <see langword="false" />.</returns>
         /// <remarks>Two matrices are equal if all their corresponding elements are equal.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool operator ==(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            // Check diagonal element first for early out.
-            return (value1.M11 == value2.M11
-                 && value1.M22 == value2.M22
-                 && value1.M12 == value2.M12
-                 && value1.M21 == value2.M21
-                 && value1.M31 == value2.M31
-                 && value1.M32 == value2.M32);
-        }
+            => value1.AsImpl() == value2.AsImpl();
 
         /// <summary>Returns a value that indicates whether the specified matrices are not equal.</summary>
         /// <param name="value1">The first matrix to compare.</param>
         /// <param name="value2">The second matrix to compare.</param>
         /// <returns><see langword="true" /> if <paramref name="value1" /> and <paramref name="value2" /> are not equal; otherwise, <see langword="false" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool operator !=(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            return !(value1 == value2);
-        }
+            => value1.AsImpl() != value2.AsImpl();
 
         /// <summary>Multiplies two matrices together to compute the product.</summary>
         /// <param name="value1">The first matrix.</param>
@@ -170,85 +138,33 @@ namespace System.Numerics
         /// <returns>The product matrix.</returns>
         /// <remarks>The <see cref="System.Numerics.Matrix3x2.op_Multiply" /> method defines the operation of the multiplication operator for <see cref="System.Numerics.Matrix3x2" /> objects.</remarks>
         public static Matrix3x2 operator *(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            Matrix3x2 m;
-
-            // First row
-            m.M11 = value1.M11 * value2.M11 + value1.M12 * value2.M21;
-            m.M12 = value1.M11 * value2.M12 + value1.M12 * value2.M22;
-
-            // Second row
-            m.M21 = value1.M21 * value2.M11 + value1.M22 * value2.M21;
-            m.M22 = value1.M21 * value2.M12 + value1.M22 * value2.M22;
-
-            // Third row
-            m.M31 = value1.M31 * value2.M11 + value1.M32 * value2.M21 + value2.M31;
-            m.M32 = value1.M31 * value2.M12 + value1.M32 * value2.M22 + value2.M32;
-
-            return m;
-        }
+            => (value1.AsImpl() * value2.AsImpl()).AsM3x2();
 
         /// <summary>Multiplies a matrix by a float to compute the product.</summary>
         /// <param name="value1">The matrix to scale.</param>
         /// <param name="value2">The scaling value to use.</param>
         /// <returns>The scaled matrix.</returns>
         /// <remarks>The <see cref="System.Numerics.Matrix3x2.op_Multiply" /> method defines the operation of the multiplication operator for <see cref="System.Numerics.Matrix3x2" /> objects.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 operator *(Matrix3x2 value1, float value2)
-        {
-            Matrix3x2 m;
-
-            m.M11 = value1.M11 * value2;
-            m.M12 = value1.M12 * value2;
-
-            m.M21 = value1.M21 * value2;
-            m.M22 = value1.M22 * value2;
-
-            m.M31 = value1.M31 * value2;
-            m.M32 = value1.M32 * value2;
-
-            return m;
-        }
+            => (value1.AsImpl() * value2).AsM3x2();
 
         /// <summary>Subtracts each element in a second matrix from its corresponding element in a first matrix.</summary>
         /// <param name="value1">The first matrix.</param>
         /// <param name="value2">The second matrix.</param>
         /// <returns>The matrix containing the values that result from subtracting each element in <paramref name="value2" /> from its corresponding element in <paramref name="value1" />.</returns>
         /// <remarks>The <see cref="System.Numerics.Matrix3x2.Subtract" /> method defines the operation of the subtraction operator for <see cref="System.Numerics.Matrix3x2" /> objects.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 operator -(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            Matrix3x2 m;
-
-            m.M11 = value1.M11 - value2.M11;
-            m.M12 = value1.M12 - value2.M12;
-
-            m.M21 = value1.M21 - value2.M21;
-            m.M22 = value1.M22 - value2.M22;
-
-            m.M31 = value1.M31 - value2.M31;
-            m.M32 = value1.M32 - value2.M32;
-
-            return m;
-        }
+            => (value1.AsImpl() - value2.AsImpl()).AsM3x2();
 
         /// <summary>Negates the specified matrix by multiplying all its values by -1.</summary>
         /// <param name="value">The matrix to negate.</param>
         /// <returns>The negated matrix.</returns>
         /// <altmember cref="System.Numerics.Matrix3x2.Negate(System.Numerics.Matrix3x2)"/>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 operator -(Matrix3x2 value)
-        {
-            Matrix3x2 m;
-
-            m.M11 = -value.M11;
-            m.M12 = -value.M12;
-
-            m.M21 = -value.M21;
-            m.M22 = -value.M22;
-
-            m.M31 = -value.M31;
-            m.M32 = -value.M32;
-
-            return m;
-        }
+            => (-value.AsImpl()).AsM3x2();
 
         /// <summary>Adds each element in one matrix with its corresponding element in a second matrix.</summary>
         /// <param name="value1">The first matrix.</param>
@@ -256,148 +172,33 @@ namespace System.Numerics
         /// <returns>The matrix that contains the summed values of <paramref name="value1" /> and <paramref name="value2" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 Add(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            return value1 + value2;
-        }
+            => (value1.AsImpl() + value2.AsImpl()).AsM3x2();
 
         /// <summary>Creates a rotation matrix using the given rotation in radians.</summary>
         /// <param name="radians">The amount of rotation, in radians.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix3x2 CreateRotation(float radians)
-        {
-            radians = MathF.IEEERemainder(radians, MathF.PI * 2);
-
-            float c, s;
-
-            if (radians > -RotationEpsilon && radians < RotationEpsilon)
-            {
-                // Exact case for zero rotation.
-                c = 1;
-                s = 0;
-            }
-            else if (radians > MathF.PI / 2 - RotationEpsilon && radians < MathF.PI / 2 + RotationEpsilon)
-            {
-                // Exact case for 90 degree rotation.
-                c = 0;
-                s = 1;
-            }
-            else if (radians < -MathF.PI + RotationEpsilon || radians > MathF.PI - RotationEpsilon)
-            {
-                // Exact case for 180 degree rotation.
-                c = -1;
-                s = 0;
-            }
-            else if (radians > -MathF.PI / 2 - RotationEpsilon && radians < -MathF.PI / 2 + RotationEpsilon)
-            {
-                // Exact case for 270 degree rotation.
-                c = 0;
-                s = -1;
-            }
-            else
-            {
-                // Arbitrary rotation.
-                c = MathF.Cos(radians);
-                s = MathF.Sin(radians);
-            }
-
-            // [  c  s ]
-            // [ -s  c ]
-            // [  0  0 ]
-            Matrix3x2 result = Identity;
-
-            result.M11 = c;
-            result.M12 = s;
-            result.M21 = -s;
-            result.M22 = c;
-
-            return result;
-        }
+            => Impl.CreateRotation(radians).AsM3x2();
 
         /// <summary>Creates a rotation matrix using the specified rotation in radians and a center point.</summary>
         /// <param name="radians">The amount of rotation, in radians.</param>
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix3x2 CreateRotation(float radians, Vector2 centerPoint)
-        {
-            Matrix3x2 result;
-
-            radians = MathF.IEEERemainder(radians, MathF.PI * 2);
-
-            float c, s;
-
-            if (radians > -RotationEpsilon && radians < RotationEpsilon)
-            {
-                // Exact case for zero rotation.
-                c = 1;
-                s = 0;
-            }
-            else if (radians > MathF.PI / 2 - RotationEpsilon && radians < MathF.PI / 2 + RotationEpsilon)
-            {
-                // Exact case for 90 degree rotation.
-                c = 0;
-                s = 1;
-            }
-            else if (radians < -MathF.PI + RotationEpsilon || radians > MathF.PI - RotationEpsilon)
-            {
-                // Exact case for 180 degree rotation.
-                c = -1;
-                s = 0;
-            }
-            else if (radians > -MathF.PI / 2 - RotationEpsilon && radians < -MathF.PI / 2 + RotationEpsilon)
-            {
-                // Exact case for 270 degree rotation.
-                c = 0;
-                s = -1;
-            }
-            else
-            {
-                // Arbitrary rotation.
-                c = MathF.Cos(radians);
-                s = MathF.Sin(radians);
-            }
-
-            float x = centerPoint.X * (1 - c) + centerPoint.Y * s;
-            float y = centerPoint.Y * (1 - c) - centerPoint.X * s;
-
-            // [  c  s ]
-            // [ -s  c ]
-            // [  x  y ]
-            result.M11 = c;
-            result.M12 = s;
-            result.M21 = -s;
-            result.M22 = c;
-            result.M31 = x;
-            result.M32 = y;
-
-            return result;
-        }
+            => Impl.CreateRotation(radians, centerPoint).AsM3x2();
 
         /// <summary>Creates a scaling matrix from the specified vector scale.</summary>
         /// <param name="scales">The scale to use.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix3x2 CreateScale(Vector2 scales)
-        {
-            Matrix3x2 result = Identity;
-
-            result.M11 = scales.X;
-            result.M22 = scales.Y;
-
-            return result;
-        }
+            => Impl.CreateScale(scales).AsM3x2();
 
         /// <summary>Creates a scaling matrix from the specified X and Y components.</summary>
         /// <param name="xScale">The value to scale by on the X axis.</param>
         /// <param name="yScale">The value to scale by on the Y axis.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix3x2 CreateScale(float xScale, float yScale)
-        {
-            Matrix3x2 result = Identity;
-
-            result.M11 = xScale;
-            result.M22 = yScale;
-
-            return result;
-        }
+            => Impl.CreateScale(xScale, yScale).AsM3x2();
 
         /// <summary>Creates a scaling matrix that is offset by a given center point.</summary>
         /// <param name="xScale">The value to scale by on the X axis.</param>
@@ -405,87 +206,34 @@ namespace System.Numerics
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix3x2 CreateScale(float xScale, float yScale, Vector2 centerPoint)
-        {
-            Matrix3x2 result = Identity;
-
-            float tx = centerPoint.X * (1 - xScale);
-            float ty = centerPoint.Y * (1 - yScale);
-
-            result.M11 = xScale;
-            result.M22 = yScale;
-            result.M31 = tx;
-            result.M32 = ty;
-
-            return result;
-        }
+            => Impl.CreateScale(xScale, yScale, centerPoint).AsM3x2();
 
         /// <summary>Creates a scaling matrix from the specified vector scale with an offset from the specified center point.</summary>
         /// <param name="scales">The scale to use.</param>
         /// <param name="centerPoint">The center offset.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix3x2 CreateScale(Vector2 scales, Vector2 centerPoint)
-        {
-            Matrix3x2 result = Identity;
-
-            float tx = centerPoint.X * (1 - scales.X);
-            float ty = centerPoint.Y * (1 - scales.Y);
-
-            result.M11 = scales.X;
-            result.M22 = scales.Y;
-            result.M31 = tx;
-            result.M32 = ty;
-
-            return result;
-        }
+            => Impl.CreateScale(scales, centerPoint).AsM3x2();
 
         /// <summary>Creates a scaling matrix that scales uniformly with the given scale.</summary>
         /// <param name="scale">The uniform scale to use.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix3x2 CreateScale(float scale)
-        {
-            Matrix3x2 result = Identity;
-
-            result.M11 = scale;
-            result.M22 = scale;
-
-            return result;
-        }
+            => Impl.CreateScale(scale).AsM3x2();
 
         /// <summary>Creates a scaling matrix that scales uniformly with the specified scale with an offset from the specified center.</summary>
         /// <param name="scale">The uniform scale to use.</param>
         /// <param name="centerPoint">The center offset.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix3x2 CreateScale(float scale, Vector2 centerPoint)
-        {
-            Matrix3x2 result = Identity;
-
-            float tx = centerPoint.X * (1 - scale);
-            float ty = centerPoint.Y * (1 - scale);
-
-            result.M11 = scale;
-            result.M22 = scale;
-            result.M31 = tx;
-            result.M32 = ty;
-
-            return result;
-        }
+            => Impl.CreateScale(scale, centerPoint).AsM3x2();
 
         /// <summary>Creates a skew matrix from the specified angles in radians.</summary>
         /// <param name="radiansX">The X angle, in radians.</param>
         /// <param name="radiansY">The Y angle, in radians.</param>
         /// <returns>The skew matrix.</returns>
         public static Matrix3x2 CreateSkew(float radiansX, float radiansY)
-        {
-            Matrix3x2 result = Identity;
-
-            float xTan = MathF.Tan(radiansX);
-            float yTan = MathF.Tan(radiansY);
-
-            result.M12 = yTan;
-            result.M21 = xTan;
-
-            return result;
-        }
+            => Impl.CreateSkew(radiansX, radiansY).AsM3x2();
 
         /// <summary>Creates a skew matrix from the specified angles in radians and a center point.</summary>
         /// <param name="radiansX">The X angle, in radians.</param>
@@ -493,50 +241,20 @@ namespace System.Numerics
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The skew matrix.</returns>
         public static Matrix3x2 CreateSkew(float radiansX, float radiansY, Vector2 centerPoint)
-        {
-            Matrix3x2 result = Identity;
-
-            float xTan = MathF.Tan(radiansX);
-            float yTan = MathF.Tan(radiansY);
-
-            float tx = -centerPoint.Y * xTan;
-            float ty = -centerPoint.X * yTan;
-
-            result.M12 = yTan;
-            result.M21 = xTan;
-
-            result.M31 = tx;
-            result.M32 = ty;
-
-            return result;
-        }
+            => Impl.CreateSkew(radiansX, radiansY, centerPoint).AsM3x2();
 
         /// <summary>Creates a translation matrix from the specified 2-dimensional vector.</summary>
         /// <param name="position">The translation position.</param>
         /// <returns>The translation matrix.</returns>
         public static Matrix3x2 CreateTranslation(Vector2 position)
-        {
-            Matrix3x2 result = Identity;
-
-            result.M31 = position.X;
-            result.M32 = position.Y;
-
-            return result;
-        }
+            => Impl.CreateTranslation(position).AsM3x2();
 
         /// <summary>Creates a translation matrix from the specified X and Y components.</summary>
         /// <param name="xPosition">The X position.</param>
         /// <param name="yPosition">The Y position.</param>
         /// <returns>The translation matrix.</returns>
         public static Matrix3x2 CreateTranslation(float xPosition, float yPosition)
-        {
-            Matrix3x2 result = Identity;
-
-            result.M31 = xPosition;
-            result.M32 = yPosition;
-
-            return result;
-        }
+            => Impl.CreateTranslation(xPosition, yPosition).AsM3x2();
 
         /// <summary>Tries to invert the specified matrix. The return value indicates whether the operation succeeded.</summary>
         /// <param name="matrix">The matrix to invert.</param>
@@ -544,26 +262,8 @@ namespace System.Numerics
         /// <returns><see langword="true" /> if <paramref name="matrix" /> was converted successfully; otherwise,  <see langword="false" />.</returns>
         public static bool Invert(Matrix3x2 matrix, out Matrix3x2 result)
         {
-            float det = (matrix.M11 * matrix.M22) - (matrix.M21 * matrix.M12);
-
-            if (MathF.Abs(det) < float.Epsilon)
-            {
-                result = new Matrix3x2(float.NaN, float.NaN, float.NaN, float.NaN, float.NaN, float.NaN);
-                return false;
-            }
-
-            float invDet = 1.0f / det;
-
-            result.M11 = matrix.M22 * invDet;
-            result.M12 = -matrix.M12 * invDet;
-
-            result.M21 = -matrix.M21 * invDet;
-            result.M22 = matrix.M11 * invDet;
-
-            result.M31 = (matrix.M21 * matrix.M32 - matrix.M31 * matrix.M22) * invDet;
-            result.M32 = (matrix.M31 * matrix.M12 - matrix.M11 * matrix.M32) * invDet;
-
-            return true;
+            Unsafe.SkipInit(out result);
+            return Impl.Invert(in matrix.AsImpl(), out result.AsImpl());
         }
 
         /// <summary>Performs a linear interpolation from one matrix to a second matrix based on a value that specifies the weighting of the second matrix.</summary>
@@ -571,24 +271,9 @@ namespace System.Numerics
         /// <param name="matrix2">The second matrix.</param>
         /// <param name="amount">The relative weighting of <paramref name="matrix2" />.</param>
         /// <returns>The interpolated matrix.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 Lerp(Matrix3x2 matrix1, Matrix3x2 matrix2, float amount)
-        {
-            Matrix3x2 result;
-
-            // First row
-            result.M11 = matrix1.M11 + (matrix2.M11 - matrix1.M11) * amount;
-            result.M12 = matrix1.M12 + (matrix2.M12 - matrix1.M12) * amount;
-
-            // Second row
-            result.M21 = matrix1.M21 + (matrix2.M21 - matrix1.M21) * amount;
-            result.M22 = matrix1.M22 + (matrix2.M22 - matrix1.M22) * amount;
-
-            // Third row
-            result.M31 = matrix1.M31 + (matrix2.M31 - matrix1.M31) * amount;
-            result.M32 = matrix1.M32 + (matrix2.M32 - matrix1.M32) * amount;
-
-            return result;
-        }
+            => Impl.Lerp(in matrix1.AsImpl(), in matrix2.AsImpl(), amount).AsM3x2();
 
         /// <summary>Multiplies two matrices together to compute the product.</summary>
         /// <param name="value1">The first matrix.</param>
@@ -596,9 +281,7 @@ namespace System.Numerics
         /// <returns>The product matrix.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 Multiply(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            return value1 * value2;
-        }
+            => (value1.AsImpl() * value2.AsImpl()).AsM3x2();
 
         /// <summary>Multiplies a matrix by a float to compute the product.</summary>
         /// <param name="value1">The matrix to scale.</param>
@@ -606,18 +289,14 @@ namespace System.Numerics
         /// <returns>The scaled matrix.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 Multiply(Matrix3x2 value1, float value2)
-        {
-            return value1 * value2;
-        }
+            => (value1.AsImpl() * value2).AsM3x2();
 
         /// <summary>Negates the specified matrix by multiplying all its values by -1.</summary>
         /// <param name="value">The matrix to negate.</param>
         /// <returns>The negated matrix.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 Negate(Matrix3x2 value)
-        {
-            return -value;
-        }
+            => (-value.AsImpl()).AsM3x2();
 
         /// <summary>Subtracts each element in a second matrix from its corresponding element in a first matrix.</summary>
         /// <param name="value1">The first matrix.</param>
@@ -625,9 +304,7 @@ namespace System.Numerics
         /// <returns>The matrix containing the values that result from subtracting each element in <paramref name="value2" /> from its corresponding element in <paramref name="value1" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix3x2 Subtract(Matrix3x2 value1, Matrix3x2 value2)
-        {
-            return value1 - value2;
-        }
+            => (value1.AsImpl() - value2.AsImpl()).AsM3x2();
 
         /// <summary>Returns a value that indicates whether this instance and a specified object are equal.</summary>
         /// <param name="obj">The object to compare with the current instance.</param>
@@ -635,9 +312,7 @@ namespace System.Numerics
         /// <remarks>The current instance and <paramref name="obj" /> are equal if <paramref name="obj" /> is a <see cref="System.Numerics.Matrix3x2" /> object and the corresponding elements of each matrix are equal.</remarks>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public override readonly bool Equals([NotNullWhen(true)] object? obj)
-        {
-            return (obj is Matrix3x2 other) && Equals(other);
-        }
+            => AsROImpl().Equals(obj);
 
         /// <summary>Returns a value that indicates whether this instance and another 3x2 matrix are equal.</summary>
         /// <param name="other">The other matrix.</param>
@@ -645,65 +320,25 @@ namespace System.Numerics
         /// <remarks>Two matrices are equal if all their corresponding elements are equal.</remarks>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly bool Equals(Matrix3x2 other)
-        {
-            // This function needs to account for floating-point equality around NaN
-            // and so must behave equivalently to the underlying float/double.Equals
-
-            if (Vector128.IsHardwareAccelerated)
-            {
-                // We'll two two overlapping comparisons. The first gets M11, M12, M21, and M22
-                // The second will get M21, M22, M31, and M32. This is more efficient overall.
-
-                return Vector128.LoadUnsafe(ref Unsafe.AsRef(in M11)).Equals(Vector128.LoadUnsafe(ref other.M11))
-                    && Vector128.LoadUnsafe(ref Unsafe.AsRef(in M21)).Equals(Vector128.LoadUnsafe(ref other.M21));
-
-            }
-
-            return SoftwareFallback(in this, other);
-
-            static bool SoftwareFallback(in Matrix3x2 self, Matrix3x2 other)
-            {
-                return self.M11.Equals(other.M11) && self.M22.Equals(other.M22) // Check diagonal element first for early out.
-                    && self.M12.Equals(other.M12) && self.M21.Equals(other.M21)
-                    && self.M31.Equals(other.M31) && self.M32.Equals(other.M32);
-            }
-        }
+            => AsROImpl().Equals(in other.AsImpl());
 
         /// <summary>Calculates the determinant for this matrix.</summary>
         /// <returns>The determinant.</returns>
         /// <remarks>The determinant is calculated by expanding the matrix with a third column whose values are (0,0,1).</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly float GetDeterminant()
-        {
-            // There isn't actually any such thing as a determinant for a non-square matrix,
-            // but this 3x2 type is really just an optimization of a 3x3 where we happen to
-            // know the rightmost column is always (0, 0, 1). So we expand to 3x3 format:
-            //
-            //  [ M11, M12, 0 ]
-            //  [ M21, M22, 0 ]
-            //  [ M31, M32, 1 ]
-            //
-            // Sum the diagonal products:
-            //  (M11 * M22 * 1) + (M12 * 0 * M31) + (0 * M21 * M32)
-            //
-            // Subtract the opposite diagonal products:
-            //  (M31 * M22 * 0) + (M32 * 0 * M11) + (1 * M21 * M12)
-            //
-            // Collapse out the constants and oh look, this is just a 2x2 determinant!
-
-            return (M11 * M22) - (M21 * M12);
-        }
+            => AsROImpl().GetDeterminant();
 
         /// <summary>Returns the hash code for this instance.</summary>
         /// <returns>The hash code.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public override readonly int GetHashCode()
-        {
-            return HashCode.Combine(M11, M12, M21, M22, M31, M32);
-        }
+            => AsROImpl().GetHashCode();
 
         /// <summary>Returns a string that represents this matrix.</summary>
         /// <returns>The string representation of this matrix.</returns>
         /// <remarks>The numeric values in the returned string are formatted by using the conventions of the current culture. For example, for the en-US culture, the returned string might appear as <c>{ {M11:1.1 M12:1.2} {M21:2.1 M22:2.2} {M31:3.1 M32:3.2} }</c>.</remarks>
-        public override readonly string ToString() =>
-            $"{{ {{M11:{M11} M12:{M12}}} {{M21:{M21} M22:{M22}}} {{M31:{M31} M32:{M32}}} }}";
+        public override readonly string ToString()
+            => $"{{ {{M11:{M11} M12:{M12}}} {{M21:{M21} M22:{M22}}} {{M31:{M31} M32:{M32}}} }}";
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.Impl.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.Impl.cs
new file mode 100644 (file)
index 0000000..f0a3b4f
--- /dev/null
@@ -0,0 +1,1620 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace System.Numerics
+{
+    public partial struct Matrix4x4
+    {
+        // See Matrix4x4.cs for an explanation of why this file/type exists
+        //
+        // Note that we use some particular patterns below, such as defining a result
+        // and assigning the fields directly rather than using the object initializer
+        // syntax. We do this because it saves roughly 8-bytes of IL per method which
+        // in turn helps improve inlining chances.
+
+        // TODO: Vector3 is "inefficient" and we'd be better off taking Vector4 or Vector128<T>
+
+        internal const uint RowCount = 4;
+        internal const uint ColumnCount = 4;
+
+        [UnscopedRef]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal ref Impl AsImpl() => ref Unsafe.As<Matrix4x4, Impl>(ref this);
+
+        [UnscopedRef]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal readonly ref readonly Impl AsROImpl() => ref Unsafe.As<Matrix4x4, Impl>(ref Unsafe.AsRef(in this));
+
+        internal struct Impl : IEquatable<Impl>
+        {
+            [UnscopedRef]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public ref Matrix4x4 AsM4x4() => ref Unsafe.As<Impl, Matrix4x4>(ref this);
+
+            private const float BillboardEpsilon = 1e-4f;
+            private const float BillboardMinAngle = 1.0f - (0.1f * (MathF.PI / 180.0f)); // 0.1 degrees
+            private const float DecomposeEpsilon = 0.0001f;
+
+            public Vector4 X;
+            public Vector4 Y;
+            public Vector4 Z;
+            public Vector4 W;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Init(float m11, float m12, float m13, float m14,
+                             float m21, float m22, float m23, float m24,
+                             float m31, float m32, float m33, float m34,
+                             float m41, float m42, float m43, float m44)
+            {
+                X = new Vector4(m11, m12, m13, m14);
+                Y = new Vector4(m21, m22, m23, m24);
+                Z = new Vector4(m31, m32, m33, m34);
+                W = new Vector4(m41, m42, m43, m44);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void Init(in Matrix3x2.Impl value)
+            {
+                X = new Vector4(value.X, 0, 0);
+                Y = new Vector4(value.Y, 0, 0);
+                Z = Vector4.UnitZ;
+                W = new Vector4(value.Z, 0, 1);
+            }
+
+            public static Impl Identity
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                get
+                {
+                    Impl result;
+
+                    result.X = Vector4.UnitX;
+                    result.Y = Vector4.UnitY;
+                    result.Z = Vector4.UnitZ;
+                    result.W = Vector4.UnitW;
+
+                    return result;
+                }
+            }
+
+            public float this[int row, int column]
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                readonly get
+                {
+                    if ((uint)row >= RowCount)
+                    {
+                        ThrowHelper.ThrowArgumentOutOfRangeException();
+                    }
+
+                    return Unsafe.Add(ref Unsafe.AsRef(in this.X), row)[column];
+                }
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                set
+                {
+                    if ((uint)row >= RowCount)
+                    {
+                        ThrowHelper.ThrowArgumentOutOfRangeException();
+                    }
+                    Unsafe.Add(ref this.X, row)[column] = value;
+                }
+            }
+
+            public readonly bool IsIdentity
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                get
+                {
+                    return (X == Vector4.UnitX)
+                        && (Y == Vector4.UnitY)
+                        && (Z == Vector4.UnitZ)
+                        && (W == Vector4.UnitW);
+                }
+            }
+
+            public Vector3 Translation
+            {
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                readonly get => new Vector3(W.X, W.Y, W.Z);
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                set
+                {
+                    W = new Vector4(value, W.W);
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator +(in Impl left, in Impl right)
+            {
+                Impl result;
+
+                result.X = left.X + right.X;
+                result.Y = left.Y + right.Y;
+                result.Z = left.Z + right.Z;
+                result.W = left.W + right.W;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static bool operator ==(in Impl left, in Impl right)
+            {
+                return (left.X == right.X)
+                    && (left.Y == right.Y)
+                    && (left.Z == right.Z)
+                    && (left.W == right.W);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static bool operator !=(in Impl left, in Impl right)
+            {
+                return (left.X != right.X)
+                    || (left.Y != right.Y)
+                    || (left.Z != right.Z)
+                    || (left.W != right.W);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator *(in Impl left, in Impl right)
+            {
+                Impl result;
+
+                // result.X = Transform(left.X, in right);
+                result.X = right.X * left.X.X;
+                result.X += right.Y * left.X.Y;
+                result.X += right.Z * left.X.Z;
+                result.X += right.W * left.X.W;
+
+                // result.Y = Transform(left.Y, in right);
+                result.Y = right.X * left.Y.X;
+                result.Y += right.Y * left.Y.Y;
+                result.Y += right.Z * left.Y.Z;
+                result.Y += right.W * left.Y.W;
+
+                // result.Z = Transform(left.Z, in right);
+                result.Z = right.X * left.Z.X;
+                result.Z += right.Y * left.Z.Y;
+                result.Z += right.Z * left.Z.Z;
+                result.Z += right.W * left.Z.W;
+
+                // result.W = Transform(left.W, in right);
+                result.W = right.X * left.W.X;
+                result.W += right.Y * left.W.Y;
+                result.W += right.Z * left.W.Z;
+                result.W += right.W * left.W.W;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator *(in Impl left, float right)
+            {
+                Impl result;
+
+                result.X = left.X * right;
+                result.Y = left.Y * right;
+                result.Z = left.Z * right;
+                result.W = left.W * right;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator -(in Impl left, in Impl right)
+            {
+                Impl result;
+
+                result.X = left.X - right.X;
+                result.Y = left.Y - right.Y;
+                result.Z = left.Z - right.Z;
+                result.W = left.W - right.W;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl operator -(in Impl value)
+            {
+                Impl result;
+
+                result.X = -value.X;
+                result.Y = -value.Y;
+                result.Z = -value.Z;
+                result.W = -value.W;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateBillboard(in Vector3 objectPosition, in Vector3 cameraPosition, in Vector3 cameraUpVector, in Vector3 cameraForwardVector)
+            {
+                Vector3 axisZ = objectPosition - cameraPosition;
+                float norm = axisZ.LengthSquared();
+
+                if (norm < BillboardEpsilon)
+                {
+                    axisZ = -cameraForwardVector;
+                }
+                else
+                {
+                    axisZ = Vector3.Multiply(axisZ, 1.0f / MathF.Sqrt(norm));
+                }
+
+                Vector3 axisX = Vector3.Normalize(Vector3.Cross(cameraUpVector, axisZ));
+                Vector3 axisY = Vector3.Cross(axisZ, axisX);
+
+                Impl result;
+
+                result.X = new Vector4(axisX, 0);
+                result.Y = new Vector4(axisY, 0);
+                result.Z = new Vector4(axisZ, 0);
+                result.W = new Vector4(objectPosition, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateConstrainedBillboard(in Vector3 objectPosition, in Vector3 cameraPosition, in Vector3 rotateAxis, in Vector3 cameraForwardVector, in Vector3 objectForwardVector)
+            {
+                // Treat the case when object and camera positions are too close.
+                Vector3 faceDir = objectPosition - cameraPosition;
+                float norm = faceDir.LengthSquared();
+
+                if (norm < BillboardEpsilon)
+                {
+                    faceDir = -cameraForwardVector;
+                }
+                else
+                {
+                    faceDir = Vector3.Multiply(faceDir, (1.0f / MathF.Sqrt(norm)));
+                }
+
+                Vector3 axisY = rotateAxis;
+
+                // Treat the case when angle between faceDir and rotateAxis is too close to 0.
+                float dot = Vector3.Dot(axisY, faceDir);
+
+                if (MathF.Abs(dot) > BillboardMinAngle)
+                {
+                    faceDir = objectForwardVector;
+
+                    // Make sure passed values are useful for compute.
+                    dot = Vector3.Dot(axisY, faceDir);
+
+                    if (MathF.Abs(dot) > BillboardMinAngle)
+                    {
+                        faceDir = (MathF.Abs(axisY.Z) > BillboardMinAngle) ? Vector3.UnitX : new Vector3(0, 0, -1);
+                    }
+                }
+
+                Vector3 axisX = Vector3.Normalize(Vector3.Cross(axisY, faceDir));
+                Vector3 axisZ = Vector3.Normalize(Vector3.Cross(axisX, axisY));
+
+                Impl result;
+
+                result.X = new Vector4(axisX, 0);
+                result.Y = new Vector4(axisY, 0);
+                result.Z = new Vector4(axisZ, 0);
+                result.W = new Vector4(objectPosition, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateFromAxisAngle(in Vector3 axis, float angle)
+            {
+                // a: angle
+                // x, y, z: unit vector for axis.
+                //
+                // Rotation matrix M can compute by using below equation.
+                //
+                //        T               T
+                //  M = uu + (cos a)( I-uu ) + (sin a)S
+                //
+                // Where:
+                //
+                //  u = ( x, y, z )
+                //
+                //      [  0 -z  y ]
+                //  S = [  z  0 -x ]
+                //      [ -y  x  0 ]
+                //
+                //      [ 1 0 0 ]
+                //  I = [ 0 1 0 ]
+                //      [ 0 0 1 ]
+                //
+                //
+                //     [  xx+cosa*(1-xx)   yx-cosa*yx-sina*z zx-cosa*xz+sina*y ]
+                // M = [ xy-cosa*yx+sina*z    yy+cosa(1-yy)  yz-cosa*yz-sina*x ]
+                //     [ zx-cosa*zx-sina*y zy-cosa*zy+sina*x   zz+cosa*(1-zz)  ]
+                //
+
+                float x = axis.X;
+                float y = axis.Y;
+                float z = axis.Z;
+
+                float sa = MathF.Sin(angle);
+                float ca = MathF.Cos(angle);
+
+                float xx = x * x;
+                float yy = y * y;
+                float zz = z * z;
+
+                float xy = x * y;
+                float xz = x * z;
+                float yz = y * z;
+
+                Impl result;
+
+                result.X = new Vector4(
+                    xx + ca * (1.0f - xx),
+                    xy - ca * xy + sa * z,
+                    xz - ca * xz - sa * y,
+                    0
+                );
+                result.Y = new Vector4(
+                    xy - ca * xy - sa * z,
+                    yy + ca * (1.0f - yy),
+                    yz - ca * yz + sa * x,
+                    0
+                );
+                result.Z = new Vector4(
+                    xz - ca * xz + sa * y,
+                    yz - ca * yz - sa * x,
+                    zz + ca * (1.0f - zz),
+                    0
+                );
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateFromQuaternion(in Quaternion quaternion)
+            {
+                float xx = quaternion.X * quaternion.X;
+                float yy = quaternion.Y * quaternion.Y;
+                float zz = quaternion.Z * quaternion.Z;
+
+                float xy = quaternion.X * quaternion.Y;
+                float wz = quaternion.Z * quaternion.W;
+                float xz = quaternion.Z * quaternion.X;
+                float wy = quaternion.Y * quaternion.W;
+                float yz = quaternion.Y * quaternion.Z;
+                float wx = quaternion.X * quaternion.W;
+
+                Impl result;
+
+                result.X = new Vector4(
+                    1.0f - 2.0f * (yy + zz),
+                    2.0f * (xy + wz),
+                    2.0f * (xz - wy),
+                    0
+                );
+                result.Y = new Vector4(
+                    2.0f * (xy - wz),
+                    1.0f - 2.0f * (zz + xx),
+                    2.0f * (yz + wx),
+                    0
+                );
+                result.Z = new Vector4(
+                    2.0f * (xz + wy),
+                    2.0f * (yz - wx),
+                    1.0f - 2.0f * (yy + xx),
+                    0
+                );
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateFromYawPitchRoll(float yaw, float pitch, float roll)
+            {
+                Quaternion q = Quaternion.CreateFromYawPitchRoll(yaw, pitch, roll);
+                return CreateFromQuaternion(q);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateLookAt(in Vector3 cameraPosition, in Vector3 cameraTarget, in Vector3 cameraUpVector)
+            {
+                Vector3 axisZ = Vector3.Normalize(cameraPosition - cameraTarget);
+                Vector3 axisX = Vector3.Normalize(Vector3.Cross(cameraUpVector, axisZ));
+                Vector3 axisY = Vector3.Cross(axisZ, axisX);
+
+                Impl result;
+
+                result.X = new Vector4(
+                    axisX.X,
+                    axisY.X,
+                    axisZ.X,
+                    0
+                );
+                result.Y = new Vector4(
+                    axisX.Y,
+                    axisY.Y,
+                    axisZ.Y,
+                    0
+                );
+                result.Z = new Vector4(
+                    axisX.Z,
+                    axisY.Z,
+                    axisZ.Z,
+                    0
+                );
+                result.W = new Vector4(
+                    -Vector3.Dot(axisX, cameraPosition),
+                    -Vector3.Dot(axisY, cameraPosition),
+                    -Vector3.Dot(axisZ, cameraPosition),
+                    1
+                );
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateOrthographic(float width, float height, float zNearPlane, float zFarPlane)
+            {
+                Impl result;
+
+                result.X = new Vector4(2.0f / width, 0, 0, 0);
+                result.Y = new Vector4(0, 2.0f / height, 0, 0);
+                result.Z = new Vector4(0, 0, 1.0f / (zNearPlane - zFarPlane), 0);
+                result.W = new Vector4(0, 0, zNearPlane / (zNearPlane - zFarPlane), 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateOrthographicOffCenter(float left, float right, float bottom, float top, float zNearPlane, float zFarPlane)
+            {
+                Impl result;
+
+                result.X = new Vector4(2.0f / (right - left), 0, 0, 0);
+                result.Y = new Vector4(0, 2.0f / (top - bottom), 0, 0);
+                result.Z = new Vector4(0, 0, 1.0f / (zNearPlane - zFarPlane), 0);
+                result.W = new Vector4(
+                    (left + right) / (left - right),
+                    (top + bottom) / (bottom - top),
+                    zNearPlane / (zNearPlane - zFarPlane),
+                    1
+                );
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreatePerspective(float width, float height, float nearPlaneDistance, float farPlaneDistance)
+            {
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(nearPlaneDistance, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(farPlaneDistance, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(nearPlaneDistance, farPlaneDistance);
+
+                float negFarRange = float.IsPositiveInfinity(farPlaneDistance) ? -1.0f : farPlaneDistance / (nearPlaneDistance - farPlaneDistance);
+
+                Impl result;
+
+                result.X = new Vector4(2.0f * nearPlaneDistance / width, 0, 0, 0);
+                result.Y = new Vector4(0, 2.0f * nearPlaneDistance / height, 0, 0);
+                result.Z = new Vector4(0, 0, negFarRange, -1.0f);
+                result.W = new Vector4(0, 0, nearPlaneDistance * negFarRange, 0);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreatePerspectiveFieldOfView(float fieldOfView, float aspectRatio, float nearPlaneDistance, float farPlaneDistance)
+            {
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(fieldOfView, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(fieldOfView, MathF.PI);
+
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(nearPlaneDistance, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(farPlaneDistance, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(nearPlaneDistance, farPlaneDistance);
+
+                float scaleY = 1.0f / MathF.Tan(fieldOfView * 0.5f);
+                float scaleX = scaleY / aspectRatio;
+                float negFarRange = float.IsPositiveInfinity(farPlaneDistance) ? -1.0f : farPlaneDistance / (nearPlaneDistance - farPlaneDistance);
+
+                Impl result;
+
+                result.X = new Vector4(scaleX, 0, 0, 0);
+                result.Y = new Vector4(0, scaleY, 0, 0);
+                result.Z = new Vector4(0, 0, negFarRange, -1.0f);
+                result.W = new Vector4(0, 0, nearPlaneDistance * negFarRange, 0);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreatePerspectiveOffCenter(float left, float right, float bottom, float top, float nearPlaneDistance, float farPlaneDistance)
+            {
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(nearPlaneDistance, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(farPlaneDistance, 0.0f);
+                ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(nearPlaneDistance, farPlaneDistance);
+
+                float negFarRange = float.IsPositiveInfinity(farPlaneDistance) ? -1.0f : farPlaneDistance / (nearPlaneDistance - farPlaneDistance);
+
+                Impl result;
+
+                result.X = new Vector4(2.0f * nearPlaneDistance / (right - left), 0, 0, 0);
+                result.Y = new Vector4(0, 2.0f * nearPlaneDistance / (top - bottom), 0, 0);
+                result.Z = new Vector4(
+                    (left + right) / (right - left),
+                    (top + bottom) / (top - bottom),
+                    negFarRange,
+                    -1.0f
+                );
+                result.W = new Vector4(0, 0, nearPlaneDistance * negFarRange, 0);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateReflection(in Plane value)
+            {
+                Plane p = Plane.Normalize(value);
+                Vector3 f = p.Normal * -2.0f;
+
+                Impl result;
+
+                result.X = new Vector4(f * p.Normal.X, 0) + Vector4.UnitX;
+                result.Y = new Vector4(f * p.Normal.Y, 0) + Vector4.UnitY;
+                result.Z = new Vector4(f * p.Normal.Z, 0) + Vector4.UnitZ;
+                result.W = new Vector4(f * p.D, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotationX(float radians)
+            {
+                float c = MathF.Cos(radians);
+                float s = MathF.Sin(radians);
+
+                // [  1  0  0  0 ]
+                // [  0  c  s  0 ]
+                // [  0 -s  c  0 ]
+                // [  0  0  0  1 ]
+
+                Impl result;
+
+                result.X = Vector4.UnitX;
+                result.Y = new Vector4(0,  c, s, 0);
+                result.Z = new Vector4(0, -s, c, 0);
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotationX(float radians, in Vector3 centerPoint)
+            {
+                float c = MathF.Cos(radians);
+                float s = MathF.Sin(radians);
+
+                float y = centerPoint.Y * (1 - c) + centerPoint.Z * s;
+                float z = centerPoint.Z * (1 - c) - centerPoint.Y * s;
+
+                // [  1  0  0  0 ]
+                // [  0  c  s  0 ]
+                // [  0 -s  c  0 ]
+                // [  0  y  z  1 ]
+
+                Impl result;
+
+                result.X = Vector4.UnitX;
+                result.Y = new Vector4(0,  c, s, 0);
+                result.Z = new Vector4(0, -s, c, 0);
+                result.W = new Vector4(0,  y, z, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotationY(float radians)
+            {
+                float c = MathF.Cos(radians);
+                float s = MathF.Sin(radians);
+
+                // [  c  0 -s  0 ]
+                // [  0  1  0  0 ]
+                // [  s  0  c  0 ]
+                // [  0  0  0  1 ]
+
+                Impl result;
+
+                result.X = new Vector4(c, 0, -s, 0);
+                result.Y = Vector4.UnitY;
+                result.Z = new Vector4(s, 0,  c, 0);
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotationY(float radians, in Vector3 centerPoint)
+            {
+                float c = MathF.Cos(radians);
+                float s = MathF.Sin(radians);
+
+                float x = centerPoint.X * (1 - c) - centerPoint.Z * s;
+                float z = centerPoint.Z * (1 - c) + centerPoint.X * s;
+
+                // [  c  0 -s  0 ]
+                // [  0  1  0  0 ]
+                // [  s  0  c  0 ]
+                // [  x  0  z  1 ]
+
+                Impl result;
+
+                result.X = new Vector4(c, 0, -s, 0);
+                result.Y = Vector4.UnitY;
+                result.Z = new Vector4(s, 0,  c, 0);
+                result.W = new Vector4(x, 0,  z, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotationZ(float radians)
+            {
+
+                float c = MathF.Cos(radians);
+                float s = MathF.Sin(radians);
+
+                // [  c  s  0  0 ]
+                // [ -s  c  0  0 ]
+                // [  0  0  1  0 ]
+                // [  0  0  0  1 ]
+
+                Impl result;
+
+                result.X = new Vector4( c, s, 0, 0);
+                result.Y = new Vector4(-s, c, 0, 0);
+                result.Z = Vector4.UnitZ;
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateRotationZ(float radians, in Vector3 centerPoint)
+            {
+                float c = MathF.Cos(radians);
+                float s = MathF.Sin(radians);
+
+                float x = centerPoint.X * (1 - c) + centerPoint.Y * s;
+                float y = centerPoint.Y * (1 - c) - centerPoint.X * s;
+
+                // [  c  s  0  0 ]
+                // [ -s  c  0  0 ]
+                // [  0  0  1  0 ]
+                // [  x  y  0  1 ]
+
+                Impl result;
+
+                result.X = new Vector4( c, s, 0, 0);
+                result.Y = new Vector4(-s, c, 0, 0);
+                result.Z = Vector4.UnitZ;
+                result.W = new Vector4(x, y, 0, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scaleX, float scaleY, float scaleZ)
+            {
+                Impl result;
+
+                result.X = new Vector4(scaleX, 0, 0, 0);
+                result.Y = new Vector4(0, scaleY, 0, 0);
+                result.Z = new Vector4(0, 0, scaleZ, 0);
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scaleX, float scaleY, float scaleZ, in Vector3 centerPoint)
+            {
+                Impl result;
+
+                result.X = new Vector4(scaleX, 0, 0, 0);
+                result.Y = new Vector4(0, scaleY, 0, 0);
+                result.Z = new Vector4(0, 0, scaleZ, 0);
+                result.W = new Vector4(centerPoint * (Vector3.One - new Vector3(scaleX, scaleY, scaleZ)), 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(in Vector3 scales)
+            {
+                Impl result;
+
+                result.X = new Vector4(scales.X, 0, 0, 0);
+                result.Y = new Vector4(0, scales.Y, 0, 0);
+                result.Z = new Vector4(0, 0, scales.Z, 0);
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(in Vector3 scales, in Vector3 centerPoint)
+            {
+                Impl result;
+
+                result.X = new Vector4(scales.X, 0, 0, 0);
+                result.Y = new Vector4(0, scales.Y, 0, 0);
+                result.Z = new Vector4(0, 0, scales.Z, 0);
+                result.W = new Vector4(centerPoint * (Vector3.One - scales), 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scale)
+            {
+                Impl result;
+
+                result.X = new Vector4(scale, 0, 0, 0);
+                result.Y = new Vector4(0, scale, 0, 0);
+                result.Z = new Vector4(0, 0, scale, 0);
+                result.W = Vector4.UnitW;
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateScale(float scale, in Vector3 centerPoint)
+            {
+                Impl result;
+
+                result.X = new Vector4(scale, 0, 0, 0);
+                result.Y = new Vector4(0, scale, 0, 0);
+                result.Z = new Vector4(0, 0, scale, 0);
+                result.W = new Vector4(centerPoint * (Vector3.One - new Vector3(scale)), 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateShadow(in Vector3 lightDirection, in Plane plane)
+            {
+                Plane p = Plane.Normalize(plane);
+                float dot = Vector3.Dot(lightDirection, p.Normal);
+
+                Vector3 normal = -p.Normal;
+
+                Impl result;
+
+                result.X = new Vector4(lightDirection * normal.X, 0) + new Vector4(dot, 0, 0, 0);
+                result.Y = new Vector4(lightDirection * normal.Y, 0) + new Vector4(0, dot, 0, 0);
+                result.Z = new Vector4(lightDirection * normal.Z, 0) + new Vector4(0, 0, dot, 0);
+                result.W = new Vector4(lightDirection * -p.D, dot);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateTranslation(in Vector3 position)
+            {
+                Impl result;
+
+                result.X = Vector4.UnitX;
+                result.Y = Vector4.UnitY;
+                result.Z = Vector4.UnitZ;
+                result.W = new Vector4(position, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateTranslation(float positionX, float positionY, float positionZ)
+            {
+                Impl result;
+
+                result.X = Vector4.UnitX;
+                result.Y = Vector4.UnitY;
+                result.Z = Vector4.UnitZ;
+                result.W = new Vector4(positionX, positionY, positionZ, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl CreateWorld(in Vector3 position, in Vector3 forward, in Vector3 up)
+            {
+                Vector3 axisZ = Vector3.Normalize(-forward);
+                Vector3 axisX = Vector3.Normalize(Vector3.Cross(up, axisZ));
+                Vector3 axisY = Vector3.Cross(axisZ, axisX);
+
+                Impl result;
+
+                result.X = new Vector4(axisX, 0);
+                result.Y = new Vector4(axisY, 0);
+                result.Z = new Vector4(axisZ, 0);
+                result.W = new Vector4(position, 1);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static unsafe bool Decompose(in Impl matrix, out Vector3 scale, out Quaternion rotation, out Vector3 translation)
+            {
+                bool result = true;
+
+                fixed (Vector3* scaleBase = &scale)
+                {
+                    float* pfScales = (float*)scaleBase;
+                    float det;
+
+                    VectorBasis vectorBasis;
+                    Vector3** pVectorBasis = (Vector3**)&vectorBasis;
+
+                    Impl matTemp = Identity;
+                    CanonicalBasis canonicalBasis = default;
+                    Vector3* pCanonicalBasis = &canonicalBasis.Row0;
+
+                    canonicalBasis.Row0 = new Vector3(1.0f, 0.0f, 0.0f);
+                    canonicalBasis.Row1 = new Vector3(0.0f, 1.0f, 0.0f);
+                    canonicalBasis.Row2 = new Vector3(0.0f, 0.0f, 1.0f);
+
+                    translation = new Vector3(
+                        matrix.W.X,
+                        matrix.W.Y,
+                        matrix.W.Z);
+
+                    pVectorBasis[0] = (Vector3*)&matTemp.X;
+                    pVectorBasis[1] = (Vector3*)&matTemp.Y;
+                    pVectorBasis[2] = (Vector3*)&matTemp.Z;
+
+                    *(pVectorBasis[0]) = new Vector3(matrix.X.X, matrix.X.Y, matrix.X.Z);
+                    *(pVectorBasis[1]) = new Vector3(matrix.Y.X, matrix.Y.Y, matrix.Y.Z);
+                    *(pVectorBasis[2]) = new Vector3(matrix.Z.X, matrix.Z.Y, matrix.Z.Z);
+
+                    scale.X = pVectorBasis[0]->Length();
+                    scale.Y = pVectorBasis[1]->Length();
+                    scale.Z = pVectorBasis[2]->Length();
+
+                    uint a, b, c;
+
+                    #region Ranking
+                    float x = pfScales[0];
+                    float y = pfScales[1];
+                    float z = pfScales[2];
+
+                    if (x < y)
+                    {
+                        if (y < z)
+                        {
+                            a = 2;
+                            b = 1;
+                            c = 0;
+                        }
+                        else
+                        {
+                            a = 1;
+
+                            if (x < z)
+                            {
+                                b = 2;
+                                c = 0;
+                            }
+                            else
+                            {
+                                b = 0;
+                                c = 2;
+                            }
+                        }
+                    }
+                    else
+                    {
+                        if (x < z)
+                        {
+                            a = 2;
+                            b = 0;
+                            c = 1;
+                        }
+                        else
+                        {
+                            a = 0;
+
+                            if (y < z)
+                            {
+                                b = 2;
+                                c = 1;
+                            }
+                            else
+                            {
+                                b = 1;
+                                c = 2;
+                            }
+                        }
+                    }
+                    #endregion
+
+                    if (pfScales[a] < DecomposeEpsilon)
+                    {
+                        *(pVectorBasis[a]) = pCanonicalBasis[a];
+                    }
+
+                    *pVectorBasis[a] = Vector3.Normalize(*pVectorBasis[a]);
+
+                    if (pfScales[b] < DecomposeEpsilon)
+                    {
+                        uint cc;
+                        float fAbsX, fAbsY, fAbsZ;
+
+                        fAbsX = MathF.Abs(pVectorBasis[a]->X);
+                        fAbsY = MathF.Abs(pVectorBasis[a]->Y);
+                        fAbsZ = MathF.Abs(pVectorBasis[a]->Z);
+
+                        #region Ranking
+                        if (fAbsX < fAbsY)
+                        {
+                            if (fAbsY < fAbsZ)
+                            {
+                                cc = 0;
+                            }
+                            else
+                            {
+                                if (fAbsX < fAbsZ)
+                                {
+                                    cc = 0;
+                                }
+                                else
+                                {
+                                    cc = 2;
+                                }
+                            }
+                        }
+                        else
+                        {
+                            if (fAbsX < fAbsZ)
+                            {
+                                cc = 1;
+                            }
+                            else
+                            {
+                                if (fAbsY < fAbsZ)
+                                {
+                                    cc = 1;
+                                }
+                                else
+                                {
+                                    cc = 2;
+                                }
+                            }
+                        }
+                        #endregion
+
+                        *pVectorBasis[b] = Vector3.Cross(*pVectorBasis[a], *(pCanonicalBasis + cc));
+                    }
+
+                    *pVectorBasis[b] = Vector3.Normalize(*pVectorBasis[b]);
+
+                    if (pfScales[c] < DecomposeEpsilon)
+                    {
+                        *pVectorBasis[c] = Vector3.Cross(*pVectorBasis[a], *pVectorBasis[b]);
+                    }
+
+                    *pVectorBasis[c] = Vector3.Normalize(*pVectorBasis[c]);
+
+                    det = matTemp.GetDeterminant();
+
+                    // use Kramer's rule to check for handedness of coordinate system
+                    if (det < 0.0f)
+                    {
+                        // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
+                        pfScales[a] = -pfScales[a];
+                        *pVectorBasis[a] = -(*pVectorBasis[a]);
+
+                        det = -det;
+                    }
+
+                    det -= 1.0f;
+                    det *= det;
+
+                    if ((DecomposeEpsilon < det))
+                    {
+                        // Non-SRT matrix encountered
+                        rotation = Quaternion.Identity;
+                        result = false;
+                    }
+                    else
+                    {
+                        // generate the quaternion from the matrix
+                        rotation = Quaternion.CreateFromRotationMatrix(Unsafe.As<Impl, Matrix4x4>(ref matTemp));
+                    }
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static bool Invert(in Impl matrix, out Impl result)
+            {
+                // This implementation is based on the DirectX Math Library XMMatrixInverse method
+                // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathMatrix.inl
+
+                if (Sse.IsSupported)
+                {
+                    return SseImpl(in matrix, out result);
+                }
+
+                return SoftwareFallback(in matrix, out result);
+
+                static bool SseImpl(in Impl matrix, out Impl result)
+                {
+                    if (!Sse.IsSupported)
+                    {
+                        // Redundant test so we won't prejit remainder of this method on platforms without SSE.
+                        throw new PlatformNotSupportedException();
+                    }
+
+                    // Load the matrix values into rows
+                    Vector128<float> row1 = matrix.X.AsVector128();
+                    Vector128<float> row2 = matrix.Y.AsVector128();
+                    Vector128<float> row3 = matrix.Z.AsVector128();
+                    Vector128<float> row4 = matrix.W.AsVector128();
+
+                    // Transpose the matrix
+                    Vector128<float> vTemp1 = Sse.Shuffle(row1, row2, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
+                    Vector128<float> vTemp3 = Sse.Shuffle(row1, row2, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+                    Vector128<float> vTemp2 = Sse.Shuffle(row3, row4, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
+                    Vector128<float> vTemp4 = Sse.Shuffle(row3, row4, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+
+                    row1 = Sse.Shuffle(vTemp1, vTemp2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+                    row2 = Sse.Shuffle(vTemp1, vTemp2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+                    row3 = Sse.Shuffle(vTemp3, vTemp4, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+                    row4 = Sse.Shuffle(vTemp3, vTemp4, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+
+                    Vector128<float> V00 = Permute(row3, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
+                    Vector128<float> V10 = Permute(row4, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
+                    Vector128<float> V01 = Permute(row1, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
+                    Vector128<float> V11 = Permute(row2, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
+                    Vector128<float> V02 = Sse.Shuffle(row3, row1, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+                    Vector128<float> V12 = Sse.Shuffle(row4, row2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+
+                    Vector128<float> D0 = V00 * V10;
+                    Vector128<float> D1 = V01 * V11;
+                    Vector128<float> D2 = V02 * V12;
+
+                    V00 = Permute(row3, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
+                    V10 = Permute(row4, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
+                    V01 = Permute(row1, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
+                    V11 = Permute(row2, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
+                    V02 = Sse.Shuffle(row3, row1, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+                    V12 = Sse.Shuffle(row4, row2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+
+                    // Note:  We use this expansion pattern instead of Fused Multiply Add
+                    // in order to support older hardware
+                    D0 -= V00 * V10;
+                    D1 -= V01 * V11;
+                    D2 -= V02 * V12;
+
+                    // V11 = D0Y,D0W,D2Y,D2Y
+                    V11 = Sse.Shuffle(D0, D2, 0x5D);  //_MM_SHUFFLE(1, 1, 3, 1)
+                    V00 = Permute(row2, 0x49);        //_MM_SHUFFLE(1, 0, 2, 1)
+                    V10 = Sse.Shuffle(V11, D0, 0x32); //_MM_SHUFFLE(0, 3, 0, 2)
+                    V01 = Permute(row1, 0x12);        //_MM_SHUFFLE(0, 1, 0, 2)
+                    V11 = Sse.Shuffle(V11, D0, 0x99); //_MM_SHUFFLE(2, 1, 2, 1)
+
+                    // V13 = D1Y,D1W,D2W,D2W
+                    Vector128<float> V13 = Sse.Shuffle(D1, D2, 0xFD); //_MM_SHUFFLE(3, 3, 3, 1)
+                    V02 = Permute(row4, 0x49);                        //_MM_SHUFFLE(1, 0, 2, 1)
+                    V12 = Sse.Shuffle(V13, D1, 0x32);                 //_MM_SHUFFLE(0, 3, 0, 2)
+                    Vector128<float> V03 = Permute(row3, 0x12);       //_MM_SHUFFLE(0, 1, 0, 2)
+                    V13 = Sse.Shuffle(V13, D1, 0x99);                 //_MM_SHUFFLE(2, 1, 2, 1)
+
+                    Vector128<float> C0 = V00 * V10;
+                    Vector128<float> C2 = V01 * V11;
+                    Vector128<float> C4 = V02 * V12;
+                    Vector128<float> C6 = V03 * V13;
+
+                    // V11 = D0X,D0Y,D2X,D2X
+                    V11 = Sse.Shuffle(D0, D2, 0x4);    //_MM_SHUFFLE(0, 0, 1, 0)
+                    V00 = Permute(row2, 0x9e);         //_MM_SHUFFLE(2, 1, 3, 2)
+                    V10 = Sse.Shuffle(D0, V11, 0x93);  //_MM_SHUFFLE(2, 1, 0, 3)
+                    V01 = Permute(row1, 0x7b);         //_MM_SHUFFLE(1, 3, 2, 3)
+                    V11 = Sse.Shuffle(D0, V11, 0x26);  //_MM_SHUFFLE(0, 2, 1, 2)
+
+                    // V13 = D1X,D1Y,D2Z,D2Z
+                    V13 = Sse.Shuffle(D1, D2, 0xa4);   //_MM_SHUFFLE(2, 2, 1, 0)
+                    V02 = Permute(row4, 0x9e);         //_MM_SHUFFLE(2, 1, 3, 2)
+                    V12 = Sse.Shuffle(D1, V13, 0x93);  //_MM_SHUFFLE(2, 1, 0, 3)
+                    V03 = Permute(row3, 0x7b);         //_MM_SHUFFLE(1, 3, 2, 3)
+                    V13 = Sse.Shuffle(D1, V13, 0x26);  //_MM_SHUFFLE(0, 2, 1, 2)
+
+                    C0 -= V00 * V10;
+                    C2 -= V01 * V11;
+                    C4 -= V02 * V12;
+                    C6 -= V03 * V13;
+
+                    V00 = Permute(row2, 0x33); //_MM_SHUFFLE(0, 3, 0, 3)
+
+                    // V10 = D0Z,D0Z,D2X,D2Y
+                    V10 = Sse.Shuffle(D0, D2, 0x4A); //_MM_SHUFFLE(1, 0, 2, 2)
+                    V10 = Permute(V10, 0x2C);        //_MM_SHUFFLE(0, 2, 3, 0)
+                    V01 = Permute(row1, 0x8D);       //_MM_SHUFFLE(2, 0, 3, 1)
+
+                    // V11 = D0X,D0W,D2X,D2Y
+                    V11 = Sse.Shuffle(D0, D2, 0x4C); //_MM_SHUFFLE(1, 0, 3, 0)
+                    V11 = Permute(V11, 0x93);        //_MM_SHUFFLE(2, 1, 0, 3)
+                    V02 = Permute(row4, 0x33);       //_MM_SHUFFLE(0, 3, 0, 3)
+
+                    // V12 = D1Z,D1Z,D2Z,D2W
+                    V12 = Sse.Shuffle(D1, D2, 0xEA); //_MM_SHUFFLE(3, 2, 2, 2)
+                    V12 = Permute(V12, 0x2C);        //_MM_SHUFFLE(0, 2, 3, 0)
+                    V03 = Permute(row3, 0x8D);       //_MM_SHUFFLE(2, 0, 3, 1)
+
+                    // V13 = D1X,D1W,D2Z,D2W
+                    V13 = Sse.Shuffle(D1, D2, 0xEC); //_MM_SHUFFLE(3, 2, 3, 0)
+                    V13 = Permute(V13, 0x93);        //_MM_SHUFFLE(2, 1, 0, 3)
+
+                    V00 *= V10;
+                    V01 *= V11;
+                    V02 *= V12;
+                    V03 *= V13;
+
+                    Vector128<float> C1 = C0 - V00;
+                    C0 += V00;
+
+                    Vector128<float> C3 = C2 + V01;
+                    C2 -= V01;
+
+                    Vector128<float> C5 = C4 - V02;
+                    C4 += V02;
+
+                    Vector128<float> C7 = C6 + V03;
+                    C6 -= V03;
+
+                    C0 = Sse.Shuffle(C0, C1, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+                    C2 = Sse.Shuffle(C2, C3, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+                    C4 = Sse.Shuffle(C4, C5, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+                    C6 = Sse.Shuffle(C6, C7, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+
+                    C0 = Permute(C0, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+                    C2 = Permute(C2, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+                    C4 = Permute(C4, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+                    C6 = Permute(C6, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+
+                    // Get the determinant
+                    float det = Vector4.Dot(C0.AsVector4(), row1.AsVector4());
+
+                    // Check determinate is not zero
+                    if (MathF.Abs(det) < float.Epsilon)
+                    {
+                        Vector4 vNaN = new Vector4(float.NaN);
+
+                        result.X = vNaN;
+                        result.Y = vNaN;
+                        result.Z = vNaN;
+                        result.W = vNaN;
+
+                        return false;
+                    }
+
+                    // Create Vector128<float> copy of the determinant and invert them.
+
+                    Vector128<float> ones = Vector128.Create(1.0f);
+                    Vector128<float> vTemp = Vector128.Create(det);
+
+                    vTemp = ones / vTemp;
+
+                    result.X = (C0 * vTemp).AsVector4();
+                    result.Y = (C2 * vTemp).AsVector4();
+                    result.Z = (C4 * vTemp).AsVector4();
+                    result.W = (C6 * vTemp).AsVector4();
+
+                    return true;
+                }
+
+                [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                static Vector128<float> Permute(Vector128<float> value, [ConstantExpected] byte control)
+                {
+                    if (Avx.IsSupported)
+                    {
+                        return Avx.Permute(value, control);
+                    }
+                    else if (Sse.IsSupported)
+                    {
+                        return Sse.Shuffle(value, value, control);
+                    }
+                    else
+                    {
+                        // Redundant test so we won't prejit remainder of this method on platforms without SSE.
+                        throw new PlatformNotSupportedException();
+                    }
+                }
+
+                static bool SoftwareFallback(in Impl matrix, out Impl result)
+                {
+                    //                                       -1
+                    // If you have matrix M, inverse Matrix M   can compute
+                    //
+                    //     -1       1
+                    //    M   = --------- A
+                    //            det(M)
+                    //
+                    // A is adjugate (adjoint) of M, where,
+                    //
+                    //      T
+                    // A = C
+                    //
+                    // C is Cofactor matrix of M, where,
+                    //           i + j
+                    // C   = (-1)      * det(M  )
+                    //  ij                    ij
+                    //
+                    //     [ a b c d ]
+                    // M = [ e f g h ]
+                    //     [ i j k l ]
+                    //     [ m n o p ]
+                    //
+                    // First Row
+                    //           2 | f g h |
+                    // C   = (-1)  | j k l | = + ( f ( kp - lo ) - g ( jp - ln ) + h ( jo - kn ) )
+                    //  11         | n o p |
+                    //
+                    //           3 | e g h |
+                    // C   = (-1)  | i k l | = - ( e ( kp - lo ) - g ( ip - lm ) + h ( io - km ) )
+                    //  12         | m o p |
+                    //
+                    //           4 | e f h |
+                    // C   = (-1)  | i j l | = + ( e ( jp - ln ) - f ( ip - lm ) + h ( in - jm ) )
+                    //  13         | m n p |
+                    //
+                    //           5 | e f g |
+                    // C   = (-1)  | i j k | = - ( e ( jo - kn ) - f ( io - km ) + g ( in - jm ) )
+                    //  14         | m n o |
+                    //
+                    // Second Row
+                    //           3 | b c d |
+                    // C   = (-1)  | j k l | = - ( b ( kp - lo ) - c ( jp - ln ) + d ( jo - kn ) )
+                    //  21         | n o p |
+                    //
+                    //           4 | a c d |
+                    // C   = (-1)  | i k l | = + ( a ( kp - lo ) - c ( ip - lm ) + d ( io - km ) )
+                    //  22         | m o p |
+                    //
+                    //           5 | a b d |
+                    // C   = (-1)  | i j l | = - ( a ( jp - ln ) - b ( ip - lm ) + d ( in - jm ) )
+                    //  23         | m n p |
+                    //
+                    //           6 | a b c |
+                    // C   = (-1)  | i j k | = + ( a ( jo - kn ) - b ( io - km ) + c ( in - jm ) )
+                    //  24         | m n o |
+                    //
+                    // Third Row
+                    //           4 | b c d |
+                    // C   = (-1)  | f g h | = + ( b ( gp - ho ) - c ( fp - hn ) + d ( fo - gn ) )
+                    //  31         | n o p |
+                    //
+                    //           5 | a c d |
+                    // C   = (-1)  | e g h | = - ( a ( gp - ho ) - c ( ep - hm ) + d ( eo - gm ) )
+                    //  32         | m o p |
+                    //
+                    //           6 | a b d |
+                    // C   = (-1)  | e f h | = + ( a ( fp - hn ) - b ( ep - hm ) + d ( en - fm ) )
+                    //  33         | m n p |
+                    //
+                    //           7 | a b c |
+                    // C   = (-1)  | e f g | = - ( a ( fo - gn ) - b ( eo - gm ) + c ( en - fm ) )
+                    //  34         | m n o |
+                    //
+                    // Fourth Row
+                    //           5 | b c d |
+                    // C   = (-1)  | f g h | = - ( b ( gl - hk ) - c ( fl - hj ) + d ( fk - gj ) )
+                    //  41         | j k l |
+                    //
+                    //           6 | a c d |
+                    // C   = (-1)  | e g h | = + ( a ( gl - hk ) - c ( el - hi ) + d ( ek - gi ) )
+                    //  42         | i k l |
+                    //
+                    //           7 | a b d |
+                    // C   = (-1)  | e f h | = - ( a ( fl - hj ) - b ( el - hi ) + d ( ej - fi ) )
+                    //  43         | i j l |
+                    //
+                    //           8 | a b c |
+                    // C   = (-1)  | e f g | = + ( a ( fk - gj ) - b ( ek - gi ) + c ( ej - fi ) )
+                    //  44         | i j k |
+                    //
+                    // Cost of operation
+                    // 53 adds, 104 muls, and 1 div.
+
+                    float a = matrix.X.X, b = matrix.X.Y, c = matrix.X.Z, d = matrix.X.W;
+                    float e = matrix.Y.X, f = matrix.Y.Y, g = matrix.Y.Z, h = matrix.Y.W;
+                    float i = matrix.Z.X, j = matrix.Z.Y, k = matrix.Z.Z, l = matrix.Z.W;
+                    float m = matrix.W.X, n = matrix.W.Y, o = matrix.W.Z, p = matrix.W.W;
+
+                    float kp_lo = k * p - l * o;
+                    float jp_ln = j * p - l * n;
+                    float jo_kn = j * o - k * n;
+                    float ip_lm = i * p - l * m;
+                    float io_km = i * o - k * m;
+                    float in_jm = i * n - j * m;
+
+                    float a11 = +(f * kp_lo - g * jp_ln + h * jo_kn);
+                    float a12 = -(e * kp_lo - g * ip_lm + h * io_km);
+                    float a13 = +(e * jp_ln - f * ip_lm + h * in_jm);
+                    float a14 = -(e * jo_kn - f * io_km + g * in_jm);
+
+                    float det = a * a11 + b * a12 + c * a13 + d * a14;
+
+                    if (MathF.Abs(det) < float.Epsilon)
+                    {
+                        Vector4 vNaN = new Vector4(float.NaN);
+
+                        result.X = vNaN;
+                        result.Y = vNaN;
+                        result.Z = vNaN;
+                        result.W = vNaN;
+
+                        return false;
+                    }
+
+                    float invDet = 1.0f / det;
+
+                    result.X.X = a11 * invDet;
+                    result.Y.X = a12 * invDet;
+                    result.Z.X = a13 * invDet;
+                    result.W.X = a14 * invDet;
+
+                    result.X.Y = -(b * kp_lo - c * jp_ln + d * jo_kn) * invDet;
+                    result.Y.Y = +(a * kp_lo - c * ip_lm + d * io_km) * invDet;
+                    result.Z.Y = -(a * jp_ln - b * ip_lm + d * in_jm) * invDet;
+                    result.W.Y = +(a * jo_kn - b * io_km + c * in_jm) * invDet;
+
+                    float gp_ho = g * p - h * o;
+                    float fp_hn = f * p - h * n;
+                    float fo_gn = f * o - g * n;
+                    float ep_hm = e * p - h * m;
+                    float eo_gm = e * o - g * m;
+                    float en_fm = e * n - f * m;
+
+                    result.X.Z = +(b * gp_ho - c * fp_hn + d * fo_gn) * invDet;
+                    result.Y.Z = -(a * gp_ho - c * ep_hm + d * eo_gm) * invDet;
+                    result.Z.Z = +(a * fp_hn - b * ep_hm + d * en_fm) * invDet;
+                    result.W.Z = -(a * fo_gn - b * eo_gm + c * en_fm) * invDet;
+
+                    float gl_hk = g * l - h * k;
+                    float fl_hj = f * l - h * j;
+                    float fk_gj = f * k - g * j;
+                    float el_hi = e * l - h * i;
+                    float ek_gi = e * k - g * i;
+                    float ej_fi = e * j - f * i;
+
+                    result.X.W = -(b * gl_hk - c * fl_hj + d * fk_gj) * invDet;
+                    result.Y.W = +(a * gl_hk - c * el_hi + d * ek_gi) * invDet;
+                    result.Z.W = -(a * fl_hj - b * el_hi + d * ej_fi) * invDet;
+                    result.W.W = +(a * fk_gj - b * ek_gi + c * ej_fi) * invDet;
+
+                    return true;
+                }
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl Lerp(in Impl left, in Impl right, float amount)
+            {
+                Impl result;
+
+                result.X = Vector4.Lerp(left.X, right.X, amount);
+                result.Y = Vector4.Lerp(left.Y, right.Y, amount);
+                result.Z = Vector4.Lerp(left.Z, right.Z, amount);
+                result.W = Vector4.Lerp(left.W, right.W, amount);
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl Transform(in Impl value, in Quaternion rotation)
+            {
+                // Compute rotation matrix.
+                float x2 = rotation.X + rotation.X;
+                float y2 = rotation.Y + rotation.Y;
+                float z2 = rotation.Z + rotation.Z;
+
+                float wx2 = rotation.W * x2;
+                float wy2 = rotation.W * y2;
+                float wz2 = rotation.W * z2;
+
+                float xx2 = rotation.X * x2;
+                float xy2 = rotation.X * y2;
+                float xz2 = rotation.X * z2;
+
+                float yy2 = rotation.Y * y2;
+                float yz2 = rotation.Y * z2;
+                float zz2 = rotation.Z * z2;
+
+                float q11 = 1.0f - yy2 - zz2;
+                float q21 = xy2 - wz2;
+                float q31 = xz2 + wy2;
+
+                float q12 = xy2 + wz2;
+                float q22 = 1.0f - xx2 - zz2;
+                float q32 = yz2 - wx2;
+
+                float q13 = xz2 - wy2;
+                float q23 = yz2 + wx2;
+                float q33 = 1.0f - xx2 - yy2;
+
+                Impl result;
+
+                result.X = new Vector4(
+                    value.X.X * q11 + value.X.Y * q21 + value.X.Z * q31,
+                    value.X.X * q12 + value.X.Y * q22 + value.X.Z * q32,
+                    value.X.X * q13 + value.X.Y * q23 + value.X.Z * q33,
+                    value.X.W
+                );
+                result.Y = new Vector4(
+                    value.Y.X * q11 + value.Y.Y * q21 + value.Y.Z * q31,
+                    value.Y.X * q12 + value.Y.Y * q22 + value.Y.Z * q32,
+                    value.Y.X * q13 + value.Y.Y * q23 + value.Y.Z * q33,
+                    value.Y.W
+                );
+                result.Z = new Vector4(
+                    value.Z.X * q11 + value.Z.Y * q21 + value.Z.Z * q31,
+                    value.Z.X * q12 + value.Z.Y * q22 + value.Z.Z * q32,
+                    value.Z.X * q13 + value.Z.Y * q23 + value.Z.Z * q33,
+                    value.Z.W
+                );
+                result.W = new Vector4(
+                    value.W.X * q11 + value.W.Y * q21 + value.W.Z * q31,
+                    value.W.X * q12 + value.W.Y * q22 + value.W.Z * q32,
+                    value.W.X * q13 + value.W.Y * q23 + value.W.Z * q33,
+                    value.W.W
+                );
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static Impl Transpose(in Impl matrix)
+            {
+                // This implementation is based on the DirectX Math Library XMMatrixTranspose method
+                // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathMatrix.inl
+
+                Impl result;
+
+                if (AdvSimd.Arm64.IsSupported)
+                {
+                    Vector128<float> x = matrix.X.AsVector128();
+                    Vector128<float> y = matrix.Y.AsVector128();
+                    Vector128<float> z = matrix.Z.AsVector128();
+                    Vector128<float> w = matrix.W.AsVector128();
+
+                    Vector128<float> lowerXZ = AdvSimd.Arm64.ZipLow(x, z);          // x[0], z[0], x[1], z[1]
+                    Vector128<float> lowerYW = AdvSimd.Arm64.ZipLow(y, w);          // y[0], w[0], y[1], w[1]
+                    Vector128<float> upperXZ = AdvSimd.Arm64.ZipHigh(x, z);         // x[2], z[2], x[3], z[3]
+                    Vector128<float> upperYW = AdvSimd.Arm64.ZipHigh(y, w);         // y[2], w[2], y[3], z[3]
+
+                    result.X = AdvSimd.Arm64.ZipLow(lowerXZ, lowerYW).AsVector4();  // x[0], y[0], z[0], w[0]
+                    result.Y = AdvSimd.Arm64.ZipHigh(lowerXZ, lowerYW).AsVector4(); // x[1], y[1], z[1], w[1]
+                    result.Z = AdvSimd.Arm64.ZipLow(upperXZ, upperYW).AsVector4();  // x[2], y[2], z[2], w[2]
+                    result.W = AdvSimd.Arm64.ZipHigh(upperXZ, upperYW).AsVector4(); // x[3], y[3], z[3], w[3]
+                }
+                else if (Sse.IsSupported)
+                {
+                    Vector128<float> x = matrix.X.AsVector128();
+                    Vector128<float> y = matrix.Y.AsVector128();
+                    Vector128<float> z = matrix.Z.AsVector128();
+                    Vector128<float> w = matrix.W.AsVector128();
+
+                    Vector128<float> lowerXZ = Sse.UnpackLow(x, z);                 // x[0], z[0], x[1], z[1]
+                    Vector128<float> lowerYW = Sse.UnpackLow(y, w);                 // y[0], w[0], y[1], w[1]
+                    Vector128<float> upperXZ = Sse.UnpackHigh(x, z);                // x[2], z[2], x[3], z[3]
+                    Vector128<float> upperYW = Sse.UnpackHigh(y, w);                // y[2], w[2], y[3], z[3]
+
+                    result.X = Sse.UnpackLow(lowerXZ, lowerYW).AsVector4();         // x[0], y[0], z[0], w[0]
+                    result.Y = Sse.UnpackHigh(lowerXZ, lowerYW).AsVector4();        // x[1], y[1], z[1], w[1]
+                    result.Z = Sse.UnpackLow(upperXZ, upperYW).AsVector4();         // x[2], y[2], z[2], w[2]
+                    result.W = Sse.UnpackHigh(upperXZ, upperYW).AsVector4();        // x[3], y[3], z[3], w[3]
+                }
+                else
+                {
+                    result.X = new Vector4(matrix.X.X, matrix.Y.X, matrix.Z.X, matrix.W.X);
+                    result.Y = new Vector4(matrix.X.Y, matrix.Y.Y, matrix.Z.Y, matrix.W.Y);
+                    result.Z = new Vector4(matrix.X.Z, matrix.Y.Z, matrix.Z.Z, matrix.W.Z);
+                    result.W = new Vector4(matrix.X.W, matrix.Y.W, matrix.Z.W, matrix.W.W);
+                }
+
+                return result;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override readonly bool Equals([NotNullWhen(true)] object? obj)
+                => (obj is Matrix4x4 other) && Equals(in other.AsImpl());
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public readonly bool Equals(in Impl other)
+            {
+                // This function needs to account for floating-point equality around NaN
+                // and so must behave equivalently to the underlying float/double.Equals
+
+                return X.Equals(other.X)
+                    && Y.Equals(other.Y)
+                    && Z.Equals(other.Z)
+                    && W.Equals(other.W);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public readonly float GetDeterminant()
+            {
+                // | a b c d |     | f g h |     | e g h |     | e f h |     | e f g |
+                // | e f g h | = a | j k l | - b | i k l | + c | i j l | - d | i j k |
+                // | i j k l |     | n o p |     | m o p |     | m n p |     | m n o |
+                // | m n o p |
+                //
+                //   | f g h |
+                // a | j k l | = a ( f ( kp - lo ) - g ( jp - ln ) + h ( jo - kn ) )
+                //   | n o p |
+                //
+                //   | e g h |
+                // b | i k l | = b ( e ( kp - lo ) - g ( ip - lm ) + h ( io - km ) )
+                //   | m o p |
+                //
+                //   | e f h |
+                // c | i j l | = c ( e ( jp - ln ) - f ( ip - lm ) + h ( in - jm ) )
+                //   | m n p |
+                //
+                //   | e f g |
+                // d | i j k | = d ( e ( jo - kn ) - f ( io - km ) + g ( in - jm ) )
+                //   | m n o |
+                //
+                // Cost of operation
+                // 17 adds and 28 muls.
+                //
+                // add: 6 + 8 + 3 = 17
+                // mul: 12 + 16 = 28
+
+                float a = X.X, b = X.Y, c = X.Z, d = X.W;
+                float e = Y.X, f = Y.Y, g = Y.Z, h = Y.W;
+                float i = Z.X, j = Z.Y, k = Z.Z, l = Z.W;
+                float m = W.X, n = W.Y, o = W.Z, p = W.W;
+
+                float kp_lo = k * p - l * o;
+                float jp_ln = j * p - l * n;
+                float jo_kn = j * o - k * n;
+                float ip_lm = i * p - l * m;
+                float io_km = i * o - k * m;
+                float in_jm = i * n - j * m;
+
+                return a * (f * kp_lo - g * jp_ln + h * jo_kn) -
+                       b * (e * kp_lo - g * ip_lm + h * io_km) +
+                       c * (e * jp_ln - f * ip_lm + h * in_jm) -
+                       d * (e * jo_kn - f * io_km + g * in_jm);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override readonly int GetHashCode() => HashCode.Combine(X, Y, Z, W);
+
+            bool IEquatable<Impl>.Equals(Impl other) => Equals(in other);
+
+            private struct CanonicalBasis
+            {
+                public Vector3 Row0;
+                public Vector3 Row1;
+                public Vector3 Row2;
+            };
+
+            private unsafe struct VectorBasis
+            {
+                public Vector3* Element0;
+                public Vector3* Element1;
+                public Vector3* Element2;
+            }
+        }
+    }
+}
index 0ea71be..18e05f7 100644 (file)
@@ -3,9 +3,6 @@
 
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.Arm;
-using System.Runtime.Intrinsics.X86;
 
 namespace System.Numerics
 {
@@ -14,19 +11,17 @@ namespace System.Numerics
     /// [!INCLUDE[vectors-are-rows-paragraph](~/includes/system-numerics-vectors-are-rows.md)]
     /// ]]></format></remarks>
     [Intrinsic]
-    public struct Matrix4x4 : IEquatable<Matrix4x4>
+    public partial struct Matrix4x4 : IEquatable<Matrix4x4>
     {
-        private const float BillboardEpsilon = 1e-4f;
-        private const float BillboardMinAngle = 1.0f - (0.1f * (MathF.PI / 180.0f)); // 0.1 degrees
-        private const float DecomposeEpsilon = 0.0001f;
-
-        private static readonly Matrix4x4 _identity = new Matrix4x4
-        (
-            1f, 0f, 0f, 0f,
-            0f, 1f, 0f, 0f,
-            0f, 0f, 1f, 0f,
-            0f, 0f, 0f, 1f
-        );
+        // In an ideal world, we'd have 4x Vector4 fields. However, Matrix4x4 was shipped with
+        // 16x public float fields and as such we cannot change the "backing" fields without it being
+        // a breaking change. Likewise, we cannot switch to using something like ExplicitLayout
+        // without it pessimizing other parts of the JIT and still preventing things like field promotion.
+        //
+        // This nested Impl struct works around this problem by relying on the JIT treating same sizeof
+        // value type bitcasts as a no-op. Effectively the entire implementation is here in this type
+        // and the public facing Matrix4x4 just defers to it with simple reinterpret casts inserted
+        // at the relevant points.
 
         /// <summary>The first element of the first row.</summary>
         public float M11;
@@ -98,58 +93,31 @@ namespace System.Numerics
                          float m31, float m32, float m33, float m34,
                          float m41, float m42, float m43, float m44)
         {
-            M11 = m11;
-            M12 = m12;
-            M13 = m13;
-            M14 = m14;
-
-            M21 = m21;
-            M22 = m22;
-            M23 = m23;
-            M24 = m24;
-
-            M31 = m31;
-            M32 = m32;
-            M33 = m33;
-            M34 = m34;
-
-            M41 = m41;
-            M42 = m42;
-            M43 = m43;
-            M44 = m44;
+            Unsafe.SkipInit(out this);
+
+            AsImpl().Init(
+                m11, m12, m13, m14,
+                m21, m22, m23, m24,
+                m31, m32, m33, m34,
+                m41, m42, m43, m44
+            );
         }
 
-        /// <summary>Creates a <see cref="System.Numerics.Matrix4x4" /> object from a specified <see cref="System.Numerics.Matrix3x2" /> object.</summary>
+        /// <summary>Creates a <see cref="Matrix4x4" /> object from a specified <see cref="Matrix3x2" /> object.</summary>
         /// <param name="value">A 3x2 matrix.</param>
-        /// <remarks>This constructor creates a 4x4 matrix whose <see cref="System.Numerics.Matrix4x4.M13" />, <see cref="System.Numerics.Matrix4x4.M14" />, <see cref="System.Numerics.Matrix4x4.M23" />, <see cref="System.Numerics.Matrix4x4.M24" />, <see cref="System.Numerics.Matrix4x4.M31" />, <see cref="System.Numerics.Matrix4x4.M32" />, <see cref="System.Numerics.Matrix4x4.M34" />, and <see cref="System.Numerics.Matrix4x4.M43" /> components are zero, and whose <see cref="System.Numerics.Matrix4x4.M33" /> and <see cref="System.Numerics.Matrix4x4.M44" /> components are one.</remarks>
+        /// <remarks>This constructor creates a 4x4 matrix whose <see cref="M13" />, <see cref="M14" />, <see cref="M23" />, <see cref="M24" />, <see cref="M31" />, <see cref="M32" />, <see cref="M34" />, and <see cref="M43" /> components are zero, and whose <see cref="M33" /> and <see cref="M44" /> components are one.</remarks>
         public Matrix4x4(Matrix3x2 value)
         {
-            M11 = value.M11;
-            M12 = value.M12;
-            M13 = 0f;
-            M14 = 0f;
-
-            M21 = value.M21;
-            M22 = value.M22;
-            M23 = 0f;
-            M24 = 0f;
-
-            M31 = 0f;
-            M32 = 0f;
-            M33 = 1f;
-            M34 = 0f;
-
-            M41 = value.M31;
-            M42 = value.M32;
-            M43 = 0f;
-            M44 = 1f;
+            Unsafe.SkipInit(out this);
+            AsImpl().Init(in value.AsImpl());
         }
 
         /// <summary>Gets the multiplicative identity matrix.</summary>
         /// <value>Gets the multiplicative identity matrix.</value>
         public static Matrix4x4 Identity
         {
-            get => _identity;
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get => Impl.Identity.AsM4x4();
         }
 
         /// <summary>Gets or sets the element at the specified indices.</summary>
@@ -161,411 +129,92 @@ namespace System.Numerics
         /// -or-
         /// <paramref name="column" /> was less than zero or greater than the number of columns.
         /// </exception>
-        public unsafe float this[int row, int column]
+        public float this[int row, int column]
         {
-            get
-            {
-                if ((uint)row >= 4)
-                    ThrowHelper.ThrowArgumentOutOfRangeException();
-
-                Vector4 vrow = Unsafe.Add(ref Unsafe.As<float, Vector4>(ref M11), row);
-                return vrow[column];
-            }
-            set
-            {
-                if ((uint)row >= 4)
-                    ThrowHelper.ThrowArgumentOutOfRangeException();
-
-                ref Vector4 vrow = ref Unsafe.Add(ref Unsafe.As<float, Vector4>(ref M11), row);
-                var tmp = Vector4.WithElement(vrow, column, value);
-                vrow = tmp;
-            }
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            readonly get => AsROImpl()[row, column];
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            set => AsImpl()[row, column] = value;
         }
 
         /// <summary>Indicates whether the current matrix is the identity matrix.</summary>
         /// <value><see langword="true" /> if the current matrix is the identity matrix; otherwise, <see langword="false" />.</value>
         public readonly bool IsIdentity
         {
-            get
-            {
-                return M11 == 1f && M22 == 1f && M33 == 1f && M44 == 1f && // Check diagonal element first for early out.
-                                    M12 == 0f && M13 == 0f && M14 == 0f &&
-                       M21 == 0f && M23 == 0f && M24 == 0f &&
-                       M31 == 0f && M32 == 0f && M34 == 0f &&
-                       M41 == 0f && M42 == 0f && M43 == 0f;
-            }
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get => AsROImpl().IsIdentity;
         }
 
         /// <summary>Gets or sets the translation component of this matrix.</summary>
         /// <value>The translation component of the current instance.</value>
         public Vector3 Translation
         {
-            readonly get => new Vector3(M41, M42, M43);
-
-            set
-            {
-                M41 = value.X;
-                M42 = value.Y;
-                M43 = value.Z;
-            }
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            readonly get => AsROImpl().Translation;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            set => AsImpl().Translation = value;
         }
 
         /// <summary>Adds each element in one matrix with its corresponding element in a second matrix.</summary>
         /// <param name="value1">The first matrix.</param>
         /// <param name="value2">The second matrix.</param>
         /// <returns>The matrix that contains the summed values.</returns>
-        /// <remarks>The <see cref="System.Numerics.Matrix4x4.op_Addition" /> method defines the operation of the addition operator for <see cref="System.Numerics.Matrix4x4" /> objects.</remarks>
-        public static unsafe Matrix4x4 operator +(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            if (AdvSimd.IsSupported)
-            {
-                AdvSimd.Store(&value1.M11, AdvSimd.Add(AdvSimd.LoadVector128(&value1.M11), AdvSimd.LoadVector128(&value2.M11)));
-                AdvSimd.Store(&value1.M21, AdvSimd.Add(AdvSimd.LoadVector128(&value1.M21), AdvSimd.LoadVector128(&value2.M21)));
-                AdvSimd.Store(&value1.M31, AdvSimd.Add(AdvSimd.LoadVector128(&value1.M31), AdvSimd.LoadVector128(&value2.M31)));
-                AdvSimd.Store(&value1.M41, AdvSimd.Add(AdvSimd.LoadVector128(&value1.M41), AdvSimd.LoadVector128(&value2.M41)));
-                return value1;
-            }
-            else if (Sse.IsSupported)
-            {
-                Sse.Store(&value1.M11, Sse.Add(Sse.LoadVector128(&value1.M11), Sse.LoadVector128(&value2.M11)));
-                Sse.Store(&value1.M21, Sse.Add(Sse.LoadVector128(&value1.M21), Sse.LoadVector128(&value2.M21)));
-                Sse.Store(&value1.M31, Sse.Add(Sse.LoadVector128(&value1.M31), Sse.LoadVector128(&value2.M31)));
-                Sse.Store(&value1.M41, Sse.Add(Sse.LoadVector128(&value1.M41), Sse.LoadVector128(&value2.M41)));
-                return value1;
-            }
-
-            Matrix4x4 m;
-
-            m.M11 = value1.M11 + value2.M11;
-            m.M12 = value1.M12 + value2.M12;
-            m.M13 = value1.M13 + value2.M13;
-            m.M14 = value1.M14 + value2.M14;
-            m.M21 = value1.M21 + value2.M21;
-            m.M22 = value1.M22 + value2.M22;
-            m.M23 = value1.M23 + value2.M23;
-            m.M24 = value1.M24 + value2.M24;
-            m.M31 = value1.M31 + value2.M31;
-            m.M32 = value1.M32 + value2.M32;
-            m.M33 = value1.M33 + value2.M33;
-            m.M34 = value1.M34 + value2.M34;
-            m.M41 = value1.M41 + value2.M41;
-            m.M42 = value1.M42 + value2.M42;
-            m.M43 = value1.M43 + value2.M43;
-            m.M44 = value1.M44 + value2.M44;
-
-            return m;
-        }
+        /// <remarks>The <see cref="op_Addition" /> method defines the operation of the addition operator for <see cref="Matrix4x4" /> objects.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Matrix4x4 operator +(Matrix4x4 value1, Matrix4x4 value2)
+            => (value1.AsImpl() + value2.AsImpl()).AsM4x4();
 
         /// <summary>Returns a value that indicates whether the specified matrices are equal.</summary>
         /// <param name="value1">The first matrix to compare.</param>
         /// <param name="value2">The second matrix to care</param>
         /// <returns><see langword="true" /> if <paramref name="value1" /> and <paramref name="value2" /> are equal; otherwise, <see langword="false" />.</returns>
         /// <remarks>Two matrices are equal if all their corresponding elements are equal.</remarks>
-        public static unsafe bool operator ==(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            if (AdvSimd.Arm64.IsSupported)
-            {
-                return VectorMath.Equal(AdvSimd.LoadVector128(&value1.M11), AdvSimd.LoadVector128(&value2.M11)) &&
-                       VectorMath.Equal(AdvSimd.LoadVector128(&value1.M21), AdvSimd.LoadVector128(&value2.M21)) &&
-                       VectorMath.Equal(AdvSimd.LoadVector128(&value1.M31), AdvSimd.LoadVector128(&value2.M31)) &&
-                       VectorMath.Equal(AdvSimd.LoadVector128(&value1.M41), AdvSimd.LoadVector128(&value2.M41));
-            }
-            else if (Sse.IsSupported)
-            {
-                return VectorMath.Equal(Sse.LoadVector128(&value1.M11), Sse.LoadVector128(&value2.M11)) &&
-                       VectorMath.Equal(Sse.LoadVector128(&value1.M21), Sse.LoadVector128(&value2.M21)) &&
-                       VectorMath.Equal(Sse.LoadVector128(&value1.M31), Sse.LoadVector128(&value2.M31)) &&
-                       VectorMath.Equal(Sse.LoadVector128(&value1.M41), Sse.LoadVector128(&value2.M41));
-            }
-
-            return (value1.M11 == value2.M11 && value1.M22 == value2.M22 && value1.M33 == value2.M33 && value1.M44 == value2.M44 && // Check diagonal element first for early out.
-                    value1.M12 == value2.M12 && value1.M13 == value2.M13 && value1.M14 == value2.M14 && value1.M21 == value2.M21 &&
-                    value1.M23 == value2.M23 && value1.M24 == value2.M24 && value1.M31 == value2.M31 && value1.M32 == value2.M32 &&
-                    value1.M34 == value2.M34 && value1.M41 == value2.M41 && value1.M42 == value2.M42 && value1.M43 == value2.M43);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool operator ==(Matrix4x4 value1, Matrix4x4 value2)
+            => value1.AsImpl() == value2.AsImpl();
 
         /// <summary>Returns a value that indicates whether the specified matrices are not equal.</summary>
         /// <param name="value1">The first matrix to compare.</param>
         /// <param name="value2">The second matrix to compare.</param>
         /// <returns><see langword="true" /> if <paramref name="value1" /> and <paramref name="value2" /> are not equal; otherwise, <see langword="false" />.</returns>
-        public static unsafe bool operator !=(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            if (AdvSimd.Arm64.IsSupported)
-            {
-                return VectorMath.NotEqual(AdvSimd.LoadVector128(&value1.M11), AdvSimd.LoadVector128(&value2.M11)) ||
-                       VectorMath.NotEqual(AdvSimd.LoadVector128(&value1.M21), AdvSimd.LoadVector128(&value2.M21)) ||
-                       VectorMath.NotEqual(AdvSimd.LoadVector128(&value1.M31), AdvSimd.LoadVector128(&value2.M31)) ||
-                       VectorMath.NotEqual(AdvSimd.LoadVector128(&value1.M41), AdvSimd.LoadVector128(&value2.M41));
-            }
-            else if (Sse.IsSupported)
-            {
-                return
-                    VectorMath.NotEqual(Sse.LoadVector128(&value1.M11), Sse.LoadVector128(&value2.M11)) ||
-                    VectorMath.NotEqual(Sse.LoadVector128(&value1.M21), Sse.LoadVector128(&value2.M21)) ||
-                    VectorMath.NotEqual(Sse.LoadVector128(&value1.M31), Sse.LoadVector128(&value2.M31)) ||
-                    VectorMath.NotEqual(Sse.LoadVector128(&value1.M41), Sse.LoadVector128(&value2.M41));
-            }
-
-            return (value1.M11 != value2.M11 || value1.M12 != value2.M12 || value1.M13 != value2.M13 || value1.M14 != value2.M14 ||
-                    value1.M21 != value2.M21 || value1.M22 != value2.M22 || value1.M23 != value2.M23 || value1.M24 != value2.M24 ||
-                    value1.M31 != value2.M31 || value1.M32 != value2.M32 || value1.M33 != value2.M33 || value1.M34 != value2.M34 ||
-                    value1.M41 != value2.M41 || value1.M42 != value2.M42 || value1.M43 != value2.M43 || value1.M44 != value2.M44);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool operator !=(Matrix4x4 value1, Matrix4x4 value2)
+            => value1.AsImpl() != value2.AsImpl();
 
         /// <summary>Multiplies two matrices together to compute the product.</summary>
         /// <param name="value1">The first matrix.</param>
         /// <param name="value2">The second matrix.</param>
         /// <returns>The product matrix.</returns>
-        /// <remarks>The <see cref="System.Numerics.Matrix4x4.op_Multiply" /> method defines the operation of the multiplication operator for <see cref="System.Numerics.Matrix4x4" /> objects.</remarks>
-        public static unsafe Matrix4x4 operator *(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            if (AdvSimd.Arm64.IsSupported)
-            {
-                Unsafe.SkipInit(out Matrix4x4 result);
-
-                // Perform the operation on the first row
-
-                Vector128<float> M11 = AdvSimd.LoadVector128(&value1.M11);
-
-                Vector128<float> vX = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M11), M11, 0);
-                Vector128<float> vY = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M21), M11, 1);
-                Vector128<float> vZ = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vX, AdvSimd.LoadVector128(&value2.M31), M11, 2);
-                Vector128<float> vW = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vY, AdvSimd.LoadVector128(&value2.M41), M11, 3);
-
-                AdvSimd.Store(&result.M11, AdvSimd.Add(vZ, vW));
-
-                // Repeat for the other 3 rows
-
-                Vector128<float> M21 = AdvSimd.LoadVector128(&value1.M21);
-
-                vX = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M11), M21, 0);
-                vY = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M21), M21, 1);
-                vZ = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vX, AdvSimd.LoadVector128(&value2.M31), M21, 2);
-                vW = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vY, AdvSimd.LoadVector128(&value2.M41), M21, 3);
-
-                AdvSimd.Store(&result.M21, AdvSimd.Add(vZ, vW));
-
-                Vector128<float> M31 = AdvSimd.LoadVector128(&value1.M31);
-
-                vX = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M11), M31, 0);
-                vY = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M21), M31, 1);
-                vZ = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vX, AdvSimd.LoadVector128(&value2.M31), M31, 2);
-                vW = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vY, AdvSimd.LoadVector128(&value2.M41), M31, 3);
-
-                AdvSimd.Store(&result.M31, AdvSimd.Add(vZ, vW));
-
-                Vector128<float> M41 = AdvSimd.LoadVector128(&value1.M41);
-
-                vX = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M11), M41, 0);
-                vY = AdvSimd.MultiplyBySelectedScalar(AdvSimd.LoadVector128(&value2.M21), M41, 1);
-                vZ = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vX, AdvSimd.LoadVector128(&value2.M31), M41, 2);
-                vW = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(vY, AdvSimd.LoadVector128(&value2.M41), M41, 3);
-
-                AdvSimd.Store(&result.M41, AdvSimd.Add(vZ, vW));
-
-                return result;
-            }
-            else if (Sse.IsSupported)
-            {
-                Vector128<float> row = Sse.LoadVector128(&value1.M11);
-                Sse.Store(&value1.M11,
-                    Sse.Add(Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0x00), Sse.LoadVector128(&value2.M11)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0x55), Sse.LoadVector128(&value2.M21))),
-                            Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0xAA), Sse.LoadVector128(&value2.M31)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0xFF), Sse.LoadVector128(&value2.M41)))));
-
-                // 0x00 is _MM_SHUFFLE(0,0,0,0), 0x55 is _MM_SHUFFLE(1,1,1,1), etc.
-                // TODO: Replace with a method once it's added to the API.
-
-                row = Sse.LoadVector128(&value1.M21);
-                Sse.Store(&value1.M21,
-                    Sse.Add(Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0x00), Sse.LoadVector128(&value2.M11)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0x55), Sse.LoadVector128(&value2.M21))),
-                            Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0xAA), Sse.LoadVector128(&value2.M31)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0xFF), Sse.LoadVector128(&value2.M41)))));
-
-                row = Sse.LoadVector128(&value1.M31);
-                Sse.Store(&value1.M31,
-                    Sse.Add(Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0x00), Sse.LoadVector128(&value2.M11)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0x55), Sse.LoadVector128(&value2.M21))),
-                            Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0xAA), Sse.LoadVector128(&value2.M31)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0xFF), Sse.LoadVector128(&value2.M41)))));
-
-                row = Sse.LoadVector128(&value1.M41);
-                Sse.Store(&value1.M41,
-                    Sse.Add(Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0x00), Sse.LoadVector128(&value2.M11)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0x55), Sse.LoadVector128(&value2.M21))),
-                            Sse.Add(Sse.Multiply(Sse.Shuffle(row, row, 0xAA), Sse.LoadVector128(&value2.M31)),
-                                    Sse.Multiply(Sse.Shuffle(row, row, 0xFF), Sse.LoadVector128(&value2.M41)))));
-                return value1;
-            }
-
-            Matrix4x4 m;
-
-            // First row
-            m.M11 = value1.M11 * value2.M11 + value1.M12 * value2.M21 + value1.M13 * value2.M31 + value1.M14 * value2.M41;
-            m.M12 = value1.M11 * value2.M12 + value1.M12 * value2.M22 + value1.M13 * value2.M32 + value1.M14 * value2.M42;
-            m.M13 = value1.M11 * value2.M13 + value1.M12 * value2.M23 + value1.M13 * value2.M33 + value1.M14 * value2.M43;
-            m.M14 = value1.M11 * value2.M14 + value1.M12 * value2.M24 + value1.M13 * value2.M34 + value1.M14 * value2.M44;
-
-            // Second row
-            m.M21 = value1.M21 * value2.M11 + value1.M22 * value2.M21 + value1.M23 * value2.M31 + value1.M24 * value2.M41;
-            m.M22 = value1.M21 * value2.M12 + value1.M22 * value2.M22 + value1.M23 * value2.M32 + value1.M24 * value2.M42;
-            m.M23 = value1.M21 * value2.M13 + value1.M22 * value2.M23 + value1.M23 * value2.M33 + value1.M24 * value2.M43;
-            m.M24 = value1.M21 * value2.M14 + value1.M22 * value2.M24 + value1.M23 * value2.M34 + value1.M24 * value2.M44;
-
-            // Third row
-            m.M31 = value1.M31 * value2.M11 + value1.M32 * value2.M21 + value1.M33 * value2.M31 + value1.M34 * value2.M41;
-            m.M32 = value1.M31 * value2.M12 + value1.M32 * value2.M22 + value1.M33 * value2.M32 + value1.M34 * value2.M42;
-            m.M33 = value1.M31 * value2.M13 + value1.M32 * value2.M23 + value1.M33 * value2.M33 + value1.M34 * value2.M43;
-            m.M34 = value1.M31 * value2.M14 + value1.M32 * value2.M24 + value1.M33 * value2.M34 + value1.M34 * value2.M44;
-
-            // Fourth row
-            m.M41 = value1.M41 * value2.M11 + value1.M42 * value2.M21 + value1.M43 * value2.M31 + value1.M44 * value2.M41;
-            m.M42 = value1.M41 * value2.M12 + value1.M42 * value2.M22 + value1.M43 * value2.M32 + value1.M44 * value2.M42;
-            m.M43 = value1.M41 * value2.M13 + value1.M42 * value2.M23 + value1.M43 * value2.M33 + value1.M44 * value2.M43;
-            m.M44 = value1.M41 * value2.M14 + value1.M42 * value2.M24 + value1.M43 * value2.M34 + value1.M44 * value2.M44;
-
-            return m;
-        }
+        /// <remarks>The <see cref="Matrix4x4.op_Multiply" /> method defines the operation of the multiplication operator for <see cref="Matrix4x4" /> objects.</remarks>
+        public static Matrix4x4 operator *(Matrix4x4 value1, Matrix4x4 value2)
+            => (value1.AsImpl() * value2.AsImpl()).AsM4x4();
 
         /// <summary>Multiplies a matrix by a float to compute the product.</summary>
         /// <param name="value1">The matrix to scale.</param>
         /// <param name="value2">The scaling value to use.</param>
         /// <returns>The scaled matrix.</returns>
-        /// <remarks>The <see cref="System.Numerics.Matrix4x4.op_Multiply" /> method defines the operation of the multiplication operator for <see cref="System.Numerics.Matrix4x4" /> objects.</remarks>
-        public static unsafe Matrix4x4 operator *(Matrix4x4 value1, float value2)
-        {
-            if (AdvSimd.IsSupported)
-            {
-                Vector128<float> value2Vec = Vector128.Create(value2);
-                AdvSimd.Store(&value1.M11, AdvSimd.Multiply(AdvSimd.LoadVector128(&value1.M11), value2Vec));
-                AdvSimd.Store(&value1.M21, AdvSimd.Multiply(AdvSimd.LoadVector128(&value1.M21), value2Vec));
-                AdvSimd.Store(&value1.M31, AdvSimd.Multiply(AdvSimd.LoadVector128(&value1.M31), value2Vec));
-                AdvSimd.Store(&value1.M41, AdvSimd.Multiply(AdvSimd.LoadVector128(&value1.M41), value2Vec));
-                return value1;
-            }
-            else if (Sse.IsSupported)
-            {
-                Vector128<float> value2Vec = Vector128.Create(value2);
-                Sse.Store(&value1.M11, Sse.Multiply(Sse.LoadVector128(&value1.M11), value2Vec));
-                Sse.Store(&value1.M21, Sse.Multiply(Sse.LoadVector128(&value1.M21), value2Vec));
-                Sse.Store(&value1.M31, Sse.Multiply(Sse.LoadVector128(&value1.M31), value2Vec));
-                Sse.Store(&value1.M41, Sse.Multiply(Sse.LoadVector128(&value1.M41), value2Vec));
-                return value1;
-            }
-
-            Matrix4x4 m;
-
-            m.M11 = value1.M11 * value2;
-            m.M12 = value1.M12 * value2;
-            m.M13 = value1.M13 * value2;
-            m.M14 = value1.M14 * value2;
-            m.M21 = value1.M21 * value2;
-            m.M22 = value1.M22 * value2;
-            m.M23 = value1.M23 * value2;
-            m.M24 = value1.M24 * value2;
-            m.M31 = value1.M31 * value2;
-            m.M32 = value1.M32 * value2;
-            m.M33 = value1.M33 * value2;
-            m.M34 = value1.M34 * value2;
-            m.M41 = value1.M41 * value2;
-            m.M42 = value1.M42 * value2;
-            m.M43 = value1.M43 * value2;
-            m.M44 = value1.M44 * value2;
-            return m;
-        }
+        /// <remarks>The <see cref="Matrix4x4.op_Multiply" /> method defines the operation of the multiplication operator for <see cref="Matrix4x4" /> objects.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Matrix4x4 operator *(Matrix4x4 value1, float value2)
+            => (value1.AsImpl() * value2).AsM4x4();
 
         /// <summary>Subtracts each element in a second matrix from its corresponding element in a first matrix.</summary>
         /// <param name="value1">The first matrix.</param>
         /// <param name="value2">The second matrix.</param>
         /// <returns>The matrix containing the values that result from subtracting each element in <paramref name="value2" /> from its corresponding element in <paramref name="value1" />.</returns>
-        /// <remarks>The <see cref="System.Numerics.Matrix4x4.op_Subtraction" /> method defines the operation of the subtraction operator for <see cref="System.Numerics.Matrix4x4" /> objects.</remarks>
-        public static unsafe Matrix4x4 operator -(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            if (AdvSimd.IsSupported)
-            {
-                AdvSimd.Store(&value1.M11, AdvSimd.Subtract(AdvSimd.LoadVector128(&value1.M11), AdvSimd.LoadVector128(&value2.M11)));
-                AdvSimd.Store(&value1.M21, AdvSimd.Subtract(AdvSimd.LoadVector128(&value1.M21), AdvSimd.LoadVector128(&value2.M21)));
-                AdvSimd.Store(&value1.M31, AdvSimd.Subtract(AdvSimd.LoadVector128(&value1.M31), AdvSimd.LoadVector128(&value2.M31)));
-                AdvSimd.Store(&value1.M41, AdvSimd.Subtract(AdvSimd.LoadVector128(&value1.M41), AdvSimd.LoadVector128(&value2.M41)));
-                return value1;
-            }
-            else if (Sse.IsSupported)
-            {
-                Sse.Store(&value1.M11, Sse.Subtract(Sse.LoadVector128(&value1.M11), Sse.LoadVector128(&value2.M11)));
-                Sse.Store(&value1.M21, Sse.Subtract(Sse.LoadVector128(&value1.M21), Sse.LoadVector128(&value2.M21)));
-                Sse.Store(&value1.M31, Sse.Subtract(Sse.LoadVector128(&value1.M31), Sse.LoadVector128(&value2.M31)));
-                Sse.Store(&value1.M41, Sse.Subtract(Sse.LoadVector128(&value1.M41), Sse.LoadVector128(&value2.M41)));
-                return value1;
-            }
-
-            Matrix4x4 m;
-
-            m.M11 = value1.M11 - value2.M11;
-            m.M12 = value1.M12 - value2.M12;
-            m.M13 = value1.M13 - value2.M13;
-            m.M14 = value1.M14 - value2.M14;
-            m.M21 = value1.M21 - value2.M21;
-            m.M22 = value1.M22 - value2.M22;
-            m.M23 = value1.M23 - value2.M23;
-            m.M24 = value1.M24 - value2.M24;
-            m.M31 = value1.M31 - value2.M31;
-            m.M32 = value1.M32 - value2.M32;
-            m.M33 = value1.M33 - value2.M33;
-            m.M34 = value1.M34 - value2.M34;
-            m.M41 = value1.M41 - value2.M41;
-            m.M42 = value1.M42 - value2.M42;
-            m.M43 = value1.M43 - value2.M43;
-            m.M44 = value1.M44 - value2.M44;
-
-            return m;
-        }
+        /// <remarks>The <see cref="op_Subtraction" /> method defines the operation of the subtraction operator for <see cref="Matrix4x4" /> objects.</remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Matrix4x4 operator -(Matrix4x4 value1, Matrix4x4 value2)
+            => (value1.AsImpl() - value2.AsImpl()).AsM4x4();
 
         /// <summary>Negates the specified matrix by multiplying all its values by -1.</summary>
         /// <param name="value">The matrix to negate.</param>
         /// <returns>The negated matrix.</returns>
-        public static unsafe Matrix4x4 operator -(Matrix4x4 value)
-        {
-            if (AdvSimd.IsSupported)
-            {
-                AdvSimd.Store(&value.M11, AdvSimd.Negate(AdvSimd.LoadVector128(&value.M11)));
-                AdvSimd.Store(&value.M21, AdvSimd.Negate(AdvSimd.LoadVector128(&value.M21)));
-                AdvSimd.Store(&value.M31, AdvSimd.Negate(AdvSimd.LoadVector128(&value.M31)));
-                AdvSimd.Store(&value.M41, AdvSimd.Negate(AdvSimd.LoadVector128(&value.M41)));
-                return value;
-            }
-            else if (Sse.IsSupported)
-            {
-                Vector128<float> zero = Vector128<float>.Zero;
-                Sse.Store(&value.M11, Sse.Subtract(zero, Sse.LoadVector128(&value.M11)));
-                Sse.Store(&value.M21, Sse.Subtract(zero, Sse.LoadVector128(&value.M21)));
-                Sse.Store(&value.M31, Sse.Subtract(zero, Sse.LoadVector128(&value.M31)));
-                Sse.Store(&value.M41, Sse.Subtract(zero, Sse.LoadVector128(&value.M41)));
-                return value;
-            }
-
-            Matrix4x4 m;
-
-            m.M11 = -value.M11;
-            m.M12 = -value.M12;
-            m.M13 = -value.M13;
-            m.M14 = -value.M14;
-            m.M21 = -value.M21;
-            m.M22 = -value.M22;
-            m.M23 = -value.M23;
-            m.M24 = -value.M24;
-            m.M31 = -value.M31;
-            m.M32 = -value.M32;
-            m.M33 = -value.M33;
-            m.M34 = -value.M34;
-            m.M41 = -value.M41;
-            m.M42 = -value.M42;
-            m.M43 = -value.M43;
-            m.M44 = -value.M44;
-
-            return m;
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Matrix4x4 operator -(Matrix4x4 value)
+            => (-value.AsImpl()).AsM4x4();
 
         /// <summary>Adds each element in one matrix with its corresponding element in a second matrix.</summary>
         /// <param name="value1">The first matrix.</param>
@@ -573,9 +222,7 @@ namespace System.Numerics
         /// <returns>The matrix that contains the summed values of <paramref name="value1" /> and <paramref name="value2" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix4x4 Add(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            return value1 + value2;
-        }
+            => (value1.AsImpl() + value2.AsImpl()).AsM4x4();
 
         /// <summary>Creates a spherical billboard that rotates around a specified object position.</summary>
         /// <param name="objectPosition">The position of the object that the billboard will rotate around.</param>
@@ -584,46 +231,7 @@ namespace System.Numerics
         /// <param name="cameraForwardVector">The forward vector of the camera.</param>
         /// <returns>The created billboard.</returns>
         public static Matrix4x4 CreateBillboard(Vector3 objectPosition, Vector3 cameraPosition, Vector3 cameraUpVector, Vector3 cameraForwardVector)
-        {
-            Vector3 zaxis = objectPosition - cameraPosition;
-            float norm = zaxis.LengthSquared();
-
-            if (norm < BillboardEpsilon)
-            {
-                zaxis = -cameraForwardVector;
-            }
-            else
-            {
-                zaxis = Vector3.Multiply(zaxis, 1.0f / MathF.Sqrt(norm));
-            }
-
-            Vector3 xaxis = Vector3.Normalize(Vector3.Cross(cameraUpVector, zaxis));
-            Vector3 yaxis = Vector3.Cross(zaxis, xaxis);
-
-            Matrix4x4 result;
-
-            result.M11 = xaxis.X;
-            result.M12 = xaxis.Y;
-            result.M13 = xaxis.Z;
-            result.M14 = 0.0f;
-
-            result.M21 = yaxis.X;
-            result.M22 = yaxis.Y;
-            result.M23 = yaxis.Z;
-            result.M24 = 0.0f;
-
-            result.M31 = zaxis.X;
-            result.M32 = zaxis.Y;
-            result.M33 = zaxis.Z;
-            result.M34 = 0.0f;
-
-            result.M41 = objectPosition.X;
-            result.M42 = objectPosition.Y;
-            result.M43 = objectPosition.Z;
-            result.M44 = 1.0f;
-
-            return result;
-        }
+            => Impl.CreateBillboard(in objectPosition, in cameraPosition, in cameraUpVector, in cameraForwardVector).AsM4x4();
 
         /// <summary>Creates a cylindrical billboard that rotates around a specified axis.</summary>
         /// <param name="objectPosition">The position of the object that the billboard will rotate around.</param>
@@ -633,158 +241,20 @@ namespace System.Numerics
         /// <param name="objectForwardVector">The forward vector of the object.</param>
         /// <returns>The billboard matrix.</returns>
         public static Matrix4x4 CreateConstrainedBillboard(Vector3 objectPosition, Vector3 cameraPosition, Vector3 rotateAxis, Vector3 cameraForwardVector, Vector3 objectForwardVector)
-        {
-            // Treat the case when object and camera positions are too close.
-            Vector3 faceDir = objectPosition - cameraPosition;
-            float norm = faceDir.LengthSquared();
-
-            if (norm < BillboardEpsilon)
-            {
-                faceDir = -cameraForwardVector;
-            }
-            else
-            {
-                faceDir = Vector3.Multiply(faceDir, (1.0f / MathF.Sqrt(norm)));
-            }
-
-            Vector3 yaxis = rotateAxis;
-            Vector3 xaxis;
-            Vector3 zaxis;
-
-            // Treat the case when angle between faceDir and rotateAxis is too close to 0.
-            float dot = Vector3.Dot(rotateAxis, faceDir);
-
-            if (MathF.Abs(dot) > BillboardMinAngle)
-            {
-                zaxis = objectForwardVector;
-
-                // Make sure passed values are useful for compute.
-                dot = Vector3.Dot(rotateAxis, zaxis);
-
-                if (MathF.Abs(dot) > BillboardMinAngle)
-                {
-                    zaxis = (MathF.Abs(rotateAxis.Z) > BillboardMinAngle) ? new Vector3(1, 0, 0) : new Vector3(0, 0, -1);
-                }
-
-                xaxis = Vector3.Normalize(Vector3.Cross(rotateAxis, zaxis));
-                zaxis = Vector3.Normalize(Vector3.Cross(xaxis, rotateAxis));
-            }
-            else
-            {
-                xaxis = Vector3.Normalize(Vector3.Cross(rotateAxis, faceDir));
-                zaxis = Vector3.Normalize(Vector3.Cross(xaxis, yaxis));
-            }
-
-            Matrix4x4 result;
-
-            result.M11 = xaxis.X;
-            result.M12 = xaxis.Y;
-            result.M13 = xaxis.Z;
-            result.M14 = 0.0f;
-
-            result.M21 = yaxis.X;
-            result.M22 = yaxis.Y;
-            result.M23 = yaxis.Z;
-            result.M24 = 0.0f;
-
-            result.M31 = zaxis.X;
-            result.M32 = zaxis.Y;
-            result.M33 = zaxis.Z;
-            result.M34 = 0.0f;
-
-            result.M41 = objectPosition.X;
-            result.M42 = objectPosition.Y;
-            result.M43 = objectPosition.Z;
-            result.M44 = 1.0f;
-
-            return result;
-        }
+            => Impl.CreateConstrainedBillboard(in objectPosition, in cameraPosition, in rotateAxis, in cameraForwardVector, in objectForwardVector).AsM4x4();
 
         /// <summary>Creates a matrix that rotates around an arbitrary vector.</summary>
         /// <param name="axis">The axis to rotate around.</param>
         /// <param name="angle">The angle to rotate around <paramref name="axis" />, in radians.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateFromAxisAngle(Vector3 axis, float angle)
-        {
-            // a: angle
-            // x, y, z: unit vector for axis.
-            //
-            // Rotation matrix M can compute by using below equation.
-            //
-            //        T               T
-            //  M = uu + (cos a)( I-uu ) + (sin a)S
-            //
-            // Where:
-            //
-            //  u = ( x, y, z )
-            //
-            //      [  0 -z  y ]
-            //  S = [  z  0 -x ]
-            //      [ -y  x  0 ]
-            //
-            //      [ 1 0 0 ]
-            //  I = [ 0 1 0 ]
-            //      [ 0 0 1 ]
-            //
-            //
-            //     [  xx+cosa*(1-xx)   yx-cosa*yx-sina*z zx-cosa*xz+sina*y ]
-            // M = [ xy-cosa*yx+sina*z    yy+cosa(1-yy)  yz-cosa*yz-sina*x ]
-            //     [ zx-cosa*zx-sina*y zy-cosa*zy+sina*x   zz+cosa*(1-zz)  ]
-            //
-            float x = axis.X, y = axis.Y, z = axis.Z;
-            float sa = MathF.Sin(angle), ca = MathF.Cos(angle);
-            float xx = x * x, yy = y * y, zz = z * z;
-            float xy = x * y, xz = x * z, yz = y * z;
-
-            Matrix4x4 result = Identity;
-
-            result.M11 = xx + ca * (1.0f - xx);
-            result.M12 = xy - ca * xy + sa * z;
-            result.M13 = xz - ca * xz - sa * y;
-
-            result.M21 = xy - ca * xy - sa * z;
-            result.M22 = yy + ca * (1.0f - yy);
-            result.M23 = yz - ca * yz + sa * x;
-
-            result.M31 = xz - ca * xz + sa * y;
-            result.M32 = yz - ca * yz - sa * x;
-            result.M33 = zz + ca * (1.0f - zz);
-
-            return result;
-        }
+            => Impl.CreateFromAxisAngle(in axis, angle).AsM4x4();
 
         /// <summary>Creates a rotation matrix from the specified Quaternion rotation value.</summary>
         /// <param name="quaternion">The source Quaternion.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateFromQuaternion(Quaternion quaternion)
-        {
-            Matrix4x4 result = Identity;
-
-            float xx = quaternion.X * quaternion.X;
-            float yy = quaternion.Y * quaternion.Y;
-            float zz = quaternion.Z * quaternion.Z;
-
-            float xy = quaternion.X * quaternion.Y;
-            float wz = quaternion.Z * quaternion.W;
-            float xz = quaternion.Z * quaternion.X;
-            float wy = quaternion.Y * quaternion.W;
-            float yz = quaternion.Y * quaternion.Z;
-            float wx = quaternion.X * quaternion.W;
-
-            result.M11 = 1.0f - 2.0f * (yy + zz);
-            result.M12 = 2.0f * (xy + wz);
-            result.M13 = 2.0f * (xz - wy);
-
-            result.M21 = 2.0f * (xy - wz);
-            result.M22 = 1.0f - 2.0f * (zz + xx);
-            result.M23 = 2.0f * (yz + wx);
-
-            result.M31 = 2.0f * (xz + wy);
-            result.M32 = 2.0f * (yz - wx);
-            result.M33 = 1.0f - 2.0f * (yy + xx);
-
-            return result;
-        }
+            => Impl.CreateFromQuaternion(in quaternion).AsM4x4();
 
         /// <summary>Creates a rotation matrix from the specified yaw, pitch, and roll.</summary>
         /// <param name="yaw">The angle of rotation, in radians, around the Y axis.</param>
@@ -792,10 +262,7 @@ namespace System.Numerics
         /// <param name="roll">The angle of rotation, in radians, around the Z axis.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateFromYawPitchRoll(float yaw, float pitch, float roll)
-        {
-            Quaternion q = Quaternion.CreateFromYawPitchRoll(yaw, pitch, roll);
-            return CreateFromQuaternion(q);
-        }
+            => Impl.CreateFromYawPitchRoll(yaw, pitch, roll).AsM4x4();
 
         /// <summary>Creates a view matrix.</summary>
         /// <param name="cameraPosition">The position of the camera.</param>
@@ -803,31 +270,7 @@ namespace System.Numerics
         /// <param name="cameraUpVector">The direction that is "up" from the camera's point of view.</param>
         /// <returns>The view matrix.</returns>
         public static Matrix4x4 CreateLookAt(Vector3 cameraPosition, Vector3 cameraTarget, Vector3 cameraUpVector)
-        {
-            Vector3 zaxis = Vector3.Normalize(cameraPosition - cameraTarget);
-            Vector3 xaxis = Vector3.Normalize(Vector3.Cross(cameraUpVector, zaxis));
-            Vector3 yaxis = Vector3.Cross(zaxis, xaxis);
-
-            Matrix4x4 result = Identity;
-
-            result.M11 = xaxis.X;
-            result.M12 = yaxis.X;
-            result.M13 = zaxis.X;
-
-            result.M21 = xaxis.Y;
-            result.M22 = yaxis.Y;
-            result.M23 = zaxis.Y;
-
-            result.M31 = xaxis.Z;
-            result.M32 = yaxis.Z;
-            result.M33 = zaxis.Z;
-
-            result.M41 = -Vector3.Dot(xaxis, cameraPosition);
-            result.M42 = -Vector3.Dot(yaxis, cameraPosition);
-            result.M43 = -Vector3.Dot(zaxis, cameraPosition);
-
-            return result;
-        }
+            => Impl.CreateLookAt(in cameraPosition, in cameraTarget, in cameraUpVector).AsM4x4();
 
         /// <summary>Creates an orthographic perspective matrix from the given view volume dimensions.</summary>
         /// <param name="width">The width of the view volume.</param>
@@ -836,16 +279,7 @@ namespace System.Numerics
         /// <param name="zFarPlane">The maximum Z-value of the view volume.</param>
         /// <returns>The orthographic projection matrix.</returns>
         public static Matrix4x4 CreateOrthographic(float width, float height, float zNearPlane, float zFarPlane)
-        {
-            Matrix4x4 result = Identity;
-
-            result.M11 = 2.0f / width;
-            result.M22 = 2.0f / height;
-            result.M33 = 1.0f / (zNearPlane - zFarPlane);
-            result.M43 = zNearPlane / (zNearPlane - zFarPlane);
-
-            return result;
-        }
+            => Impl.CreateOrthographic(width, height, zNearPlane, zFarPlane).AsM4x4();
 
         /// <summary>Creates a customized orthographic projection matrix.</summary>
         /// <param name="left">The minimum X-value of the view volume.</param>
@@ -856,21 +290,7 @@ namespace System.Numerics
         /// <param name="zFarPlane">The maximum Z-value of the view volume.</param>
         /// <returns>The orthographic projection matrix.</returns>
         public static Matrix4x4 CreateOrthographicOffCenter(float left, float right, float bottom, float top, float zNearPlane, float zFarPlane)
-        {
-            Matrix4x4 result = Identity;
-
-            result.M11 = 2.0f / (right - left);
-
-            result.M22 = 2.0f / (top - bottom);
-
-            result.M33 = 1.0f / (zNearPlane - zFarPlane);
-
-            result.M41 = (left + right) / (left - right);
-            result.M42 = (top + bottom) / (bottom - top);
-            result.M43 = zNearPlane / (zNearPlane - zFarPlane);
-
-            return result;
-        }
+            => Impl.CreateOrthographicOffCenter(left, right, bottom, top, zNearPlane, zFarPlane).AsM4x4();
 
         /// <summary>Creates a perspective projection matrix from the given view volume dimensions.</summary>
         /// <param name="width">The width of the view volume at the near view plane.</param>
@@ -878,35 +298,13 @@ namespace System.Numerics
         /// <param name="nearPlaneDistance">The distance to the near view plane.</param>
         /// <param name="farPlaneDistance">The distance to the far view plane.</param>
         /// <returns>The perspective projection matrix.</returns>
-        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="nearPlaneDistance" /> is less than or equal to zero.
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="nearPlaneDistance" /> is less than or equal to zero.
         /// -or-
         /// <paramref name="farPlaneDistance" /> is less than or equal to zero.
         /// -or-
         /// <paramref name="nearPlaneDistance" /> is greater than or equal to <paramref name="farPlaneDistance" />.</exception>
         public static Matrix4x4 CreatePerspective(float width, float height, float nearPlaneDistance, float farPlaneDistance)
-        {
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(nearPlaneDistance, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(farPlaneDistance, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(nearPlaneDistance, farPlaneDistance);
-
-            Matrix4x4 result;
-
-            result.M11 = 2.0f * nearPlaneDistance / width;
-            result.M12 = result.M13 = result.M14 = 0.0f;
-
-            result.M22 = 2.0f * nearPlaneDistance / height;
-            result.M21 = result.M23 = result.M24 = 0.0f;
-
-            float negFarRange = float.IsPositiveInfinity(farPlaneDistance) ? -1.0f : farPlaneDistance / (nearPlaneDistance - farPlaneDistance);
-            result.M33 = negFarRange;
-            result.M31 = result.M32 = 0.0f;
-            result.M34 = -1.0f;
-
-            result.M41 = result.M42 = result.M44 = 0.0f;
-            result.M43 = nearPlaneDistance * negFarRange;
-
-            return result;
-        }
+            => Impl.CreatePerspective(width, height, nearPlaneDistance, farPlaneDistance).AsM4x4();
 
         /// <summary>Creates a perspective projection matrix based on a field of view, aspect ratio, and near and far view plane distances.</summary>
         /// <param name="fieldOfView">The field of view in the y direction, in radians.</param>
@@ -914,44 +312,16 @@ namespace System.Numerics
         /// <param name="nearPlaneDistance">The distance to the near view plane.</param>
         /// <param name="farPlaneDistance">The distance to the far view plane.</param>
         /// <returns>The perspective projection matrix.</returns>
-        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="fieldOfView" /> is less than or equal to zero.
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="fieldOfView" /> is less than or equal to zero.
         /// -or-
-        /// <paramref name="fieldOfView" /> is greater than or equal to <see cref="System.Math.PI" />.
+        /// <paramref name="fieldOfView" /> is greater than or equal to <see cref="Math.PI" />.
         /// <paramref name="nearPlaneDistance" /> is less than or equal to zero.
         /// -or-
         /// <paramref name="farPlaneDistance" /> is less than or equal to zero.
         /// -or-
         /// <paramref name="nearPlaneDistance" /> is greater than or equal to <paramref name="farPlaneDistance" />.</exception>
         public static Matrix4x4 CreatePerspectiveFieldOfView(float fieldOfView, float aspectRatio, float nearPlaneDistance, float farPlaneDistance)
-        {
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(fieldOfView, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(fieldOfView, MathF.PI);
-
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(nearPlaneDistance, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(farPlaneDistance, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(nearPlaneDistance, farPlaneDistance);
-
-            float yScale = 1.0f / MathF.Tan(fieldOfView * 0.5f);
-            float xScale = yScale / aspectRatio;
-
-            Matrix4x4 result;
-
-            result.M11 = xScale;
-            result.M12 = result.M13 = result.M14 = 0.0f;
-
-            result.M22 = yScale;
-            result.M21 = result.M23 = result.M24 = 0.0f;
-
-            result.M31 = result.M32 = 0.0f;
-            float negFarRange = float.IsPositiveInfinity(farPlaneDistance) ? -1.0f : farPlaneDistance / (nearPlaneDistance - farPlaneDistance);
-            result.M33 = negFarRange;
-            result.M34 = -1.0f;
-
-            result.M41 = result.M42 = result.M44 = 0.0f;
-            result.M43 = nearPlaneDistance * negFarRange;
-
-            return result;
-        }
+            => Impl.CreatePerspectiveFieldOfView(fieldOfView, aspectRatio, nearPlaneDistance, farPlaneDistance).AsM4x4();
 
         /// <summary>Creates a customized perspective projection matrix.</summary>
         /// <param name="left">The minimum x-value of the view volume at the near view plane.</param>
@@ -961,224 +331,58 @@ namespace System.Numerics
         /// <param name="nearPlaneDistance">The distance to the near view plane.</param>
         /// <param name="farPlaneDistance">The distance to the far view plane.</param>
         /// <returns>The perspective projection matrix.</returns>
-        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="nearPlaneDistance" /> is less than or equal to zero.
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="nearPlaneDistance" /> is less than or equal to zero.
         /// -or-
         /// <paramref name="farPlaneDistance" /> is less than or equal to zero.
         /// -or-
         /// <paramref name="nearPlaneDistance" /> is greater than or equal to <paramref name="farPlaneDistance" />.</exception>
         public static Matrix4x4 CreatePerspectiveOffCenter(float left, float right, float bottom, float top, float nearPlaneDistance, float farPlaneDistance)
-        {
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(nearPlaneDistance, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(farPlaneDistance, 0.0f);
-            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(nearPlaneDistance, farPlaneDistance);
-
-            Matrix4x4 result;
-
-            result.M11 = 2.0f * nearPlaneDistance / (right - left);
-            result.M12 = result.M13 = result.M14 = 0.0f;
-
-            result.M22 = 2.0f * nearPlaneDistance / (top - bottom);
-            result.M21 = result.M23 = result.M24 = 0.0f;
-
-            result.M31 = (left + right) / (right - left);
-            result.M32 = (top + bottom) / (top - bottom);
-            float negFarRange = float.IsPositiveInfinity(farPlaneDistance) ? -1.0f : farPlaneDistance / (nearPlaneDistance - farPlaneDistance);
-            result.M33 = negFarRange;
-            result.M34 = -1.0f;
-
-            result.M43 = nearPlaneDistance * negFarRange;
-            result.M41 = result.M42 = result.M44 = 0.0f;
-
-            return result;
-        }
+            => Impl.CreatePerspectiveOffCenter(left, right, bottom, top, nearPlaneDistance, farPlaneDistance).AsM4x4();
 
         /// <summary>Creates a matrix that reflects the coordinate system about a specified plane.</summary>
         /// <param name="value">The plane about which to create a reflection.</param>
         /// <returns>A new matrix expressing the reflection.</returns>
         public static Matrix4x4 CreateReflection(Plane value)
-        {
-            value = Plane.Normalize(value);
-
-            float a = value.Normal.X;
-            float b = value.Normal.Y;
-            float c = value.Normal.Z;
-
-            float fa = -2.0f * a;
-            float fb = -2.0f * b;
-            float fc = -2.0f * c;
-
-            Matrix4x4 result = Identity;
-
-            result.M11 = fa * a + 1.0f;
-            result.M12 = fb * a;
-            result.M13 = fc * a;
-
-            result.M21 = fa * b;
-            result.M22 = fb * b + 1.0f;
-            result.M23 = fc * b;
-
-            result.M31 = fa * c;
-            result.M32 = fb * c;
-            result.M33 = fc * c + 1.0f;
-
-            result.M41 = fa * value.D;
-            result.M42 = fb * value.D;
-            result.M43 = fc * value.D;
-
-            return result;
-        }
+            => Impl.CreateReflection(in value).AsM4x4();
 
         /// <summary>Creates a matrix for rotating points around the X axis.</summary>
         /// <param name="radians">The amount, in radians, by which to rotate around the X axis.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateRotationX(float radians)
-        {
-            Matrix4x4 result = Identity;
-
-            float c = MathF.Cos(radians);
-            float s = MathF.Sin(radians);
-
-            // [  1  0  0  0 ]
-            // [  0  c  s  0 ]
-            // [  0 -s  c  0 ]
-            // [  0  0  0  1 ]
-
-            result.M22 = c;
-            result.M23 = s;
-            result.M32 = -s;
-            result.M33 = c;
-
-            return result;
-        }
+            => Impl.CreateRotationX(radians).AsM4x4();
 
         /// <summary>Creates a matrix for rotating points around the X axis from a center point.</summary>
         /// <param name="radians">The amount, in radians, by which to rotate around the X axis.</param>
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateRotationX(float radians, Vector3 centerPoint)
-        {
-            Matrix4x4 result = Identity;
-
-            float c = MathF.Cos(radians);
-            float s = MathF.Sin(radians);
-
-            float y = centerPoint.Y * (1 - c) + centerPoint.Z * s;
-            float z = centerPoint.Z * (1 - c) - centerPoint.Y * s;
-
-            // [  1  0  0  0 ]
-            // [  0  c  s  0 ]
-            // [  0 -s  c  0 ]
-            // [  0  y  z  1 ]
-
-            result.M22 = c;
-            result.M23 = s;
-            result.M32 = -s;
-            result.M33 = c;
-            result.M42 = y;
-            result.M43 = z;
-
-            return result;
-        }
+            => Impl.CreateRotationX(radians, in centerPoint).AsM4x4();
 
         /// <summary>Creates a matrix for rotating points around the Y axis.</summary>
         /// <param name="radians">The amount, in radians, by which to rotate around the Y-axis.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateRotationY(float radians)
-        {
-            Matrix4x4 result = Identity;
-
-            float c = MathF.Cos(radians);
-            float s = MathF.Sin(radians);
-
-            // [  c  0 -s  0 ]
-            // [  0  1  0  0 ]
-            // [  s  0  c  0 ]
-            // [  0  0  0  1 ]
-            result.M11 = c;
-            result.M13 = -s;
-            result.M31 = s;
-            result.M33 = c;
-
-            return result;
-        }
+            => Impl.CreateRotationY(radians).AsM4x4();
 
         /// <summary>The amount, in radians, by which to rotate around the Y axis from a center point.</summary>
         /// <param name="radians">The amount, in radians, by which to rotate around the Y-axis.</param>
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateRotationY(float radians, Vector3 centerPoint)
-        {
-            Matrix4x4 result = Identity;
-
-            float c = MathF.Cos(radians);
-            float s = MathF.Sin(radians);
-
-            float x = centerPoint.X * (1 - c) - centerPoint.Z * s;
-            float z = centerPoint.Z * (1 - c) + centerPoint.X * s;
-
-            // [  c  0 -s  0 ]
-            // [  0  1  0  0 ]
-            // [  s  0  c  0 ]
-            // [  x  0  z  1 ]
-            result.M11 = c;
-            result.M13 = -s;
-            result.M31 = s;
-            result.M33 = c;
-            result.M41 = x;
-            result.M43 = z;
-
-            return result;
-        }
+            => Impl.CreateRotationY(radians, in centerPoint).AsM4x4();
 
         /// <summary>Creates a matrix for rotating points around the Z axis.</summary>
         /// <param name="radians">The amount, in radians, by which to rotate around the Z-axis.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateRotationZ(float radians)
-        {
-            Matrix4x4 result = Identity;
-
-            float c = MathF.Cos(radians);
-            float s = MathF.Sin(radians);
-
-            // [  c  s  0  0 ]
-            // [ -s  c  0  0 ]
-            // [  0  0  1  0 ]
-            // [  0  0  0  1 ]
-            result.M11 = c;
-            result.M12 = s;
-            result.M21 = -s;
-            result.M22 = c;
-
-            return result;
-        }
+            => Impl.CreateRotationZ(radians).AsM4x4();
 
         /// <summary>Creates a matrix for rotating points around the Z axis from a center point.</summary>
         /// <param name="radians">The amount, in radians, by which to rotate around the Z-axis.</param>
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The rotation matrix.</returns>
         public static Matrix4x4 CreateRotationZ(float radians, Vector3 centerPoint)
-        {
-            Matrix4x4 result = Identity;
-
-            float c = MathF.Cos(radians);
-            float s = MathF.Sin(radians);
-
-            float x = centerPoint.X * (1 - c) + centerPoint.Y * s;
-            float y = centerPoint.Y * (1 - c) - centerPoint.X * s;
-
-            // [  c  s  0  0 ]
-            // [ -s  c  0  0 ]
-            // [  0  0  1  0 ]
-            // [  x  y  0  1 ]
-            result.M11 = c;
-            result.M12 = s;
-            result.M21 = -s;
-            result.M22 = c;
-            result.M41 = x;
-            result.M42 = y;
-
-            return result;
-        }
+            => Impl.CreateRotationZ(radians, in centerPoint).AsM4x4();
 
         /// <summary>Creates a scaling matrix from the specified X, Y, and Z components.</summary>
         /// <param name="xScale">The value to scale by on the X axis.</param>
@@ -1186,16 +390,7 @@ namespace System.Numerics
         /// <param name="zScale">The value to scale by on the Z axis.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix4x4 CreateScale(float xScale, float yScale, float zScale)
-        {
-            Matrix4x4 result = default;
-
-            result.M11 = xScale;
-            result.M22 = yScale;
-            result.M33 = zScale;
-            result.M44 = 1;
-
-            return result;
-        }
+            => Impl.CreateScale(xScale, yScale, zScale).AsM4x4();
 
         /// <summary>Creates a scaling matrix that is offset by a given center point.</summary>
         /// <param name="xScale">The value to scale by on the X axis.</param>
@@ -1204,146 +399,46 @@ namespace System.Numerics
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix4x4 CreateScale(float xScale, float yScale, float zScale, Vector3 centerPoint)
-        {
-            Matrix4x4 result = default;
-
-            float tx = centerPoint.X * (1 - xScale);
-            float ty = centerPoint.Y * (1 - yScale);
-            float tz = centerPoint.Z * (1 - zScale);
-
-            result.M11 = xScale;
-            result.M22 = yScale;
-            result.M33 = zScale;
-            result.M44 = 1;
-
-            result.M41 = tx;
-            result.M42 = ty;
-            result.M43 = tz;
-
-            return result;
-        }
+            => Impl.CreateScale(xScale, yScale, zScale, in centerPoint).AsM4x4();
 
         /// <summary>Creates a scaling matrix from the specified vector scale.</summary>
         /// <param name="scales">The scale to use.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix4x4 CreateScale(Vector3 scales)
-        {
-            Matrix4x4 result = default;
-
-            result.M11 = scales.X;
-            result.M22 = scales.Y;
-            result.M33 = scales.Z;
-            result.M44 = 1;
-
-            return result;
-        }
+            => Impl.CreateScale(in scales).AsM4x4();
 
         /// <summary>Creates a scaling matrix with a center point.</summary>
         /// <param name="scales">The vector that contains the amount to scale on each axis.</param>
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix4x4 CreateScale(Vector3 scales, Vector3 centerPoint)
-        {
-            Matrix4x4 result = default;
-
-            Vector3 t = centerPoint * (Vector3.One - scales);
-
-            result.M11 = scales.X;
-            result.M22 = scales.Y;
-            result.M33 = scales.Z;
-            result.M44 = 1;
-
-            result.M41 = t.X;
-            result.M42 = t.Y;
-            result.M43 = t.Z;
-
-            return result;
-        }
+            => Impl.CreateScale(scales, in centerPoint).AsM4x4();
 
         /// <summary>Creates a uniform scaling matrix that scale equally on each axis.</summary>
         /// <param name="scale">The uniform scaling factor.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix4x4 CreateScale(float scale)
-        {
-            Matrix4x4 result = default;
-
-            result.M11 = scale;
-            result.M22 = scale;
-            result.M33 = scale;
-            result.M44 = 1;
-
-            return result;
-        }
+            => Impl.CreateScale(scale).AsM4x4();
 
         /// <summary>Creates a uniform scaling matrix that scales equally on each axis with a center point.</summary>
         /// <param name="scale">The uniform scaling factor.</param>
         /// <param name="centerPoint">The center point.</param>
         /// <returns>The scaling matrix.</returns>
         public static Matrix4x4 CreateScale(float scale, Vector3 centerPoint)
-        {
-            Matrix4x4 result = default;
-
-            Vector3 t = centerPoint * (Vector3.One - new Vector3(scale));
-
-            result.M11 = scale;
-            result.M22 = scale;
-            result.M33 = scale;
-            result.M44 = 1;
-
-            result.M41 = t.X;
-            result.M42 = t.Y;
-            result.M43 = t.Z;
-
-            return result;
-        }
+            => Impl.CreateScale(scale, in centerPoint).AsM4x4();
 
         /// <summary>Creates a matrix that flattens geometry into a specified plane as if casting a shadow from a specified light source.</summary>
         /// <param name="lightDirection">The direction from which the light that will cast the shadow is coming.</param>
         /// <param name="plane">The plane onto which the new matrix should flatten geometry so as to cast a shadow.</param>
         /// <returns>A new matrix that can be used to flatten geometry onto the specified plane from the specified direction.</returns>
         public static Matrix4x4 CreateShadow(Vector3 lightDirection, Plane plane)
-        {
-            Plane p = Plane.Normalize(plane);
-
-            float dot = p.Normal.X * lightDirection.X + p.Normal.Y * lightDirection.Y + p.Normal.Z * lightDirection.Z;
-            float a = -p.Normal.X;
-            float b = -p.Normal.Y;
-            float c = -p.Normal.Z;
-            float d = -p.D;
-
-            Matrix4x4 result = Identity;
-
-            result.M11 = a * lightDirection.X + dot;
-            result.M21 = b * lightDirection.X;
-            result.M31 = c * lightDirection.X;
-            result.M41 = d * lightDirection.X;
-
-            result.M12 = a * lightDirection.Y;
-            result.M22 = b * lightDirection.Y + dot;
-            result.M32 = c * lightDirection.Y;
-            result.M42 = d * lightDirection.Y;
-
-            result.M13 = a * lightDirection.Z;
-            result.M23 = b * lightDirection.Z;
-            result.M33 = c * lightDirection.Z + dot;
-            result.M43 = d * lightDirection.Z;
-
-            result.M44 = dot;
-
-            return result;
-        }
+            => Impl.CreateShadow(in lightDirection, in plane).AsM4x4();
 
         /// <summary>Creates a translation matrix from the specified 3-dimensional vector.</summary>
         /// <param name="position">The amount to translate in each axis.</param>
         /// <returns>The translation matrix.</returns>
         public static Matrix4x4 CreateTranslation(Vector3 position)
-        {
-            Matrix4x4 result = Identity;
-            result.M41 = position.X;
-            result.M42 = position.Y;
-            result.M43 = position.Z;
-            return result;
-        }
+            => Impl.CreateTranslation(in position).AsM4x4();
 
         /// <summary>Creates a translation matrix from the specified X, Y, and Z components.</summary>
         /// <param name="xPosition">The amount to translate on the X axis.</param>
@@ -1351,13 +446,7 @@ namespace System.Numerics
         /// <param name="zPosition">The amount to translate on the Z axis.</param>
         /// <returns>The translation matrix.</returns>
         public static Matrix4x4 CreateTranslation(float xPosition, float yPosition, float zPosition)
-        {
-            Matrix4x4 result = Identity;
-            result.M41 = xPosition;
-            result.M42 = yPosition;
-            result.M43 = zPosition;
-            return result;
-        }
+            => Impl.CreateTranslation(xPosition, yPosition, zPosition).AsM4x4();
 
         /// <summary>Creates a world matrix with the specified parameters.</summary>
         /// <param name="position">The position of the object.</param>
@@ -1366,387 +455,42 @@ namespace System.Numerics
         /// <returns>The world matrix.</returns>
         /// <remarks><paramref name="position" /> is used in translation operations.</remarks>
         public static Matrix4x4 CreateWorld(Vector3 position, Vector3 forward, Vector3 up)
-        {
-            Vector3 zaxis = Vector3.Normalize(-forward);
-            Vector3 xaxis = Vector3.Normalize(Vector3.Cross(up, zaxis));
-            Vector3 yaxis = Vector3.Cross(zaxis, xaxis);
-
-            Matrix4x4 result = Identity;
-
-            result.M11 = xaxis.X;
-            result.M12 = xaxis.Y;
-            result.M13 = xaxis.Z;
+            => Impl.CreateWorld(in position, in forward, in up).AsM4x4();
 
-            result.M21 = yaxis.X;
-            result.M22 = yaxis.Y;
-            result.M23 = yaxis.Z;
-
-            result.M31 = zaxis.X;
-            result.M32 = zaxis.Y;
-            result.M33 = zaxis.Z;
-
-            result.M41 = position.X;
-            result.M42 = position.Y;
-            result.M43 = position.Z;
-
-            return result;
-        }
+        /// <summary>Attempts to extract the scale, translation, and rotation components from the given scale, rotation, or translation matrix. The return value indicates whether the operation succeeded.</summary>
+        /// <param name="matrix">The source matrix.</param>
+        /// <param name="scale">When this method returns, contains the scaling component of the transformation matrix if the operation succeeded.</param>
+        /// <param name="rotation">When this method returns, contains the rotation component of the transformation matrix if the operation succeeded.</param>
+        /// <param name="translation">When the method returns, contains the translation component of the transformation matrix if the operation succeeded.</param>
+        /// <returns><see langword="true" /> if <paramref name="matrix" /> was decomposed successfully; otherwise,  <see langword="false" />.</returns>
+        public static unsafe bool Decompose(Matrix4x4 matrix, out Vector3 scale, out Quaternion rotation, out Vector3 translation)
+            => Impl.Decompose(in matrix.AsImpl(), out scale, out rotation, out translation);
 
         /// <summary>Tries to invert the specified matrix. The return value indicates whether the operation succeeded.</summary>
         /// <param name="matrix">The matrix to invert.</param>
         /// <param name="result">When this method returns, contains the inverted matrix if the operation succeeded.</param>
         /// <returns><see langword="true" /> if <paramref name="matrix" /> was converted successfully; otherwise,  <see langword="false" />.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe bool Invert(Matrix4x4 matrix, out Matrix4x4 result)
+        public static bool Invert(Matrix4x4 matrix, out Matrix4x4 result)
         {
-            // This implementation is based on the DirectX Math Library XMMatrixInverse method
-            // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathMatrix.inl
-
-            if (Sse.IsSupported)
-            {
-                return SseImpl(matrix, out result);
-            }
-
-            return SoftwareFallback(matrix, out result);
-
-            static unsafe bool SseImpl(Matrix4x4 matrix, out Matrix4x4 result)
-            {
-                if (!Sse.IsSupported)
-                {
-                    // Redundant test so we won't prejit remainder of this method on platforms without SSE.
-                    throw new PlatformNotSupportedException();
-                }
-
-                // Load the matrix values into rows
-                Vector128<float> row1 = Sse.LoadVector128(&matrix.M11);
-                Vector128<float> row2 = Sse.LoadVector128(&matrix.M21);
-                Vector128<float> row3 = Sse.LoadVector128(&matrix.M31);
-                Vector128<float> row4 = Sse.LoadVector128(&matrix.M41);
-
-                // Transpose the matrix
-                Vector128<float> vTemp1 = Sse.Shuffle(row1, row2, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
-                Vector128<float> vTemp3 = Sse.Shuffle(row1, row2, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
-                Vector128<float> vTemp2 = Sse.Shuffle(row3, row4, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
-                Vector128<float> vTemp4 = Sse.Shuffle(row3, row4, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
-
-                row1 = Sse.Shuffle(vTemp1, vTemp2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
-                row2 = Sse.Shuffle(vTemp1, vTemp2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
-                row3 = Sse.Shuffle(vTemp3, vTemp4, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
-                row4 = Sse.Shuffle(vTemp3, vTemp4, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
-
-                Vector128<float> V00 = Permute(row3, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
-                Vector128<float> V10 = Permute(row4, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
-                Vector128<float> V01 = Permute(row1, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
-                Vector128<float> V11 = Permute(row2, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
-                Vector128<float> V02 = Sse.Shuffle(row3, row1, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
-                Vector128<float> V12 = Sse.Shuffle(row4, row2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
-
-                Vector128<float> D0 = Sse.Multiply(V00, V10);
-                Vector128<float> D1 = Sse.Multiply(V01, V11);
-                Vector128<float> D2 = Sse.Multiply(V02, V12);
-
-                V00 = Permute(row3, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
-                V10 = Permute(row4, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
-                V01 = Permute(row1, 0xEE);           //_MM_SHUFFLE(3, 2, 3, 2)
-                V11 = Permute(row2, 0x50);           //_MM_SHUFFLE(1, 1, 0, 0)
-                V02 = Sse.Shuffle(row3, row1, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
-                V12 = Sse.Shuffle(row4, row2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
-
-                // Note:  We use this expansion pattern instead of Fused Multiply Add
-                // in order to support older hardware
-                D0 = Sse.Subtract(D0, Sse.Multiply(V00, V10));
-                D1 = Sse.Subtract(D1, Sse.Multiply(V01, V11));
-                D2 = Sse.Subtract(D2, Sse.Multiply(V02, V12));
-
-                // V11 = D0Y,D0W,D2Y,D2Y
-                V11 = Sse.Shuffle(D0, D2, 0x5D);  //_MM_SHUFFLE(1, 1, 3, 1)
-                V00 = Permute(row2, 0x49);        //_MM_SHUFFLE(1, 0, 2, 1)
-                V10 = Sse.Shuffle(V11, D0, 0x32); //_MM_SHUFFLE(0, 3, 0, 2)
-                V01 = Permute(row1, 0x12);        //_MM_SHUFFLE(0, 1, 0, 2)
-                V11 = Sse.Shuffle(V11, D0, 0x99); //_MM_SHUFFLE(2, 1, 2, 1)
-
-                // V13 = D1Y,D1W,D2W,D2W
-                Vector128<float> V13 = Sse.Shuffle(D1, D2, 0xFD); //_MM_SHUFFLE(3, 3, 3, 1)
-                V02 = Permute(row4, 0x49);                        //_MM_SHUFFLE(1, 0, 2, 1)
-                V12 = Sse.Shuffle(V13, D1, 0x32);                 //_MM_SHUFFLE(0, 3, 0, 2)
-                Vector128<float> V03 = Permute(row3, 0x12);       //_MM_SHUFFLE(0, 1, 0, 2)
-                V13 = Sse.Shuffle(V13, D1, 0x99);                 //_MM_SHUFFLE(2, 1, 2, 1)
-
-                Vector128<float> C0 = Sse.Multiply(V00, V10);
-                Vector128<float> C2 = Sse.Multiply(V01, V11);
-                Vector128<float> C4 = Sse.Multiply(V02, V12);
-                Vector128<float> C6 = Sse.Multiply(V03, V13);
-
-                // V11 = D0X,D0Y,D2X,D2X
-                V11 = Sse.Shuffle(D0, D2, 0x4);    //_MM_SHUFFLE(0, 0, 1, 0)
-                V00 = Permute(row2, 0x9e);         //_MM_SHUFFLE(2, 1, 3, 2)
-                V10 = Sse.Shuffle(D0, V11, 0x93);  //_MM_SHUFFLE(2, 1, 0, 3)
-                V01 = Permute(row1, 0x7b);         //_MM_SHUFFLE(1, 3, 2, 3)
-                V11 = Sse.Shuffle(D0, V11, 0x26);  //_MM_SHUFFLE(0, 2, 1, 2)
-
-                // V13 = D1X,D1Y,D2Z,D2Z
-                V13 = Sse.Shuffle(D1, D2, 0xa4);   //_MM_SHUFFLE(2, 2, 1, 0)
-                V02 = Permute(row4, 0x9e);         //_MM_SHUFFLE(2, 1, 3, 2)
-                V12 = Sse.Shuffle(D1, V13, 0x93);  //_MM_SHUFFLE(2, 1, 0, 3)
-                V03 = Permute(row3, 0x7b);         //_MM_SHUFFLE(1, 3, 2, 3)
-                V13 = Sse.Shuffle(D1, V13, 0x26);  //_MM_SHUFFLE(0, 2, 1, 2)
-
-                C0 = Sse.Subtract(C0, Sse.Multiply(V00, V10));
-                C2 = Sse.Subtract(C2, Sse.Multiply(V01, V11));
-                C4 = Sse.Subtract(C4, Sse.Multiply(V02, V12));
-                C6 = Sse.Subtract(C6, Sse.Multiply(V03, V13));
-
-                V00 = Permute(row2, 0x33); //_MM_SHUFFLE(0, 3, 0, 3)
-
-                // V10 = D0Z,D0Z,D2X,D2Y
-                V10 = Sse.Shuffle(D0, D2, 0x4A); //_MM_SHUFFLE(1, 0, 2, 2)
-                V10 = Permute(V10, 0x2C);        //_MM_SHUFFLE(0, 2, 3, 0)
-                V01 = Permute(row1, 0x8D);       //_MM_SHUFFLE(2, 0, 3, 1)
-
-                // V11 = D0X,D0W,D2X,D2Y
-                V11 = Sse.Shuffle(D0, D2, 0x4C); //_MM_SHUFFLE(1, 0, 3, 0)
-                V11 = Permute(V11, 0x93);        //_MM_SHUFFLE(2, 1, 0, 3)
-                V02 = Permute(row4, 0x33);       //_MM_SHUFFLE(0, 3, 0, 3)
-
-                // V12 = D1Z,D1Z,D2Z,D2W
-                V12 = Sse.Shuffle(D1, D2, 0xEA); //_MM_SHUFFLE(3, 2, 2, 2)
-                V12 = Permute(V12, 0x2C);        //_MM_SHUFFLE(0, 2, 3, 0)
-                V03 = Permute(row3, 0x8D);       //_MM_SHUFFLE(2, 0, 3, 1)
-
-                // V13 = D1X,D1W,D2Z,D2W
-                V13 = Sse.Shuffle(D1, D2, 0xEC); //_MM_SHUFFLE(3, 2, 3, 0)
-                V13 = Permute(V13, 0x93);        //_MM_SHUFFLE(2, 1, 0, 3)
-
-                V00 = Sse.Multiply(V00, V10);
-                V01 = Sse.Multiply(V01, V11);
-                V02 = Sse.Multiply(V02, V12);
-                V03 = Sse.Multiply(V03, V13);
-
-                Vector128<float> C1 = Sse.Subtract(C0, V00);
-                C0 = Sse.Add(C0, V00);
-                Vector128<float> C3 = Sse.Add(C2, V01);
-                C2 = Sse.Subtract(C2, V01);
-                Vector128<float> C5 = Sse.Subtract(C4, V02);
-                C4 = Sse.Add(C4, V02);
-                Vector128<float> C7 = Sse.Add(C6, V03);
-                C6 = Sse.Subtract(C6, V03);
-
-                C0 = Sse.Shuffle(C0, C1, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-                C2 = Sse.Shuffle(C2, C3, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-                C4 = Sse.Shuffle(C4, C5, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-                C6 = Sse.Shuffle(C6, C7, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-
-                C0 = Permute(C0, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-                C2 = Permute(C2, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-                C4 = Permute(C4, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-                C6 = Permute(C6, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
-
-                // Get the determinant
-                vTemp2 = row1;
-                float det = Vector4.Dot(C0.AsVector4(), vTemp2.AsVector4());
-
-                // Check determinate is not zero
-                if (MathF.Abs(det) < float.Epsilon)
-                {
-                    result = new Matrix4x4(float.NaN, float.NaN, float.NaN, float.NaN,
-                                float.NaN, float.NaN, float.NaN, float.NaN,
-                                float.NaN, float.NaN, float.NaN, float.NaN,
-                                float.NaN, float.NaN, float.NaN, float.NaN);
-                    return false;
-                }
-
-                // Create Vector128<float> copy of the determinant and invert them.
-                Vector128<float> ones = Vector128.Create(1.0f);
-                Vector128<float> vTemp = Vector128.Create(det);
-                vTemp = Sse.Divide(ones, vTemp);
-
-                row1 = Sse.Multiply(C0, vTemp);
-                row2 = Sse.Multiply(C2, vTemp);
-                row3 = Sse.Multiply(C4, vTemp);
-                row4 = Sse.Multiply(C6, vTemp);
-
-                Unsafe.SkipInit(out result);
-                ref Vector128<float> vResult = ref Unsafe.As<Matrix4x4, Vector128<float>>(ref result);
-
-                vResult = row1;
-                Unsafe.Add(ref vResult, 1) = row2;
-                Unsafe.Add(ref vResult, 2) = row3;
-                Unsafe.Add(ref vResult, 3) = row4;
-
-                return true;
-            }
-
-            static bool SoftwareFallback(Matrix4x4 matrix, out Matrix4x4 result)
-            {
-                //                                       -1
-                // If you have matrix M, inverse Matrix M   can compute
-                //
-                //     -1       1
-                //    M   = --------- A
-                //            det(M)
-                //
-                // A is adjugate (adjoint) of M, where,
-                //
-                //      T
-                // A = C
-                //
-                // C is Cofactor matrix of M, where,
-                //           i + j
-                // C   = (-1)      * det(M  )
-                //  ij                    ij
-                //
-                //     [ a b c d ]
-                // M = [ e f g h ]
-                //     [ i j k l ]
-                //     [ m n o p ]
-                //
-                // First Row
-                //           2 | f g h |
-                // C   = (-1)  | j k l | = + ( f ( kp - lo ) - g ( jp - ln ) + h ( jo - kn ) )
-                //  11         | n o p |
-                //
-                //           3 | e g h |
-                // C   = (-1)  | i k l | = - ( e ( kp - lo ) - g ( ip - lm ) + h ( io - km ) )
-                //  12         | m o p |
-                //
-                //           4 | e f h |
-                // C   = (-1)  | i j l | = + ( e ( jp - ln ) - f ( ip - lm ) + h ( in - jm ) )
-                //  13         | m n p |
-                //
-                //           5 | e f g |
-                // C   = (-1)  | i j k | = - ( e ( jo - kn ) - f ( io - km ) + g ( in - jm ) )
-                //  14         | m n o |
-                //
-                // Second Row
-                //           3 | b c d |
-                // C   = (-1)  | j k l | = - ( b ( kp - lo ) - c ( jp - ln ) + d ( jo - kn ) )
-                //  21         | n o p |
-                //
-                //           4 | a c d |
-                // C   = (-1)  | i k l | = + ( a ( kp - lo ) - c ( ip - lm ) + d ( io - km ) )
-                //  22         | m o p |
-                //
-                //           5 | a b d |
-                // C   = (-1)  | i j l | = - ( a ( jp - ln ) - b ( ip - lm ) + d ( in - jm ) )
-                //  23         | m n p |
-                //
-                //           6 | a b c |
-                // C   = (-1)  | i j k | = + ( a ( jo - kn ) - b ( io - km ) + c ( in - jm ) )
-                //  24         | m n o |
-                //
-                // Third Row
-                //           4 | b c d |
-                // C   = (-1)  | f g h | = + ( b ( gp - ho ) - c ( fp - hn ) + d ( fo - gn ) )
-                //  31         | n o p |
-                //
-                //           5 | a c d |
-                // C   = (-1)  | e g h | = - ( a ( gp - ho ) - c ( ep - hm ) + d ( eo - gm ) )
-                //  32         | m o p |
-                //
-                //           6 | a b d |
-                // C   = (-1)  | e f h | = + ( a ( fp - hn ) - b ( ep - hm ) + d ( en - fm ) )
-                //  33         | m n p |
-                //
-                //           7 | a b c |
-                // C   = (-1)  | e f g | = - ( a ( fo - gn ) - b ( eo - gm ) + c ( en - fm ) )
-                //  34         | m n o |
-                //
-                // Fourth Row
-                //           5 | b c d |
-                // C   = (-1)  | f g h | = - ( b ( gl - hk ) - c ( fl - hj ) + d ( fk - gj ) )
-                //  41         | j k l |
-                //
-                //           6 | a c d |
-                // C   = (-1)  | e g h | = + ( a ( gl - hk ) - c ( el - hi ) + d ( ek - gi ) )
-                //  42         | i k l |
-                //
-                //           7 | a b d |
-                // C   = (-1)  | e f h | = - ( a ( fl - hj ) - b ( el - hi ) + d ( ej - fi ) )
-                //  43         | i j l |
-                //
-                //           8 | a b c |
-                // C   = (-1)  | e f g | = + ( a ( fk - gj ) - b ( ek - gi ) + c ( ej - fi ) )
-                //  44         | i j k |
-                //
-                // Cost of operation
-                // 53 adds, 104 muls, and 1 div.
-                float a = matrix.M11, b = matrix.M12, c = matrix.M13, d = matrix.M14;
-                float e = matrix.M21, f = matrix.M22, g = matrix.M23, h = matrix.M24;
-                float i = matrix.M31, j = matrix.M32, k = matrix.M33, l = matrix.M34;
-                float m = matrix.M41, n = matrix.M42, o = matrix.M43, p = matrix.M44;
-
-                float kp_lo = k * p - l * o;
-                float jp_ln = j * p - l * n;
-                float jo_kn = j * o - k * n;
-                float ip_lm = i * p - l * m;
-                float io_km = i * o - k * m;
-                float in_jm = i * n - j * m;
-
-                float a11 = +(f * kp_lo - g * jp_ln + h * jo_kn);
-                float a12 = -(e * kp_lo - g * ip_lm + h * io_km);
-                float a13 = +(e * jp_ln - f * ip_lm + h * in_jm);
-                float a14 = -(e * jo_kn - f * io_km + g * in_jm);
-
-                float det = a * a11 + b * a12 + c * a13 + d * a14;
-
-                if (MathF.Abs(det) < float.Epsilon)
-                {
-                    result = new Matrix4x4(float.NaN, float.NaN, float.NaN, float.NaN,
-                                           float.NaN, float.NaN, float.NaN, float.NaN,
-                                           float.NaN, float.NaN, float.NaN, float.NaN,
-                                           float.NaN, float.NaN, float.NaN, float.NaN);
-                    return false;
-                }
-
-                float invDet = 1.0f / det;
-
-                result.M11 = a11 * invDet;
-                result.M21 = a12 * invDet;
-                result.M31 = a13 * invDet;
-                result.M41 = a14 * invDet;
-
-                result.M12 = -(b * kp_lo - c * jp_ln + d * jo_kn) * invDet;
-                result.M22 = +(a * kp_lo - c * ip_lm + d * io_km) * invDet;
-                result.M32 = -(a * jp_ln - b * ip_lm + d * in_jm) * invDet;
-                result.M42 = +(a * jo_kn - b * io_km + c * in_jm) * invDet;
-
-                float gp_ho = g * p - h * o;
-                float fp_hn = f * p - h * n;
-                float fo_gn = f * o - g * n;
-                float ep_hm = e * p - h * m;
-                float eo_gm = e * o - g * m;
-                float en_fm = e * n - f * m;
-
-                result.M13 = +(b * gp_ho - c * fp_hn + d * fo_gn) * invDet;
-                result.M23 = -(a * gp_ho - c * ep_hm + d * eo_gm) * invDet;
-                result.M33 = +(a * fp_hn - b * ep_hm + d * en_fm) * invDet;
-                result.M43 = -(a * fo_gn - b * eo_gm + c * en_fm) * invDet;
-
-                float gl_hk = g * l - h * k;
-                float fl_hj = f * l - h * j;
-                float fk_gj = f * k - g * j;
-                float el_hi = e * l - h * i;
-                float ek_gi = e * k - g * i;
-                float ej_fi = e * j - f * i;
-
-                result.M14 = -(b * gl_hk - c * fl_hj + d * fk_gj) * invDet;
-                result.M24 = +(a * gl_hk - c * el_hi + d * ek_gi) * invDet;
-                result.M34 = -(a * fl_hj - b * el_hi + d * ej_fi) * invDet;
-                result.M44 = +(a * fk_gj - b * ek_gi + c * ej_fi) * invDet;
-
-                return true;
-            }
+            Unsafe.SkipInit(out result);
+            return Impl.Invert(in matrix.AsImpl(), out result.AsImpl());
         }
 
+        /// <summary>Performs a linear interpolation from one matrix to a second matrix based on a value that specifies the weighting of the second matrix.</summary>
+        /// <param name="matrix1">The first matrix.</param>
+        /// <param name="matrix2">The second matrix.</param>
+        /// <param name="amount">The relative weighting of <paramref name="matrix2" />.</param>
+        /// <returns>The interpolated matrix.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Matrix4x4 Lerp(Matrix4x4 matrix1, Matrix4x4 matrix2, float amount)
+            => Impl.Lerp(in matrix1.AsImpl(), in matrix2.AsImpl(), amount).AsM4x4();
+
         /// <summary>Multiplies two matrices together to compute the product.</summary>
         /// <param name="value1">The first matrix.</param>
         /// <param name="value2">The second matrix.</param>
         /// <returns>The product matrix.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix4x4 Multiply(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            return value1 * value2;
-        }
+            => (value1.AsImpl() * value2.AsImpl()).AsM4x4();
 
         /// <summary>Multiplies a matrix by a float to compute the product.</summary>
         /// <param name="value1">The matrix to scale.</param>
@@ -1754,18 +498,14 @@ namespace System.Numerics
         /// <returns>The scaled matrix.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix4x4 Multiply(Matrix4x4 value1, float value2)
-        {
-            return value1 * value2;
-        }
+            => (value1.AsImpl() * value2).AsM4x4();
 
         /// <summary>Negates the specified matrix by multiplying all its values by -1.</summary>
         /// <param name="value">The matrix to negate.</param>
         /// <returns>The negated matrix.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix4x4 Negate(Matrix4x4 value)
-        {
-            return -value;
-        }
+            => (-value.AsImpl()).AsM4x4();
 
         /// <summary>Subtracts each element in a second matrix from its corresponding element in a first matrix.</summary>
         /// <param name="value1">The first matrix.</param>
@@ -1773,548 +513,51 @@ namespace System.Numerics
         /// <returns>The matrix containing the values that result from subtracting each element in <paramref name="value2" /> from its corresponding element in <paramref name="value1" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Matrix4x4 Subtract(Matrix4x4 value1, Matrix4x4 value2)
-        {
-            return value1 - value2;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static Vector128<float> Permute(Vector128<float> value, [ConstantExpected] byte control)
-        {
-            if (Avx.IsSupported)
-            {
-                return Avx.Permute(value, control);
-            }
-            else if (Sse.IsSupported)
-            {
-                return Sse.Shuffle(value, value, control);
-            }
-            else
-            {
-                // Redundant test so we won't prejit remainder of this method on platforms without AdvSimd.
-                throw new PlatformNotSupportedException();
-            }
-        }
-
-        /// <summary>Attempts to extract the scale, translation, and rotation components from the given scale, rotation, or translation matrix. The return value indicates whether the operation succeeded.</summary>
-        /// <param name="matrix">The source matrix.</param>
-        /// <param name="scale">When this method returns, contains the scaling component of the transformation matrix if the operation succeeded.</param>
-        /// <param name="rotation">When this method returns, contains the rotation component of the transformation matrix if the operation succeeded.</param>
-        /// <param name="translation">When the method returns, contains the translation component of the transformation matrix if the operation succeeded.</param>
-        /// <returns><see langword="true" /> if <paramref name="matrix" /> was decomposed successfully; otherwise,  <see langword="false" />.</returns>
-        public static bool Decompose(Matrix4x4 matrix, out Vector3 scale, out Quaternion rotation, out Vector3 translation)
-        {
-            bool result = true;
-
-            unsafe
-            {
-                fixed (Vector3* scaleBase = &scale)
-                {
-                    float* pfScales = (float*)scaleBase;
-                    float det;
-
-                    VectorBasis vectorBasis;
-                    Vector3** pVectorBasis = (Vector3**)&vectorBasis;
-
-                    Matrix4x4 matTemp = Identity;
-                    CanonicalBasis canonicalBasis = default;
-                    Vector3* pCanonicalBasis = &canonicalBasis.Row0;
-
-                    canonicalBasis.Row0 = new Vector3(1.0f, 0.0f, 0.0f);
-                    canonicalBasis.Row1 = new Vector3(0.0f, 1.0f, 0.0f);
-                    canonicalBasis.Row2 = new Vector3(0.0f, 0.0f, 1.0f);
-
-                    translation = new Vector3(
-                        matrix.M41,
-                        matrix.M42,
-                        matrix.M43);
-
-                    pVectorBasis[0] = (Vector3*)&matTemp.M11;
-                    pVectorBasis[1] = (Vector3*)&matTemp.M21;
-                    pVectorBasis[2] = (Vector3*)&matTemp.M31;
-
-                    *(pVectorBasis[0]) = new Vector3(matrix.M11, matrix.M12, matrix.M13);
-                    *(pVectorBasis[1]) = new Vector3(matrix.M21, matrix.M22, matrix.M23);
-                    *(pVectorBasis[2]) = new Vector3(matrix.M31, matrix.M32, matrix.M33);
-
-                    scale.X = pVectorBasis[0]->Length();
-                    scale.Y = pVectorBasis[1]->Length();
-                    scale.Z = pVectorBasis[2]->Length();
-
-                    uint a, b, c;
-                    #region Ranking
-                    float x = pfScales[0], y = pfScales[1], z = pfScales[2];
-                    if (x < y)
-                    {
-                        if (y < z)
-                        {
-                            a = 2;
-                            b = 1;
-                            c = 0;
-                        }
-                        else
-                        {
-                            a = 1;
-
-                            if (x < z)
-                            {
-                                b = 2;
-                                c = 0;
-                            }
-                            else
-                            {
-                                b = 0;
-                                c = 2;
-                            }
-                        }
-                    }
-                    else
-                    {
-                        if (x < z)
-                        {
-                            a = 2;
-                            b = 0;
-                            c = 1;
-                        }
-                        else
-                        {
-                            a = 0;
-
-                            if (y < z)
-                            {
-                                b = 2;
-                                c = 1;
-                            }
-                            else
-                            {
-                                b = 1;
-                                c = 2;
-                            }
-                        }
-                    }
-                    #endregion
-
-                    if (pfScales[a] < DecomposeEpsilon)
-                    {
-                        *(pVectorBasis[a]) = pCanonicalBasis[a];
-                    }
-
-                    *pVectorBasis[a] = Vector3.Normalize(*pVectorBasis[a]);
-
-                    if (pfScales[b] < DecomposeEpsilon)
-                    {
-                        uint cc;
-                        float fAbsX, fAbsY, fAbsZ;
-
-                        fAbsX = MathF.Abs(pVectorBasis[a]->X);
-                        fAbsY = MathF.Abs(pVectorBasis[a]->Y);
-                        fAbsZ = MathF.Abs(pVectorBasis[a]->Z);
-
-                        #region Ranking
-                        if (fAbsX < fAbsY)
-                        {
-                            if (fAbsY < fAbsZ)
-                            {
-                                cc = 0;
-                            }
-                            else
-                            {
-                                if (fAbsX < fAbsZ)
-                                {
-                                    cc = 0;
-                                }
-                                else
-                                {
-                                    cc = 2;
-                                }
-                            }
-                        }
-                        else
-                        {
-                            if (fAbsX < fAbsZ)
-                            {
-                                cc = 1;
-                            }
-                            else
-                            {
-                                if (fAbsY < fAbsZ)
-                                {
-                                    cc = 1;
-                                }
-                                else
-                                {
-                                    cc = 2;
-                                }
-                            }
-                        }
-                        #endregion
-
-                        *pVectorBasis[b] = Vector3.Cross(*pVectorBasis[a], *(pCanonicalBasis + cc));
-                    }
-
-                    *pVectorBasis[b] = Vector3.Normalize(*pVectorBasis[b]);
-
-                    if (pfScales[c] < DecomposeEpsilon)
-                    {
-                        *pVectorBasis[c] = Vector3.Cross(*pVectorBasis[a], *pVectorBasis[b]);
-                    }
-
-                    *pVectorBasis[c] = Vector3.Normalize(*pVectorBasis[c]);
-
-                    det = matTemp.GetDeterminant();
-
-                    // use Kramer's rule to check for handedness of coordinate system
-                    if (det < 0.0f)
-                    {
-                        // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
-                        pfScales[a] = -pfScales[a];
-                        *pVectorBasis[a] = -(*pVectorBasis[a]);
-
-                        det = -det;
-                    }
-
-                    det -= 1.0f;
-                    det *= det;
-
-                    if ((DecomposeEpsilon < det))
-                    {
-                        // Non-SRT matrix encountered
-                        rotation = Quaternion.Identity;
-                        result = false;
-                    }
-                    else
-                    {
-                        // generate the quaternion from the matrix
-                        rotation = Quaternion.CreateFromRotationMatrix(matTemp);
-                    }
-                }
-            }
-
-            return result;
-        }
-
-        /// <summary>Performs a linear interpolation from one matrix to a second matrix based on a value that specifies the weighting of the second matrix.</summary>
-        /// <param name="matrix1">The first matrix.</param>
-        /// <param name="matrix2">The second matrix.</param>
-        /// <param name="amount">The relative weighting of <paramref name="matrix2" />.</param>
-        /// <returns>The interpolated matrix.</returns>
-        public static unsafe Matrix4x4 Lerp(Matrix4x4 matrix1, Matrix4x4 matrix2, float amount)
-        {
-            if (AdvSimd.IsSupported)
-            {
-                Vector128<float> amountVec = Vector128.Create(amount);
-                AdvSimd.Store(&matrix1.M11, VectorMath.Lerp(AdvSimd.LoadVector128(&matrix1.M11), AdvSimd.LoadVector128(&matrix2.M11), amountVec));
-                AdvSimd.Store(&matrix1.M21, VectorMath.Lerp(AdvSimd.LoadVector128(&matrix1.M21), AdvSimd.LoadVector128(&matrix2.M21), amountVec));
-                AdvSimd.Store(&matrix1.M31, VectorMath.Lerp(AdvSimd.LoadVector128(&matrix1.M31), AdvSimd.LoadVector128(&matrix2.M31), amountVec));
-                AdvSimd.Store(&matrix1.M41, VectorMath.Lerp(AdvSimd.LoadVector128(&matrix1.M41), AdvSimd.LoadVector128(&matrix2.M41), amountVec));
-                return matrix1;
-            }
-            else if (Sse.IsSupported)
-            {
-                Vector128<float> amountVec = Vector128.Create(amount);
-                Sse.Store(&matrix1.M11, VectorMath.Lerp(Sse.LoadVector128(&matrix1.M11), Sse.LoadVector128(&matrix2.M11), amountVec));
-                Sse.Store(&matrix1.M21, VectorMath.Lerp(Sse.LoadVector128(&matrix1.M21), Sse.LoadVector128(&matrix2.M21), amountVec));
-                Sse.Store(&matrix1.M31, VectorMath.Lerp(Sse.LoadVector128(&matrix1.M31), Sse.LoadVector128(&matrix2.M31), amountVec));
-                Sse.Store(&matrix1.M41, VectorMath.Lerp(Sse.LoadVector128(&matrix1.M41), Sse.LoadVector128(&matrix2.M41), amountVec));
-                return matrix1;
-            }
-
-            Matrix4x4 result;
-
-            // First row
-            result.M11 = matrix1.M11 + (matrix2.M11 - matrix1.M11) * amount;
-            result.M12 = matrix1.M12 + (matrix2.M12 - matrix1.M12) * amount;
-            result.M13 = matrix1.M13 + (matrix2.M13 - matrix1.M13) * amount;
-            result.M14 = matrix1.M14 + (matrix2.M14 - matrix1.M14) * amount;
-
-            // Second row
-            result.M21 = matrix1.M21 + (matrix2.M21 - matrix1.M21) * amount;
-            result.M22 = matrix1.M22 + (matrix2.M22 - matrix1.M22) * amount;
-            result.M23 = matrix1.M23 + (matrix2.M23 - matrix1.M23) * amount;
-            result.M24 = matrix1.M24 + (matrix2.M24 - matrix1.M24) * amount;
-
-            // Third row
-            result.M31 = matrix1.M31 + (matrix2.M31 - matrix1.M31) * amount;
-            result.M32 = matrix1.M32 + (matrix2.M32 - matrix1.M32) * amount;
-            result.M33 = matrix1.M33 + (matrix2.M33 - matrix1.M33) * amount;
-            result.M34 = matrix1.M34 + (matrix2.M34 - matrix1.M34) * amount;
-
-            // Fourth row
-            result.M41 = matrix1.M41 + (matrix2.M41 - matrix1.M41) * amount;
-            result.M42 = matrix1.M42 + (matrix2.M42 - matrix1.M42) * amount;
-            result.M43 = matrix1.M43 + (matrix2.M43 - matrix1.M43) * amount;
-            result.M44 = matrix1.M44 + (matrix2.M44 - matrix1.M44) * amount;
-
-            return result;
-        }
+            => (value1.AsImpl() - value2.AsImpl()).AsM4x4();
 
         /// <summary>Transforms the specified matrix by applying the specified Quaternion rotation.</summary>
         /// <param name="value">The matrix to transform.</param>
         /// <param name="rotation">The rotation t apply.</param>
         /// <returns>The transformed matrix.</returns>
         public static Matrix4x4 Transform(Matrix4x4 value, Quaternion rotation)
-        {
-            // Compute rotation matrix.
-            float x2 = rotation.X + rotation.X;
-            float y2 = rotation.Y + rotation.Y;
-            float z2 = rotation.Z + rotation.Z;
-
-            float wx2 = rotation.W * x2;
-            float wy2 = rotation.W * y2;
-            float wz2 = rotation.W * z2;
-            float xx2 = rotation.X * x2;
-            float xy2 = rotation.X * y2;
-            float xz2 = rotation.X * z2;
-            float yy2 = rotation.Y * y2;
-            float yz2 = rotation.Y * z2;
-            float zz2 = rotation.Z * z2;
-
-            float q11 = 1.0f - yy2 - zz2;
-            float q21 = xy2 - wz2;
-            float q31 = xz2 + wy2;
-
-            float q12 = xy2 + wz2;
-            float q22 = 1.0f - xx2 - zz2;
-            float q32 = yz2 - wx2;
-
-            float q13 = xz2 - wy2;
-            float q23 = yz2 + wx2;
-            float q33 = 1.0f - xx2 - yy2;
-
-            Matrix4x4 result;
-
-            // First row
-            result.M11 = value.M11 * q11 + value.M12 * q21 + value.M13 * q31;
-            result.M12 = value.M11 * q12 + value.M12 * q22 + value.M13 * q32;
-            result.M13 = value.M11 * q13 + value.M12 * q23 + value.M13 * q33;
-            result.M14 = value.M14;
-
-            // Second row
-            result.M21 = value.M21 * q11 + value.M22 * q21 + value.M23 * q31;
-            result.M22 = value.M21 * q12 + value.M22 * q22 + value.M23 * q32;
-            result.M23 = value.M21 * q13 + value.M22 * q23 + value.M23 * q33;
-            result.M24 = value.M24;
-
-            // Third row
-            result.M31 = value.M31 * q11 + value.M32 * q21 + value.M33 * q31;
-            result.M32 = value.M31 * q12 + value.M32 * q22 + value.M33 * q32;
-            result.M33 = value.M31 * q13 + value.M32 * q23 + value.M33 * q33;
-            result.M34 = value.M34;
-
-            // Fourth row
-            result.M41 = value.M41 * q11 + value.M42 * q21 + value.M43 * q31;
-            result.M42 = value.M41 * q12 + value.M42 * q22 + value.M43 * q32;
-            result.M43 = value.M41 * q13 + value.M42 * q23 + value.M43 * q33;
-            result.M44 = value.M44;
-
-            return result;
-        }
+            => Impl.Transform(in value.AsImpl(), in rotation).AsM4x4();
 
         /// <summary>Transposes the rows and columns of a matrix.</summary>
         /// <param name="matrix">The matrix to transpose.</param>
         /// <returns>The transposed matrix.</returns>
-        public static unsafe Matrix4x4 Transpose(Matrix4x4 matrix)
-        {
-            if (AdvSimd.Arm64.IsSupported)
-            {
-                // This implementation is based on the DirectX Math Library XMMatrixTranspose method
-                // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathMatrix.inl
-
-                Vector128<float> M11 = AdvSimd.LoadVector128(&matrix.M11);
-                Vector128<float> M31 = AdvSimd.LoadVector128(&matrix.M31);
-
-                Vector128<float> P00 = AdvSimd.Arm64.ZipLow(M11, M31);
-                Vector128<float> P01 = AdvSimd.Arm64.ZipHigh(M11, M31);
-
-                Vector128<float> M21 = AdvSimd.LoadVector128(&matrix.M21);
-                Vector128<float> M41 = AdvSimd.LoadVector128(&matrix.M41);
-
-                Vector128<float> P10 = AdvSimd.Arm64.ZipLow(M21, M41);
-                Vector128<float> P11 = AdvSimd.Arm64.ZipHigh(M21, M41);
-
-                AdvSimd.Store(&matrix.M11, AdvSimd.Arm64.ZipLow(P00, P10));
-                AdvSimd.Store(&matrix.M21, AdvSimd.Arm64.ZipHigh(P00, P10));
-                AdvSimd.Store(&matrix.M31, AdvSimd.Arm64.ZipLow(P01, P11));
-                AdvSimd.Store(&matrix.M41, AdvSimd.Arm64.ZipHigh(P01, P11));
-
-                return matrix;
-            }
-            else if (Sse.IsSupported)
-            {
-                Vector128<float> row1 = Sse.LoadVector128(&matrix.M11);
-                Vector128<float> row2 = Sse.LoadVector128(&matrix.M21);
-                Vector128<float> row3 = Sse.LoadVector128(&matrix.M31);
-                Vector128<float> row4 = Sse.LoadVector128(&matrix.M41);
-
-                Vector128<float> l12 = Sse.UnpackLow(row1, row2);
-                Vector128<float> l34 = Sse.UnpackLow(row3, row4);
-                Vector128<float> h12 = Sse.UnpackHigh(row1, row2);
-                Vector128<float> h34 = Sse.UnpackHigh(row3, row4);
-
-                Sse.Store(&matrix.M11, Sse.MoveLowToHigh(l12, l34));
-                Sse.Store(&matrix.M21, Sse.MoveHighToLow(l34, l12));
-                Sse.Store(&matrix.M31, Sse.MoveLowToHigh(h12, h34));
-                Sse.Store(&matrix.M41, Sse.MoveHighToLow(h34, h12));
-
-                return matrix;
-            }
-
-            Matrix4x4 result;
-
-            result.M11 = matrix.M11;
-            result.M12 = matrix.M21;
-            result.M13 = matrix.M31;
-            result.M14 = matrix.M41;
-            result.M21 = matrix.M12;
-            result.M22 = matrix.M22;
-            result.M23 = matrix.M32;
-            result.M24 = matrix.M42;
-            result.M31 = matrix.M13;
-            result.M32 = matrix.M23;
-            result.M33 = matrix.M33;
-            result.M34 = matrix.M43;
-            result.M41 = matrix.M14;
-            result.M42 = matrix.M24;
-            result.M43 = matrix.M34;
-            result.M44 = matrix.M44;
-
-            return result;
-        }
+        public static Matrix4x4 Transpose(Matrix4x4 matrix)
+            => Impl.Transpose(in matrix.AsImpl()).AsM4x4();
 
         /// <summary>Returns a value that indicates whether this instance and a specified object are equal.</summary>
         /// <param name="obj">The object to compare with the current instance.</param>
         /// <returns><see langword="true" /> if the current instance and <paramref name="obj" /> are equal; otherwise, <see langword="false" />. If <paramref name="obj" /> is <see langword="null" />, the method returns <see langword="false" />.</returns>
-        /// <remarks>The current instance and <paramref name="obj" /> are equal if <paramref name="obj" /> is a <see cref="System.Numerics.Matrix4x4" /> object and the corresponding elements of each matrix are equal.</remarks>
+        /// <remarks>The current instance and <paramref name="obj" /> are equal if <paramref name="obj" /> is a <see cref="Matrix4x4" /> object and the corresponding elements of each matrix are equal.</remarks>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public override readonly bool Equals([NotNullWhen(true)] object? obj)
-        {
-            return (obj is Matrix4x4 other) && Equals(other);
-        }
+            => AsROImpl().Equals(obj);
 
         /// <summary>Returns a value that indicates whether this instance and another 4x4 matrix are equal.</summary>
         /// <param name="other">The other matrix.</param>
         /// <returns><see langword="true" /> if the two matrices are equal; otherwise, <see langword="false" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public readonly bool Equals(Matrix4x4 other)
-        {
-            // This function needs to account for floating-point equality around NaN
-            // and so must behave equivalently to the underlying float/double.Equals
-
-            if (Vector128.IsHardwareAccelerated)
-            {
-                return Vector128.LoadUnsafe(ref Unsafe.AsRef(in M11)).Equals(Vector128.LoadUnsafe(ref other.M11))
-                    && Vector128.LoadUnsafe(ref Unsafe.AsRef(in M21)).Equals(Vector128.LoadUnsafe(ref other.M21))
-                    && Vector128.LoadUnsafe(ref Unsafe.AsRef(in M31)).Equals(Vector128.LoadUnsafe(ref other.M31))
-                    && Vector128.LoadUnsafe(ref Unsafe.AsRef(in M41)).Equals(Vector128.LoadUnsafe(ref other.M41));
-
-            }
-
-            return SoftwareFallback(in this, other);
-
-            static bool SoftwareFallback(in Matrix4x4 self, Matrix4x4 other)
-            {
-                return self.M11.Equals(other.M11) && self.M22.Equals(other.M22) && self.M33.Equals(other.M33) && self.M44.Equals(other.M44) // Check diagonal element first for early out.
-                    && self.M12.Equals(other.M12) && self.M13.Equals(other.M13) && self.M14.Equals(other.M14) && self.M21.Equals(other.M21)
-                    && self.M23.Equals(other.M23) && self.M24.Equals(other.M24) && self.M31.Equals(other.M31) && self.M32.Equals(other.M32)
-                    && self.M34.Equals(other.M34) && self.M41.Equals(other.M41) && self.M42.Equals(other.M42) && self.M43.Equals(other.M43);
-            }
-        }
+            => AsROImpl().Equals(in other.AsImpl());
 
         /// <summary>Calculates the determinant of the current 4x4 matrix.</summary>
         /// <returns>The determinant.</returns>
         public readonly float GetDeterminant()
-        {
-            // | a b c d |     | f g h |     | e g h |     | e f h |     | e f g |
-            // | e f g h | = a | j k l | - b | i k l | + c | i j l | - d | i j k |
-            // | i j k l |     | n o p |     | m o p |     | m n p |     | m n o |
-            // | m n o p |
-            //
-            //   | f g h |
-            // a | j k l | = a ( f ( kp - lo ) - g ( jp - ln ) + h ( jo - kn ) )
-            //   | n o p |
-            //
-            //   | e g h |
-            // b | i k l | = b ( e ( kp - lo ) - g ( ip - lm ) + h ( io - km ) )
-            //   | m o p |
-            //
-            //   | e f h |
-            // c | i j l | = c ( e ( jp - ln ) - f ( ip - lm ) + h ( in - jm ) )
-            //   | m n p |
-            //
-            //   | e f g |
-            // d | i j k | = d ( e ( jo - kn ) - f ( io - km ) + g ( in - jm ) )
-            //   | m n o |
-            //
-            // Cost of operation
-            // 17 adds and 28 muls.
-            //
-            // add: 6 + 8 + 3 = 17
-            // mul: 12 + 16 = 28
-
-            float a = M11, b = M12, c = M13, d = M14;
-            float e = M21, f = M22, g = M23, h = M24;
-            float i = M31, j = M32, k = M33, l = M34;
-            float m = M41, n = M42, o = M43, p = M44;
-
-            float kp_lo = k * p - l * o;
-            float jp_ln = j * p - l * n;
-            float jo_kn = j * o - k * n;
-            float ip_lm = i * p - l * m;
-            float io_km = i * o - k * m;
-            float in_jm = i * n - j * m;
-
-            return a * (f * kp_lo - g * jp_ln + h * jo_kn) -
-                   b * (e * kp_lo - g * ip_lm + h * io_km) +
-                   c * (e * jp_ln - f * ip_lm + h * in_jm) -
-                   d * (e * jo_kn - f * io_km + g * in_jm);
-        }
+            => AsROImpl().GetDeterminant();
 
         /// <summary>Returns the hash code for this instance.</summary>
         /// <returns>The hash code.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public override readonly int GetHashCode()
-        {
-            HashCode hash = default;
-
-            hash.Add(M11);
-            hash.Add(M12);
-            hash.Add(M13);
-            hash.Add(M14);
-
-            hash.Add(M21);
-            hash.Add(M22);
-            hash.Add(M23);
-            hash.Add(M24);
-
-            hash.Add(M31);
-            hash.Add(M32);
-            hash.Add(M33);
-            hash.Add(M34);
-
-            hash.Add(M41);
-            hash.Add(M42);
-            hash.Add(M43);
-            hash.Add(M44);
-
-            return hash.ToHashCode();
-        }
+            => AsROImpl().GetHashCode();
 
         /// <summary>Returns a string that represents this matrix.</summary>
         /// <returns>The string representation of this matrix.</returns>
         /// <remarks>The numeric values in the returned string are formatted by using the conventions of the current culture. For example, for the en-US culture, the returned string might appear as <c>{ {M11:1.1 M12:1.2 M13:1.3 M14:1.4} {M21:2.1 M22:2.2 M23:2.3 M24:2.4} {M31:3.1 M32:3.2 M33:3.3 M34:3.4} {M41:4.1 M42:4.2 M43:4.3 M44:4.4} }</c>.</remarks>
-        public override readonly string ToString() =>
-            $"{{ {{M11:{M11} M12:{M12} M13:{M13} M14:{M14}}} {{M21:{M21} M22:{M22} M23:{M23} M24:{M24}}} {{M31:{M31} M32:{M32} M33:{M33} M34:{M34}}} {{M41:{M41} M42:{M42} M43:{M43} M44:{M44}}} }}";
-
-        private struct CanonicalBasis
-        {
-            public Vector3 Row0;
-            public Vector3 Row1;
-            public Vector3 Row2;
-        };
-
-        private struct VectorBasis
-        {
-            public unsafe Vector3* Element0;
-            public unsafe Vector3* Element1;
-            public unsafe Vector3* Element2;
-        }
+        public override readonly string ToString()
+            => $"{{ {{M11:{M11} M12:{M12} M13:{M13} M14:{M14}}} {{M21:{M21} M22:{M22} M23:{M23} M24:{M24}}} {{M31:{M31} M32:{M32} M33:{M33} M34:{M34}}} {{M41:{M41} M42:{M42} M43:{M43} M44:{M44}}} }}";
     }
 }
index 1e519b8..485cbd4 100644 (file)
@@ -633,13 +633,18 @@ namespace System.Numerics
         /// <returns>The transformed vector.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector4 Transform(Vector4 vector, Matrix4x4 matrix)
+            => Transform(vector, in matrix.AsImpl());
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector4 Transform(Vector4 vector, in Matrix4x4.Impl matrix)
         {
-            return new Vector4(
-                (vector.X * matrix.M11) + (vector.Y * matrix.M21) + (vector.Z * matrix.M31) + (vector.W * matrix.M41),
-                (vector.X * matrix.M12) + (vector.Y * matrix.M22) + (vector.Z * matrix.M32) + (vector.W * matrix.M42),
-                (vector.X * matrix.M13) + (vector.Y * matrix.M23) + (vector.Z * matrix.M33) + (vector.W * matrix.M43),
-                (vector.X * matrix.M14) + (vector.Y * matrix.M24) + (vector.Z * matrix.M34) + (vector.W * matrix.M44)
-            );
+            Vector4 result = matrix.X * vector.X;
+
+            result += matrix.Y * vector.Y;
+            result += matrix.Z * vector.Z;
+            result += matrix.W * vector.W;
+
+            return result;
         }
 
         /// <summary>Transforms a four-dimensional vector by the specified Quaternion rotation value.</summary>