ARM64 intrinsic support for Vector64.Create() and Vector128.Create() (#35590)
author Kunal Pathak <Kunal.Pathak@microsoft.com>
Tue, 5 May 2020 23:44:43 +0000 (16:44 -0700)
committer GitHub <noreply@github.com>
Tue, 5 May 2020 23:44:43 +0000 (16:44 -0700)
* Make Vector64.Create() that takes multiple arguments use ARM64 intrinsic
* Make Vector128.Create() that takes multiple arguments use ARM64 intrinsic
* Intrinsify Vector64.Create() that takes single argument
* Intrinsify Vector128.Create() that takes single argument
* Fix edge case where int.MaxValue was failing if used as immediate

src/coreclr/src/jit/codegenarm64.cpp
src/coreclr/src/jit/emitarm64.cpp
src/coreclr/src/jit/emitarm64.h
src/coreclr/src/jit/hwintrinsic.cpp
src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
src/coreclr/src/jit/hwintrinsiclistarm64.h
src/coreclr/src/jit/lowerarmarch.cpp
src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.il [new file with mode: 0644]
src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.ilproj [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs

index 9389094..26c7411 100644 (file)
@@ -6984,7 +6984,6 @@ void CodeGen::genArm64EmitterUnitTests()
     genDefineTempLabel(genCreateTempLabel());
 
     theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
-    theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
     theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
     theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
 
@@ -7330,6 +7329,10 @@ void CodeGen::genArm64EmitterUnitTests()
     theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
     theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
 
+    // Previously, movi failed to encode an immediate of int.MaxValue or int.MaxValue / 2.
+    theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x7fffffff, INS_OPTS_2S);
+    theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V16, 0x3fffffff, INS_OPTS_2S);
+
     theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
     theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL  8
     theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
index 008a493..0d3eaba 100644 (file)
@@ -2864,12 +2864,11 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
  *         'size' specifies the size of the result (16 or 32 bits)
  */
 
-/*static*/ INT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
+/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
 {
     bool     onesShift = (bsImm.immOnes == 1);
-    unsigned bySh      = bsImm.immBY;         // Num Bytes to shift 0,1,2,3
-    INT32    val       = (INT32)bsImm.immVal; // 8-bit immediate
-    INT32    result    = val;
+    unsigned bySh      = bsImm.immBY;          // Num Bytes to shift 0,1,2,3
+    UINT32   result    = (UINT32)bsImm.immVal; // 8-bit immediate
 
     if (bySh > 0)
     {
index f2cef87..96bbb37 100644 (file)
@@ -187,7 +187,7 @@ union byteShiftedImm {
 
 static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL);
 
-static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
+static UINT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
 
 /************************************************************************
 *
index efcf5b8..674b7f9 100644 (file)
@@ -200,6 +200,8 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
         {
             case TYP_FLOAT:
                 return m_simdHandleCache->Vector64FloatHandle;
+            case TYP_DOUBLE:
+                return m_simdHandleCache->Vector64DoubleHandle;
             case TYP_INT:
                 return m_simdHandleCache->Vector64IntHandle;
             case TYP_USHORT:
@@ -212,6 +214,10 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va
                 return m_simdHandleCache->Vector64ByteHandle;
             case TYP_UINT:
                 return m_simdHandleCache->Vector64UIntHandle;
+            case TYP_LONG:
+                return m_simdHandleCache->Vector64LongHandle;
+            case TYP_ULONG:
+                return m_simdHandleCache->Vector64ULongHandle;
             default:
                 assert(!"Didn't find a class handle for simdType");
         }
index 3d224d4..fafbf06 100644 (file)
@@ -516,11 +516,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if (varTypeIsFloating(intrin.baseType))
                 {
-                    if (targetReg != op1Reg)
-                    {
-                        // fmov reg1, reg2
-                        GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE);
-                    }
+                    // fmov reg1, reg2
+                    GetEmitter()->emitIns_R_R(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, INS_OPTS_NONE);
                 }
                 else
                 {
@@ -557,6 +554,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S);
                 break;
 
+            case NI_Vector64_Create:
+            case NI_Vector128_Create:
+                if (intrin.op1->isContainedFltOrDblImmed())
+                {
+                    const double dataValue = intrin.op1->AsDblCon()->gtDconVal;
+                    GetEmitter()->emitIns_R_F(INS_fmov, emitSize, targetReg, dataValue, opt);
+                }
+                else if (varTypeIsFloating(intrin.baseType))
+                {
+                    GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op1Reg, 0, opt);
+                }
+                else
+                {
+                    if (intrin.op1->isContainedIntOrIImmed())
+                    {
+                        const ssize_t dataValue = intrin.op1->AsIntCon()->gtIconVal;
+                        GetEmitter()->emitIns_R_I(INS_movi, emitSize, targetReg, dataValue, opt);
+                    }
+                    else
+                    {
+                        GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
+                    }
+                }
+                break;
+
             default:
                 unreached();
         }
index 288dcfb..bd192bd 100644 (file)
@@ -23,6 +23,7 @@ HARDWARE_INTRINSIC(Vector64,        AsSByte,
 HARDWARE_INTRINSIC(Vector64,        AsSingle,                                   8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector64,        AsUInt16,                                   8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector64,        AsUInt32,                                   8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector64,        Create,                                     8,           1,     {INS_dup,               INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_mov,            INS_mov,            INS_dup,            INS_dup},               HW_Category_SimpleSIMD,             HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector64,        CreateScalarUnsafe,                         8,           1,     {INS_ins,               INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_invalid,        INS_invalid,        INS_fmov,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector64,        get_AllBitsSet,                             8,           0,     {INS_mvni,              INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},              HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector64,        get_Count,                                  8,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -44,6 +45,7 @@ HARDWARE_INTRINSIC(Vector128,       AsSingle,                                  1
 HARDWARE_INTRINSIC(Vector128,       AsUInt16,                                  16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128,       AsUInt32,                                  16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128,       AsUInt64,                                  16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector128,       Create,                                    16,           1,     {INS_dup,               INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_dup,            INS_dup},               HW_Category_SimpleSIMD,             HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                        16,           1,     {INS_ins,               INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_fmov,           INS_fmov},              HW_Category_SimpleSIMD,             HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                            16,           0,     {INS_mvni,              INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},              HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector128,       get_Count,                                 16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
index 75dfc14..6f3cbda 100644 (file)
@@ -923,6 +923,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
             }
             break;
+
+        case NI_Vector64_Create:
+        case NI_Vector128_Create:
         case NI_Vector64_CreateScalarUnsafe:
         case NI_Vector128_CreateScalarUnsafe:
             if (intrin.op1->IsCnsIntOrI())
diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.il b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.il
new file mode 100644 (file)
index 0000000..fb24816
--- /dev/null
@@ -0,0 +1,92 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// Metadata version: v4.0.30319
+.assembly extern System.Runtime
+{
+  .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A )                         // .?_....:
+  .ver 5:0:0:0
+}
+.assembly extern System.Runtime.Intrinsics
+{
+  .publickeytoken = (CC 7B 13 FF CD 2D DD 51 )                         // .{...-.Q
+  .ver 5:0:0:0
+}
+.assembly projs { }
+.module projs.dll
+// MVID: {379016DB-73C2-41D4-9E5F-5B727BC70E2C}
+.custom instance void [System.Runtime]System.Security.UnverifiableCodeAttribute::.ctor() = ( 01 00 00 00 ) 
+.imagebase 0x00400000
+.file alignment 0x00000200
+.stackreserve 0x00100000
+.subsystem 0x0003       // WINDOWS_CUI
+.corflags 0x00000001    //  ILONLY
+// Image base: 0x00000293F3DD0000
+
+
+// =============== CLASS MEMBERS DECLARATION ===================
+// This bug was found when passing Vector64<long> to a method such that
+// the vector is on the evaluation stack. C# sometimes assigns the Vector64
+// to a local variable before passing it to the method. In such cases, the
+// bug doesn't repro.
+.class public auto ansi sealed beforefieldinit projs.GitHub_35821
+       extends [System.Runtime]System.Object
+{
+  .method private hidebysig static int32 
+          Main(string[] args) cil managed
+  {
+    .entrypoint
+    // Code size       48 (0x30)
+    .maxstack  8
+    IL_0000:  ldc.i4.s   23
+    IL_0002:  conv.i8
+    IL_0003:  call       valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(uint64)
+    IL_0008:  call       void projs.GitHub_35821::Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64>)
+    IL_000d:  ldc.i4.s   23
+    IL_000f:  conv.i8
+    IL_0010:  call       valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(int64)
+    IL_0015:  call       void projs.GitHub_35821::Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64>)
+    IL_001a:  ldc.r8     23.
+    IL_0023:  call       valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64> [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64::Create(float64)
+    IL_0028:  call       void projs.GitHub_35821::Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64>)
+    IL_002d:  ldc.i4.s   100
+    IL_002f:  ret
+  } // end of method GitHub_35821::Main
+
+  .method public hidebysig static void  Test1(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<uint64> data) cil managed noinlining
+  {
+    // Code size       1 (0x1)
+    .maxstack  8
+    IL_0000:  ret
+  } // end of method GitHub_35821::Test1
+
+  .method public hidebysig static void  Test2(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<int64> data) cil managed noinlining
+  {
+    // Code size       1 (0x1)
+    .maxstack  8
+    IL_0000:  ret
+  } // end of method GitHub_35821::Test2
+
+  .method public hidebysig static void  Test3(valuetype [System.Runtime.Intrinsics]System.Runtime.Intrinsics.Vector64`1<float64> data) cil managed noinlining
+  {
+    // Code size       1 (0x1)
+    .maxstack  8
+    IL_0000:  ret
+  } // end of method GitHub_35821::Test3
+
+  .method public hidebysig specialname rtspecialname 
+          instance void  .ctor() cil managed
+  {
+    // Code size       7 (0x7)
+    .maxstack  8
+    IL_0000:  ldarg.0
+    IL_0001:  call       instance void [System.Runtime]System.Object::.ctor()
+    IL_0006:  ret
+  } // end of method GitHub_35821::.ctor
+
+} // end of class projs.GitHub_35821
+
+
+// =============================================================
+
+// *********** DISASSEMBLY COMPLETE ***********************
diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.ilproj b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_35821/GitHub_35821.ilproj
new file mode 100644 (file)
index 0000000..e7c67cc
--- /dev/null
@@ -0,0 +1,12 @@
+<Project Sdk="Microsoft.NET.Sdk.IL">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).il" />
+  </ItemGroup>
+</Project>
index 2938d63..5d55d00 100644 (file)
@@ -260,6 +260,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m128i _mm_set1_epi8</remarks>
         /// <returns>A new <see cref="Vector128{Byte}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic]
         public static unsafe Vector128<byte> Create(byte value)
         {
             if (Avx2.IsSupported)
@@ -318,6 +319,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m128d _mm_set1_pd</remarks>
         /// <returns>A new <see cref="Vector128{Double}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic]
         public static unsafe Vector128<double> Create(double value)
         {
             if (Sse3.IsSupported)
@@ -354,6 +356,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m128i _mm_set1_epi16</remarks>
         /// <returns>A new <see cref="Vector128{Int16}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic]
         public static unsafe Vector128<short> Create(short value)
         {
             if (Avx2.IsSupported)
@@ -397,6 +400,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m128i _mm_set1_epi32</remarks>
         /// <returns>A new <see cref="Vector128{Int32}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic]
         public static unsafe Vector128<int> Create(int value)
         {
             if (Avx2.IsSupported)
@@ -432,6 +436,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m128i _mm_set1_epi64x</remarks>
         /// <returns>A new <see cref="Vector128{Int64}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic]
         public static unsafe Vector128<long> Create(long value)
         {
             if (Sse2.X64.IsSupported)
@@ -468,6 +473,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector128{SByte}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector128<sbyte> Create(sbyte value)
         {
             if (Avx2.IsSupported)
@@ -526,6 +532,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m128 _mm_set1_ps</remarks>
         /// <returns>A new <see cref="Vector128{Single}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [Intrinsic]
         public static unsafe Vector128<float> Create(float value)
         {
             if (Avx2.IsSupported)
@@ -568,6 +575,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector128{UInt16}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector128<ushort> Create(ushort value)
         {
             if (Avx2.IsSupported)
@@ -612,6 +620,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector128{UInt32}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector128<uint> Create(uint value)
         {
             if (Avx2.IsSupported)
@@ -648,6 +657,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector128{UInt64}" /> with all elements initialized to <paramref name="value" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector128<ulong> Create(ulong value)
         {
             if (Sse2.X64.IsSupported)
@@ -753,6 +763,26 @@ namespace System.Runtime.Intrinsics
                 return Sse2.UnpackLow(lo64, hi64).AsByte();                                         // <  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<byte> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                result = AdvSimd.Insert(result, 3, e3);
+                result = AdvSimd.Insert(result, 4, e4);
+                result = AdvSimd.Insert(result, 5, e5);
+                result = AdvSimd.Insert(result, 6, e6);
+                result = AdvSimd.Insert(result, 7, e7);
+                result = AdvSimd.Insert(result, 8, e8);
+                result = AdvSimd.Insert(result, 9, e9);
+                result = AdvSimd.Insert(result, 10, e10);
+                result = AdvSimd.Insert(result, 11, e11);
+                result = AdvSimd.Insert(result, 12, e12);
+                result = AdvSimd.Insert(result, 13, e13);
+                result = AdvSimd.Insert(result, 14, e14);
+                return AdvSimd.Insert(result, 15, e15);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
 
             static Vector128<byte> SoftwareFallback(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15)
@@ -797,6 +827,12 @@ namespace System.Runtime.Intrinsics
                 return Sse.MoveLowToHigh(CreateScalarUnsafe(e0).AsSingle(), CreateScalarUnsafe(e1).AsSingle()).AsDouble();
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<double> result = CreateScalarUnsafe(e0);
+                return AdvSimd.Insert(result, 1, e1);
+            }
+
             return SoftwareFallback(e0, e1);
 
             static Vector128<double> SoftwareFallback(double e0, double e1)
@@ -837,6 +873,18 @@ namespace System.Runtime.Intrinsics
                 return Sse2.Insert(result, e7, 7);                                                  // < 0, 1, 2, 3, 4, 5, 6, 7 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<short> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                result = AdvSimd.Insert(result, 3, e3);
+                result = AdvSimd.Insert(result, 4, e4);
+                result = AdvSimd.Insert(result, 5, e5);
+                result = AdvSimd.Insert(result, 6, e6);
+                return AdvSimd.Insert(result, 7, e7);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
 
             static Vector128<short> SoftwareFallback(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7)
@@ -886,6 +934,14 @@ namespace System.Runtime.Intrinsics
                 return Sse2.UnpackLow(lo64, hi64).AsInt32();                                        // < 0, 1, 2, 3 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<int> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                return AdvSimd.Insert(result, 3, e3);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3);
 
             static Vector128<int> SoftwareFallback(int e0, int e1, int e2, int e3)
@@ -921,6 +977,12 @@ namespace System.Runtime.Intrinsics
                 return Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1));              // < 0, 1 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<long> result = CreateScalarUnsafe(e0);
+                return AdvSimd.Insert(result, 1, e1);
+            }
+
             return SoftwareFallback(e0, e1);
 
             static Vector128<long> SoftwareFallback(long e0, long e1)
@@ -1011,6 +1073,26 @@ namespace System.Runtime.Intrinsics
                 return Sse2.UnpackLow(lo64, hi64).AsSByte();                                        // <  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<sbyte> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                result = AdvSimd.Insert(result, 3, e3);
+                result = AdvSimd.Insert(result, 4, e4);
+                result = AdvSimd.Insert(result, 5, e5);
+                result = AdvSimd.Insert(result, 6, e6);
+                result = AdvSimd.Insert(result, 7, e7);
+                result = AdvSimd.Insert(result, 8, e8);
+                result = AdvSimd.Insert(result, 9, e9);
+                result = AdvSimd.Insert(result, 10, e10);
+                result = AdvSimd.Insert(result, 11, e11);
+                result = AdvSimd.Insert(result, 12, e12);
+                result = AdvSimd.Insert(result, 13, e13);
+                result = AdvSimd.Insert(result, 14, e14);
+                return AdvSimd.Insert(result, 15, e15);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
 
             static Vector128<sbyte> SoftwareFallback(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15)
@@ -1065,6 +1147,14 @@ namespace System.Runtime.Intrinsics
                 return Sse.MoveLowToHigh(lo64, hi64);                                               // < 0, 1, 2, 3 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<float> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                return AdvSimd.Insert(result, 3, e3);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3);
 
             static Vector128<float> SoftwareFallback(float e0, float e1, float e2, float e3)
@@ -1108,6 +1198,18 @@ namespace System.Runtime.Intrinsics
                 return Sse2.Insert(result, e7, 7);                                                  // < 0, 1, 2, 3, 4, 5, 6, 7 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<ushort> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                result = AdvSimd.Insert(result, 3, e3);
+                result = AdvSimd.Insert(result, 4, e4);
+                result = AdvSimd.Insert(result, 5, e5);
+                result = AdvSimd.Insert(result, 6, e6);
+                return AdvSimd.Insert(result, 7, e7);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
 
             static Vector128<ushort> SoftwareFallback(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7)
@@ -1158,6 +1260,14 @@ namespace System.Runtime.Intrinsics
                 return Sse2.UnpackLow(lo64, hi64).AsUInt32();                                       // < 0, 1, 2, 3 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<uint> result = CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                return AdvSimd.Insert(result, 3, e3);
+            }
+
             return SoftwareFallback(e0, e1, e2, e3);
 
             static Vector128<uint> SoftwareFallback(uint e0, uint e1, uint e2, uint e3)
@@ -1194,6 +1304,12 @@ namespace System.Runtime.Intrinsics
                 return Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1));              // < 0, 1 >
             }
 
+            if (AdvSimd.IsSupported)
+            {
+                Vector128<ulong> result = CreateScalarUnsafe(e0);
+                return AdvSimd.Insert(result, 1, e1);
+            }
+
             return SoftwareFallback(e0, e1);
 
             static Vector128<ulong> SoftwareFallback(ulong e0, ulong e1)
index 089a0b5..56cf62d 100644 (file)
@@ -2,8 +2,8 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using System.Runtime.CompilerServices;
 using Internal.Runtime.CompilerServices;
+using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics.Arm;
 
 namespace System.Runtime.Intrinsics
@@ -157,6 +157,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <remarks>On x86, this method corresponds to __m64 _mm_set1_pi8</remarks>
         /// <returns>A new <see cref="Vector64{Byte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [Intrinsic]
         public static unsafe Vector64<byte> Create(byte value)
         {
             byte* pResult = stackalloc byte[8]
@@ -177,6 +178,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector64{Double}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector64{Double}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [Intrinsic]
         public static unsafe Vector64<double> Create(double value)
         {
             return Unsafe.As<double, Vector64<double>>(ref value);
@@ -186,6 +188,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <remarks>On x86, this method corresponds to __m64 _mm_set1_pi16</remarks>
         /// <returns>A new <see cref="Vector64{Int16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [Intrinsic]
         public static unsafe Vector64<short> Create(short value)
         {
             short* pResult = stackalloc short[4]
@@ -203,6 +206,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <remarks>On x86, this method corresponds to __m64 _mm_set1_pi32</remarks>
         /// <returns>A new <see cref="Vector64{Int32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [Intrinsic]
         public static unsafe Vector64<int> Create(int value)
         {
             int* pResult = stackalloc int[2]
@@ -217,6 +221,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector64{Int64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector64{Int64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [Intrinsic]
         public static unsafe Vector64<long> Create(long value)
         {
             return Unsafe.As<long, Vector64<long>>(ref value);
@@ -227,6 +232,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m64 _mm_set1_pi8</remarks>
         /// <returns>A new <see cref="Vector64{SByte}" /> with all elements initialized to <paramref name="value" />.</returns>
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector64<sbyte> Create(sbyte value)
         {
             sbyte* pResult = stackalloc sbyte[8]
@@ -247,6 +253,7 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector64{Single}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector64{Single}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [Intrinsic]
         public static unsafe Vector64<float> Create(float value)
         {
             float* pResult = stackalloc float[2]
@@ -263,6 +270,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m64 _mm_set1_pi16</remarks>
         /// <returns>A new <see cref="Vector64{UInt16}" /> with all elements initialized to <paramref name="value" />.</returns>
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector64<ushort> Create(ushort value)
         {
             ushort* pResult = stackalloc ushort[4]
@@ -281,6 +289,7 @@ namespace System.Runtime.Intrinsics
         /// <remarks>On x86, this method corresponds to __m64 _mm_set1_pi32</remarks>
         /// <returns>A new <see cref="Vector64{UInt32}" /> with all elements initialized to <paramref name="value" />.</returns>
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector64<uint> Create(uint value)
         {
             uint* pResult = stackalloc uint[2]
@@ -296,6 +305,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector64{UInt64}" /> with all elements initialized to <paramref name="value" />.</returns>
         [CLSCompliant(false)]
+        [Intrinsic]
         public static unsafe Vector64<ulong> Create(ulong value)
         {
             return Unsafe.As<ulong, Vector64<ulong>>(ref value);
@@ -314,6 +324,18 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector64{Byte}" /> with each element initialized to corresponding specified value.</returns>
         public static unsafe Vector64<byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<byte> result = Vector64.CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                result = AdvSimd.Insert(result, 3, e3);
+                result = AdvSimd.Insert(result, 4, e4);
+                result = AdvSimd.Insert(result, 5, e5);
+                result = AdvSimd.Insert(result, 6, e6);
+                return AdvSimd.Insert(result, 7, e7);
+            }
+
             byte* pResult = stackalloc byte[8]
             {
                 e0,
@@ -338,6 +360,14 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector64{Int16}" /> with each element initialized to corresponding specified value.</returns>
         public static unsafe Vector64<short> Create(short e0, short e1, short e2, short e3)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<short> result = Vector64.CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                return AdvSimd.Insert(result, 3, e3);
+            }
+
             short* pResult = stackalloc short[4]
             {
                 e0,
@@ -356,6 +386,12 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector64{Int32}" /> with each element initialized to corresponding specified value.</returns>
         public static unsafe Vector64<int> Create(int e0, int e1)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<int> result = Vector64.CreateScalarUnsafe(e0);
+                return AdvSimd.Insert(result, 1, e1);
+            }
+
             int* pResult = stackalloc int[2]
             {
                 e0,
@@ -379,6 +415,18 @@ namespace System.Runtime.Intrinsics
         [CLSCompliant(false)]
         public static unsafe Vector64<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<sbyte> result = Vector64.CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                result = AdvSimd.Insert(result, 3, e3);
+                result = AdvSimd.Insert(result, 4, e4);
+                result = AdvSimd.Insert(result, 5, e5);
+                result = AdvSimd.Insert(result, 6, e6);
+                return AdvSimd.Insert(result, 7, e7);
+            }
+
             sbyte* pResult = stackalloc sbyte[8]
             {
                 e0,
@@ -400,6 +448,12 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector64{Single}" /> with each element initialized to corresponding specified value.</returns>
         public static unsafe Vector64<float> Create(float e0, float e1)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<float> result = Vector64.CreateScalarUnsafe(e0);
+                return AdvSimd.Insert(result, 1, e1);
+            }
+
             float* pResult = stackalloc float[2]
             {
                 e0,
@@ -419,6 +473,14 @@ namespace System.Runtime.Intrinsics
         [CLSCompliant(false)]
         public static unsafe Vector64<ushort> Create(ushort e0, ushort e1, ushort e2, ushort e3)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<ushort> result = Vector64.CreateScalarUnsafe(e0);
+                result = AdvSimd.Insert(result, 1, e1);
+                result = AdvSimd.Insert(result, 2, e2);
+                return AdvSimd.Insert(result, 3, e3);
+            }
+
             ushort* pResult = stackalloc ushort[4]
             {
                 e0,
@@ -438,6 +500,12 @@ namespace System.Runtime.Intrinsics
         [CLSCompliant(false)]
         public static unsafe Vector64<uint> Create(uint e0, uint e1)
         {
+            if (AdvSimd.IsSupported)
+            {
+                Vector64<uint> result = Vector64.CreateScalarUnsafe(e0);
+                return AdvSimd.Insert(result, 1, e1);
+            }
+
             uint* pResult = stackalloc uint[2]
             {
                 e0,