Managed implementation of SSE2 SetAllVector128 helper HW intrinsic
author Jacek Blaszczynski <biosciencenow@outlook.com>
Wed, 7 Mar 2018 14:48:24 +0000 (15:48 +0100)
committer Jacek Blaszczynski <biosciencenow@outlook.com>
Wed, 14 Mar 2018 02:31:50 +0000 (03:31 +0100)
src/jit/hwintrinsicxarch.cpp
src/jit/namedintrinsiclist.h
src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse2.cs

index 869361770c4bd864218850be7667650c5517c461..f93141bdee040f6ac720794cbd9ae6a7cd03d6c0 100644 (file)
@@ -124,7 +124,7 @@ InstructionSet Compiler::lookupHWIntrinsicISA(const char* className)
 //    isa        -- instruction set of the intrinsic.
 //
 // Return Value:
-//    Id for the hardware intrinsic.
+//    Id for the hardware intrinsic
 //
 // TODO-Throughput: replace sequential search by binary search
 NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSet isa)
@@ -137,6 +137,7 @@ NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSe
             if (isa == hwIntrinsicInfoArray[i].isa && strcmp(methodName, hwIntrinsicInfoArray[i].intrinsicName) == 0)
             {
                 result = hwIntrinsicInfoArray[i].intrinsicID;
+                break;
             }
         }
     }
index 91c9720580ce82f77e6ee7e1e2f919a66ed97acc..f4d7cb2a88d897c4002cd3262b06792a9a82937a 100644 (file)
@@ -27,8 +27,8 @@ enum NamedIntrinsic : unsigned int
 #define HARDWARE_INTRINSIC(id, isa, name, form, ins0, ins1, ins2, flags) id,
 #include "hwintrinsiclistArm64.h"
 #endif // !defined(_TARGET_XARCH_) && !defined(_TARGET_ARM64_)
-    NI_HW_INTRINSIC_END
-#endif
+    NI_HW_INTRINSIC_END,
+#endif // FEATURE_HW_INTRINSICS
 };
 
 #if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
index 23d8c934939067c88686f66065609dadce600d91..d0eb9ef4da9bdd731732332e8517a5d73ffcf501 100644 (file)
@@ -968,6 +968,149 @@ namespace System.Runtime.Intrinsics.X86
         /// </summary>
         public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right) => PackUnsignedSaturate(left, right);
 
+        /// <summary>
+        /// __m128i _mm_set1_epi8 (char a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<byte> SetAllVector128(byte value)
+        {
+            // Zero the vector and load value at index 0
+            Vector128<byte> vector1 = Sse.StaticCast<uint, byte>(ConvertScalarToVector128UInt32(value));
+            // Unpack the vector with itself: create { -- -- -- -- -- -- -- --  -- -- -- -- -- -- vl vl }
+            Vector128<ushort> tmpVector1 = Sse.StaticCast<byte, ushort>(UnpackLow(vector1, vector1));
+            // Unpack again at ushort width: create { -- -- -- -- -- -- -- --  -- -- -- -- vl vl vl vl }
+            Vector128<uint> tmpVector2 = Sse.StaticCast<ushort, uint>(UnpackLow(tmpVector1, tmpVector1));
+            // Shuffle with control 0: create { vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl } and return result
+            return Sse.StaticCast<uint, byte>(Shuffle(tmpVector2, 0));
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi8 (char a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<sbyte> SetAllVector128(sbyte value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<sbyte> vector = Sse.StaticCast<int, sbyte>(ConvertScalarToVector128Int32(value));
+            // Unpack the vector with itself: create { -- -- -- -- -- -- -- --  -- -- -- -- -- -- vl vl }
+            Vector128<short> tmpVector1 = Sse.StaticCast<sbyte, short>(UnpackLow(vector, vector));
+            // Unpack again at short width: create { -- -- -- -- -- -- -- --  -- -- -- -- vl vl vl vl }
+            Vector128<int> tmpVector2 = Sse.StaticCast<short, int>(UnpackLow(tmpVector1, tmpVector1));
+            // Shuffle with control 0: create { vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl } and return result
+            return Sse.StaticCast<int, sbyte>(Shuffle(tmpVector2, 0));
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi16 (short a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<short> SetAllVector128(short value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<short> vector = Sse.StaticCast<int, short>(ConvertScalarToVector128Int32(value));
+            // Unpack the vector with itself: create { -- -- -- -- -- -- vl vl }
+            Vector128<int> tmpVector = Sse.StaticCast<short, int>(UnpackLow(vector, vector));
+            // Shuffle with control 0: create { vl vl vl vl vl vl vl vl } and return result
+            return Sse.StaticCast<int, short>(Shuffle(tmpVector, (byte)0));
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi16 (short a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<ushort> SetAllVector128(ushort value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<ushort> vector = Sse.StaticCast<uint, ushort>(ConvertScalarToVector128UInt32(value));
+            // Unpack the vector with itself: create { -- -- -- -- -- -- vl vl }
+            Vector128<uint> tmpVector = Sse.StaticCast<ushort, uint>(UnpackLow(vector, vector));
+            // Shuffle with control 0: create { vl vl vl vl vl vl vl vl } and return result
+            return Sse.StaticCast<uint, ushort>(Shuffle(tmpVector, (byte)0));
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi32 (int a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<int> SetAllVector128(int value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<int> vector = ConvertScalarToVector128Int32(value);
+            // Shuffle with control 0: create { vl vl vl vl } and return result
+            return Shuffle(vector, 0);
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi32 (int a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<uint> SetAllVector128(uint value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<uint> vector = ConvertScalarToVector128UInt32(value);
+            // Shuffle with control 0: create { vl vl vl vl } and return result
+            return Shuffle(vector, 0);
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi64x (long long a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<long> SetAllVector128(long value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<long> vector = ConvertScalarToVector128Int64(value);
+            // Unpack the vector with itself: create { vl vl } and return result
+            return UnpackLow(vector, vector);
+        }
+        /// <summary>
+        /// __m128i _mm_set1_epi64x (long long a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<ulong> SetAllVector128(ulong value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<ulong> vector = ConvertScalarToVector128UInt64(value);
+            // Unpack the vector with itself: create { vl vl } and return result
+            return UnpackLow(vector, vector);
+        }
+        /// <summary>
+        /// __m128d _mm_set1_pd (double a)
+        ///   HELPER
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<double> SetAllVector128(double value)
+        {
+            // TODO-CQ Optimize algorithm choice based on benchmarks
+
+            // Zero the vector and load value at index 0
+            Vector128<double> vector = SetScalarVector128(value);
+            // Unpack the vector with itself: create { vl vl } and return result
+            return UnpackLow(vector, vector);
+        }
+
+        /// <summary>
+        /// __m128d _mm_set_sd (double a)
+        ///   HELPER
+        /// </summary>
+        public static Vector128<double> SetScalarVector128(double value) => SetScalarVector128(value); // NOTE(review): self-referential body; presumably expanded by the JIT as a HW intrinsic - confirm
+
         /// <summary>
         /// ___m128i _mm_set_epi8 (char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0)
         ///   HELPER
@@ -1158,58 +1301,6 @@ namespace System.Runtime.Intrinsics.X86
             return UnpackLow(e0Vector, e1Vector);
         }
 
-        /// <summary>
-        /// __m128d _mm_set_sd (double a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<double> SetScalarVector128(double value) => SetScalarVector128(value);
-
-        /// <summary>
-        /// __m128i _mm_set1_epi8 (char a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<byte> SetAllVector128(byte value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi8 (char a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<sbyte> SetAllVector128(sbyte value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi16 (short a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<short> SetAllVector128(short value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi16 (short a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<ushort> SetAllVector128(ushort value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi32 (int a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<int> SetAllVector128(int value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi32 (int a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<uint> SetAllVector128(uint value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi64x (long long a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<long> SetAllVector128(long value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128i _mm_set1_epi64x (long long a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<ulong> SetAllVector128(ulong value) => SetAllVector128(value);
-        /// <summary>
-        /// __m128d _mm_set1_pd (double a)
-        ///   HELPER
-        /// </summary>
-        public static Vector128<double> SetAllVector128(double value) => SetAllVector128(value);
-
         /// <summary>
         /// __m128i _mm_setzero_si128 ()
         ///   HELPER: PXOR