Add Shuffle* Sse2 hardware intrinsics tests
author Jacek Blaszczynski <biosciencenow@outlook.com>
Wed, 28 Feb 2018 05:11:12 +0000 (06:11 +0100)
committer Jacek Blaszczynski <biosciencenow@outlook.com>
Fri, 2 Mar 2018 22:48:50 +0000 (23:48 +0100)
13 files changed:
tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.tt [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.tt [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_ro.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.tt [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_ro.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_ro.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse2/TestTableSse2.cs

diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.cs
new file mode 100644 (file)
index 0000000..59ac723
--- /dev/null
@@ -0,0 +1,297 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace IntelHardwareIntrinsicTest
+{
+    internal static partial class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+
+        /// <summary>
+        /// Runs Sse2.Shuffle with fixed immediates over double, int and uint inputs and verifies each result.
+        /// </summary>
+        /// <returns>Pass, or Fail once any check fails; Pass is also returned when Sse2 is unsupported.</returns>
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+            string methodUnderTestName = nameof(Sse2.Shuffle);
+
+            if (Sse2.IsSupported)
+            {
+                string[] permuteData = new string[]
+                {
+                    "0b11100100",         // identity
+                    "0b00011011",         // invert
+                    "0b00000000",         // broadcast element 0
+                    "0b11111111",         // broadcast element 3
+                    "0b01010101",         // broadcast element 1
+                    "0b10101010",         // broadcast element 2
+                    "0b11011000",         // swap middle elements
+                    "0b00100111",         // swap external elements
+                    "0b10110001",         // swap internal with external elements
+                    "0b11110000",         // divide everything between external elements
+                    "0b10100101",         // divide everything between internal elements
+                    "0b00010100",         // pattern (0, 1, 1, 0)
+                    "0b10000010",         // pattern (2, 0, 0, 2)
+                    "0b11001100",         // pattern (3, 0, 3, 0)
+                    "0b01100110",         // pattern (1, 2, 1, 2)
+                    "0b10011001"          // pattern (2, 1, 2, 1)
+                };
+
+                string[] permuteDouble = new string[]
+                {
+                    "0b00",
+                    "0b01",
+                    "0b10",
+                    "0b11",
+                };
+
+                using (var doubleTable = TestTableTuvImmSse2<double, double, byte>.Create(permuteDouble.Length))
+                using (var intTable = TestTableTuvImmSse2<int, int, byte>.Create(permuteData.Length))
+                using (var uintTable = TestTableTuvImmSse2<uint, uint, byte>.Create(permuteData.Length))
+                {
+                    // Vector128<double> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<double>(
+                        doubleTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (uint) i % 2;
+                        });
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<double>(
+                        doubleTable.inArray2, 16, (int i, int elNo) =>
+                        {
+                            return (uint) i % 2 + 10;
+                        });
+
+                    (Vector128<double>, Vector128<double>) valueDouble_0 = doubleTable[0];
+                    Vector128<double> resultDouble_0 = Sse2.Shuffle(valueDouble_0.Item1, valueDouble_0.Item2, (byte) 0b00);
+                    doubleTable.SetOutArray(resultDouble_0, 0, (byte) 0b00);
+
+                    (Vector128<double>, Vector128<double>) valueDouble_1 = doubleTable[1];
+                    Vector128<double> resultDouble_1 = Sse2.Shuffle(valueDouble_1.Item1, valueDouble_1.Item2, (byte) 0b01);
+                    doubleTable.SetOutArray(resultDouble_1, 1, (byte) 0b01);
+
+                    (Vector128<double>, Vector128<double>) valueDouble_2 = doubleTable[2];
+                    Vector128<double> resultDouble_2 = Sse2.Shuffle(valueDouble_2.Item1, valueDouble_2.Item2, (byte) 0b10);
+                    doubleTable.SetOutArray(resultDouble_2, 2, (byte) 0b10);
+
+                    (Vector128<double>, Vector128<double>) valueDouble_3 = doubleTable[3];
+                    Vector128<double> resultDouble_3 = Sse2.Shuffle(valueDouble_3.Item1, valueDouble_3.Item2, (byte) 0b11);
+                    doubleTable.SetOutArray(resultDouble_3, 3, (byte) 0b11);
+
+                    // Vector128<int> tests
+
+                    // Initialize intTable (not uintTable) so the int shuffles below read the seeded inputs.
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<int>(
+                        intTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return i % 4;
+                        });
+
+                    (Vector128<int>, Vector128<int>) valueInt32_0 = intTable[0];
+                    Vector128<int> resultInt32_0 = Sse2.Shuffle(valueInt32_0.Item1, (byte) 0b11100100);
+                    intTable.SetOutArray(resultInt32_0, 0, (byte) 0b11100100);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_1 = intTable[1];
+                    Vector128<int> resultInt32_1 = Sse2.Shuffle(valueInt32_1.Item1, (byte) 0b00011011);
+                    intTable.SetOutArray(resultInt32_1, 1, (byte) 0b00011011);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_2 = intTable[2];
+                    Vector128<int> resultInt32_2 = Sse2.Shuffle(valueInt32_2.Item1, (byte) 0b00000000);
+                    intTable.SetOutArray(resultInt32_2, 2, (byte) 0b00000000);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_3 = intTable[3];
+                    Vector128<int> resultInt32_3 = Sse2.Shuffle(valueInt32_3.Item1, (byte) 0b11111111);
+                    intTable.SetOutArray(resultInt32_3, 3, (byte) 0b11111111);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_4 = intTable[4];
+                    Vector128<int> resultInt32_4 = Sse2.Shuffle(valueInt32_4.Item1, (byte) 0b01010101);
+                    intTable.SetOutArray(resultInt32_4, 4, (byte) 0b01010101);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_5 = intTable[5];
+                    Vector128<int> resultInt32_5 = Sse2.Shuffle(valueInt32_5.Item1, (byte) 0b10101010);
+                    intTable.SetOutArray(resultInt32_5, 5, (byte) 0b10101010);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_6 = intTable[6];
+                    Vector128<int> resultInt32_6 = Sse2.Shuffle(valueInt32_6.Item1, (byte) 0b11011000);
+                    intTable.SetOutArray(resultInt32_6, 6, (byte) 0b11011000);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_7 = intTable[7];
+                    Vector128<int> resultInt32_7 = Sse2.Shuffle(valueInt32_7.Item1, (byte) 0b00100111);
+                    intTable.SetOutArray(resultInt32_7, 7, (byte) 0b00100111);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_8 = intTable[8];
+                    Vector128<int> resultInt32_8 = Sse2.Shuffle(valueInt32_8.Item1, (byte) 0b10110001);
+                    intTable.SetOutArray(resultInt32_8, 8, (byte) 0b10110001);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_9 = intTable[9];
+                    Vector128<int> resultInt32_9 = Sse2.Shuffle(valueInt32_9.Item1, (byte) 0b11110000);
+                    intTable.SetOutArray(resultInt32_9, 9, (byte) 0b11110000);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_10 = intTable[10];
+                    Vector128<int> resultInt32_10 = Sse2.Shuffle(valueInt32_10.Item1, (byte) 0b10100101);
+                    intTable.SetOutArray(resultInt32_10, 10, (byte) 0b10100101);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_11 = intTable[11];
+                    Vector128<int> resultInt32_11 = Sse2.Shuffle(valueInt32_11.Item1, (byte) 0b00010100);
+                    intTable.SetOutArray(resultInt32_11, 11, (byte) 0b00010100);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_12 = intTable[12];
+                    Vector128<int> resultInt32_12 = Sse2.Shuffle(valueInt32_12.Item1, (byte) 0b10000010);
+                    intTable.SetOutArray(resultInt32_12, 12, (byte) 0b10000010);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_13 = intTable[13];
+                    Vector128<int> resultInt32_13 = Sse2.Shuffle(valueInt32_13.Item1, (byte) 0b11001100);
+                    intTable.SetOutArray(resultInt32_13, 13, (byte) 0b11001100);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_14 = intTable[14];
+                    Vector128<int> resultInt32_14 = Sse2.Shuffle(valueInt32_14.Item1, (byte) 0b01100110);
+                    intTable.SetOutArray(resultInt32_14, 14, (byte) 0b01100110);
+
+                    (Vector128<int>, Vector128<int>) valueInt32_15 = intTable[15];
+                    Vector128<int> resultInt32_15 = Sse2.Shuffle(valueInt32_15.Item1, (byte) 0b10011001);
+                    intTable.SetOutArray(resultInt32_15, 15, (byte) 0b10011001);
+
+                    // Vector128<uint> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<uint>(
+                        uintTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (uint) i % 4;
+                        });
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_0 = uintTable[0];
+                    Vector128<uint> resultUInt32_0 = Sse2.Shuffle(valueUInt32_0.Item1, (byte) 0b11100100);
+                    uintTable.SetOutArray(resultUInt32_0, 0, (byte) 0b11100100);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_1 = uintTable[1];
+                    Vector128<uint> resultUInt32_1 = Sse2.Shuffle(valueUInt32_1.Item1, (byte) 0b00011011);
+                    uintTable.SetOutArray(resultUInt32_1, 1, (byte) 0b00011011);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_2 = uintTable[2];
+                    Vector128<uint> resultUInt32_2 = Sse2.Shuffle(valueUInt32_2.Item1, (byte) 0b00000000);
+                    uintTable.SetOutArray(resultUInt32_2, 2, (byte) 0b00000000);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_3 = uintTable[3];
+                    Vector128<uint> resultUInt32_3 = Sse2.Shuffle(valueUInt32_3.Item1, (byte) 0b11111111);
+                    uintTable.SetOutArray(resultUInt32_3, 3, (byte) 0b11111111);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_4 = uintTable[4];
+                    Vector128<uint> resultUInt32_4 = Sse2.Shuffle(valueUInt32_4.Item1, (byte) 0b01010101);
+                    uintTable.SetOutArray(resultUInt32_4, 4, (byte) 0b01010101);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_5 = uintTable[5];
+                    Vector128<uint> resultUInt32_5 = Sse2.Shuffle(valueUInt32_5.Item1, (byte) 0b10101010);
+                    uintTable.SetOutArray(resultUInt32_5, 5, (byte) 0b10101010);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_6 = uintTable[6];
+                    Vector128<uint> resultUInt32_6 = Sse2.Shuffle(valueUInt32_6.Item1, (byte) 0b11011000);
+                    uintTable.SetOutArray(resultUInt32_6, 6, (byte) 0b11011000);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_7 = uintTable[7];
+                    Vector128<uint> resultUInt32_7 = Sse2.Shuffle(valueUInt32_7.Item1, (byte) 0b00100111);
+                    uintTable.SetOutArray(resultUInt32_7, 7, (byte) 0b00100111);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_8 = uintTable[8];
+                    Vector128<uint> resultUInt32_8 = Sse2.Shuffle(valueUInt32_8.Item1, (byte) 0b10110001);
+                    uintTable.SetOutArray(resultUInt32_8, 8, (byte) 0b10110001);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_9 = uintTable[9];
+                    Vector128<uint> resultUInt32_9 = Sse2.Shuffle(valueUInt32_9.Item1, (byte) 0b11110000);
+                    uintTable.SetOutArray(resultUInt32_9, 9, (byte) 0b11110000);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_10 = uintTable[10];
+                    Vector128<uint> resultUInt32_10 = Sse2.Shuffle(valueUInt32_10.Item1, (byte) 0b10100101);
+                    uintTable.SetOutArray(resultUInt32_10, 10, (byte) 0b10100101);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_11 = uintTable[11];
+                    Vector128<uint> resultUInt32_11 = Sse2.Shuffle(valueUInt32_11.Item1, (byte) 0b00010100);
+                    uintTable.SetOutArray(resultUInt32_11, 11, (byte) 0b00010100);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_12 = uintTable[12];
+                    Vector128<uint> resultUInt32_12 = Sse2.Shuffle(valueUInt32_12.Item1, (byte) 0b10000010);
+                    uintTable.SetOutArray(resultUInt32_12, 12, (byte) 0b10000010);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_13 = uintTable[13];
+                    Vector128<uint> resultUInt32_13 = Sse2.Shuffle(valueUInt32_13.Item1, (byte) 0b11001100);
+                    uintTable.SetOutArray(resultUInt32_13, 13, (byte) 0b11001100);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_14 = uintTable[14];
+                    Vector128<uint> resultUInt32_14 = Sse2.Shuffle(valueUInt32_14.Item1, (byte) 0b01100110);
+                    uintTable.SetOutArray(resultUInt32_14, 14, (byte) 0b01100110);
+
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_15 = uintTable[15];
+                    Vector128<uint> resultUInt32_15 = Sse2.Shuffle(valueUInt32_15.Item1, (byte) 0b10011001);
+                    uintTable.SetOutArray(resultUInt32_15, 15, (byte) 0b10011001);
+
+                    // Reference: imm bit 0 selects x[1] over x[0] for lane 0; imm bit 1 selects y[1] over y[0] for lane 1.
+                    CheckMethodFiveDouble<double, double, byte> checkDouble =
+                    (Span<double> x, Span<double> y, byte imm, Span<double> z, Span<double> a) =>
+                    {
+                        a[0] = (0x01 & imm) > 0 ? x[1] : x[0];
+                        a[1] = (0x02 & imm) > 0 ? y[1] : y[0];
+                        return a[0] == z[0] && a[1] == z[1];
+                    };
+
+                    if (!doubleTable.CheckResultShuffle(checkDouble))
+                    {
+                        PrintError8(doubleTable, methodUnderTestName, "(double x, double y, byte imm, double z, ref double a) => (a = shuffle(x, y, imm)) == z", checkDouble);
+                        testResult = Fail;
+                    }
+
+                    // Reference: successive 2-bit fields of imm, lowest first, index the source element for each lane.
+                    CheckMethodFive<int, int, byte> checkInt32 = (Span<int> x, byte imm, Span<int> z, Span<int> a) =>
+                    {
+                        bool result = true;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            a[i] = x[imm & 0x03];
+                            if (z[i] != a[i])
+                                result = false;
+                            imm = (byte) (imm >> 2);
+                        }
+                        return result;
+                    };
+
+                    if (!intTable.CheckResultShuffle(checkInt32))
+                    {
+                        PrintError(intTable, methodUnderTestName, "(int x, byte imm, int z, ref int a) => (a = shuffle(x, imm)) == z", checkInt32);
+                        testResult = Fail;
+                    }
+
+                    CheckMethodFive<uint, uint, byte> checkUInt32 = (Span<uint> x, byte imm, Span<uint> z, Span<uint> a) =>
+                    {
+                        bool result = true;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            a[i] = x[imm & 0x03];
+                            if (z[i] != a[i])
+                                result = false;
+                            imm = (byte) (imm >> 2);
+                        }
+                        return result;
+                    };
+
+                    if (!uintTable.CheckResultShuffle(checkUInt32))
+                    {
+                        PrintError(uintTable, methodUnderTestName, "(uint x, byte imm, uint z, ref uint a) => (a = shuffle(x, imm)) == z", checkUInt32);
+                        testResult = Fail;
+                    }
+                }
+            }
+            else
+            {
+                Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}");
+            }
+            return testResult;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.tt b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle.tt
new file mode 100644 (file)
index 0000000..8922e3f
--- /dev/null
@@ -0,0 +1,225 @@
+<#@ template debug="false" hostspecific="false" language="C#" #>
+<#@ assembly name="System.Core" #>
+<#@ import namespace="System.Linq" #>
+<#@ import namespace="System.Text" #>
+<#@ import namespace="System.Collections.Generic" #>
+<#@ output extension=".cs"  encoding="utf-8" #>
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace IntelHardwareIntrinsicTest
+{
+    internal static partial class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+
+        /// <summary>
+        /// Runs Sse2.Shuffle with fixed immediates over double, int and uint inputs and verifies each result.
+        /// </summary>
+        /// <returns>Pass, or Fail once any check fails; Pass is also returned when Sse2 is unsupported.</returns>
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+            string methodUnderTestName = nameof(Sse2.Shuffle);
+<#
+            string[] permuteData = new string[]
+            {
+                "0b11100100",         // identity
+                "0b00011011",         // invert
+                "0b00000000",         // broadcast element 0
+                "0b11111111",         // broadcast element 3
+                "0b01010101",         // broadcast element 1
+                "0b10101010",         // broadcast element 2
+                "0b11011000",         // swap middle elements
+                "0b00100111",         // swap external elements
+                "0b10110001",         // swap internal with external elements
+                "0b11110000",         // divide everything between external elements
+                "0b10100101",         // divide everything between internal elements
+                "0b00010100",         // pattern (0, 1, 1, 0)
+                "0b10000010",         // pattern (2, 0, 0, 2)
+                "0b11001100",         // pattern (3, 0, 3, 0)
+                "0b01100110",         // pattern (1, 2, 1, 2)
+                "0b10011001"          // pattern (2, 1, 2, 1)
+            };
+
+            string[] permuteDouble = new string[]
+            {
+                "0b00",
+                "0b01",
+                "0b10",
+                "0b11",
+            };
+
+#>
+
+            if (Sse2.IsSupported)
+            {
+                string[] permuteData = new string[]
+                {
+                    "0b11100100",         // identity
+                    "0b00011011",         // invert
+                    "0b00000000",         // broadcast element 0
+                    "0b11111111",         // broadcast element 3
+                    "0b01010101",         // broadcast element 1
+                    "0b10101010",         // broadcast element 2
+                    "0b11011000",         // swap middle elements
+                    "0b00100111",         // swap external elements
+                    "0b10110001",         // swap internal with external elements
+                    "0b11110000",         // divide everything between external elements
+                    "0b10100101",         // divide everything between internal elements
+                    "0b00010100",         // pattern (0, 1, 1, 0)
+                    "0b10000010",         // pattern (2, 0, 0, 2)
+                    "0b11001100",         // pattern (3, 0, 3, 0)
+                    "0b01100110",         // pattern (1, 2, 1, 2)
+                    "0b10011001"          // pattern (2, 1, 2, 1)
+                };
+
+                string[] permuteDouble = new string[]
+                {
+                    "0b00",
+                    "0b01",
+                    "0b10",
+                    "0b11",
+                };
+
+                using (var doubleTable = TestTableTuvImmSse2<double, double, byte>.Create(permuteDouble.Length))
+                using (var intTable = TestTableTuvImmSse2<int, int, byte>.Create(permuteData.Length))
+                using (var uintTable = TestTableTuvImmSse2<uint, uint, byte>.Create(permuteData.Length))
+                {
+                    // Vector128<double> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<double>(
+                        doubleTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (uint) i % 2;
+                        });
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<double>(
+                        doubleTable.inArray2, 16, (int i, int elNo) =>
+                        {
+                            return (uint) i % 2 + 10;
+                        });
+
+<#
+       for (int i = 0; i < permuteDouble.Length; i++)
+       {
+#>
+                    (Vector128<double>, Vector128<double>) valueDouble_<#= i #> = doubleTable[<#= i #>];
+                    Vector128<double> resultDouble_<#= i #> = Sse2.Shuffle(valueDouble_<#= i #>.Item1, valueDouble_<#= i #>.Item2, (byte) <#= permuteDouble[i] #>);
+                    doubleTable.SetOutArray(resultDouble_<#= i #>, <#= i #>, (byte) <#= permuteDouble[i] #>);
+
+<#
+
+       }
+#>
+                    // Vector128<int> tests
+
+                    // Initialize intTable (not uintTable) so the int shuffles below read the seeded inputs.
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<int>(
+                        intTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return i % 4;
+                        });
+
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                    (Vector128<int>, Vector128<int>) valueInt32_<#= i #> = intTable[<#= i #>];
+                    Vector128<int> resultInt32_<#= i #> = Sse2.Shuffle(valueInt32_<#= i #>.Item1, (byte) <#= permuteData[i] #>);
+                    intTable.SetOutArray(resultInt32_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>);
+
+<#
+
+       }
+#>
+                    // Vector128<uint> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<uint>(
+                        uintTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (uint) i % 4;
+                        });
+
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                    (Vector128<uint>, Vector128<uint>) valueUInt32_<#= i #> = uintTable[<#= i #>];
+                    Vector128<uint> resultUInt32_<#= i #> = Sse2.Shuffle(valueUInt32_<#= i #>.Item1, (byte) <#= permuteData[i] #>);
+                    uintTable.SetOutArray(resultUInt32_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>);
+
+<#
+
+       }
+#>
+                    // Reference: imm bit 0 selects x[1] over x[0] for lane 0; imm bit 1 selects y[1] over y[0] for lane 1.
+                    CheckMethodFiveDouble<double, double, byte> checkDouble =
+                    (Span<double> x, Span<double> y, byte imm, Span<double> z, Span<double> a) =>
+                    {
+                        a[0] = (0x01 & imm) > 0 ? x[1] : x[0];
+                        a[1] = (0x02 & imm) > 0 ? y[1] : y[0];
+                        return a[0] == z[0] && a[1] == z[1];
+                    };
+
+                    if (!doubleTable.CheckResultShuffle(checkDouble))
+                    {
+                        PrintError8(doubleTable, methodUnderTestName, "(double x, double y, byte imm, double z, ref double a) => (a = shuffle(x, y, imm)) == z", checkDouble);
+                        testResult = Fail;
+                    }
+
+                    // Reference: successive 2-bit fields of imm, lowest first, index the source element for each lane.
+                    CheckMethodFive<int, int, byte> checkInt32 = (Span<int> x, byte imm, Span<int> z, Span<int> a) =>
+                    {
+                        bool result = true;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            a[i] = x[imm & 0x03];
+                            if (z[i] != a[i])
+                                result = false;
+                            imm = (byte) (imm >> 2);
+                        }
+                        return result;
+                    };
+
+                    if (!intTable.CheckResultShuffle(checkInt32))
+                    {
+                        PrintError(intTable, methodUnderTestName, "(int x, byte imm, int z, ref int a) => (a = shuffle(x, imm)) == z", checkInt32);
+                        testResult = Fail;
+                    }
+
+                    CheckMethodFive<uint, uint, byte> checkUInt32 = (Span<uint> x, byte imm, Span<uint> z, Span<uint> a) =>
+                    {
+                        bool result = true;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            a[i] = x[imm & 0x03];
+                            if (z[i] != a[i])
+                                result = false;
+                            imm = (byte) (imm >> 2);
+                        }
+                        return result;
+                    };
+
+                    if (!uintTable.CheckResultShuffle(checkUInt32))
+                    {
+                        PrintError(uintTable, methodUnderTestName, "(uint x, byte imm, uint z, ref uint a) => (a = shuffle(x, imm)) == z", checkUInt32);
+                        testResult = Fail;
+                    }
+                }
+            }
+            else
+            {
+                Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}");
+            }
+            return testResult;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.cs
new file mode 100644 (file)
index 0000000..2bc3d0f
--- /dev/null
@@ -0,0 +1,266 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace IntelHardwareIntrinsicTest
+{
+    // Test driver for Sse2.ShuffleHigh on Vector128<short> and Vector128<ushort>.
+    // Generated from ShuffleHigh.tt (see the project files); lasting changes belong in the template.
+    internal static partial class Program
+    {
+        // Values Main returns: Pass while no table check has failed, Fail once one has.
+        const short Pass = 100;
+        const short Fail = 0;
+
+        /// <summary>
+        /// Calls Sse2.ShuffleHigh with sixteen literal control bytes for both short and ushort
+        /// vectors, records every result in a test table, and hands each table a scalar
+        /// reference lambda through CheckResultShuffle.
+        /// </summary>
+        /// <param name="args">Command-line arguments; not read.</param>
+        /// <returns>
+        /// Pass (100) if no table check fails or if Sse2 is not supported (the tests are then
+        /// skipped with a console message); Fail (0) if any table check returns false.
+        /// </returns>
+        static unsafe int Main(string[] args)
+        {
+            short testResult = Pass;
+            short testsCount = 16;
+            string methodUnderTestName = nameof(Sse2.ShuffleHigh);
+
+            if (Sse2.IsSupported)
+            {
+                // Immediate control bytes exercised below; the position in this list is the table
+                // row that receives the result. Each byte holds four 2-bit selectors; the labels
+                // list them from the most significant bit pair down.
+                //   0b11100100  identity
+                //   0b00011011  invert
+                //   0b00000000  broadcast element 0
+                //   0b11111111  broadcast element 3
+                //   0b01010101  broadcast element 1
+                //   0b10101010  broadcast element 2
+                //   0b11011000  swap middle elements
+                //   0b00100111  swap external elements
+                //   0b10110001  swap internal with external elements
+                //   0b11110000  divide everything between external elements
+                //   0b10100101  divide everything between internal elements
+                //   0b00010100  pattern (0, 1, 1, 0)
+                //   0b10000010  pattern (2, 0, 0, 2)
+                //   0b11001100  pattern (3, 0, 3, 0)
+                //   0b01100110  pattern (1, 2, 1, 2)
+                //   0b10011001  pattern (2, 1, 2, 1)
+
+                using (var shortTable = TestTableTuvImmSse2<short, short, byte>.Create(testsCount))
+                using (var ushortTable = TestTableTuvImmSse2<ushort, ushort, byte>.Create(testsCount))
+                {
+
+                    // Vector128<short> tests
+
+                    // The first input array is filled through a callback that returns i % 8; how the
+                    // helper walks the array is defined in TestUtilities (not shown here).
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<short>(
+                        shortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (short)(i % 8);
+                        });
+
+                    // Only Item1 of each row is passed to ShuffleHigh below, so the second input
+                    // array just gets a constant.
+                    TestUtilities.InitializeWithConstValue<short>(0, shortTable.inArray2);
+
+                    // Per row: read the inputs, shuffle Item1 with the row's control byte, then
+                    // record the result together with that control byte.
+                    (Vector128<short>, Vector128<short>) valueInt16_0 = shortTable[0];
+                    Vector128<short> resultInt16_0 = Sse2.ShuffleHigh(valueInt16_0.Item1, (byte) 0b11100100);
+                    shortTable.SetOutArray(resultInt16_0, 0, (byte) 0b11100100);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_1 = shortTable[1];
+                    Vector128<short> resultInt16_1 = Sse2.ShuffleHigh(valueInt16_1.Item1, (byte) 0b00011011);
+                    shortTable.SetOutArray(resultInt16_1, 1, (byte) 0b00011011);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_2 = shortTable[2];
+                    Vector128<short> resultInt16_2 = Sse2.ShuffleHigh(valueInt16_2.Item1, (byte) 0b00000000);
+                    shortTable.SetOutArray(resultInt16_2, 2, (byte) 0b00000000);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_3 = shortTable[3];
+                    Vector128<short> resultInt16_3 = Sse2.ShuffleHigh(valueInt16_3.Item1, (byte) 0b11111111);
+                    shortTable.SetOutArray(resultInt16_3, 3, (byte) 0b11111111);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_4 = shortTable[4];
+                    Vector128<short> resultInt16_4 = Sse2.ShuffleHigh(valueInt16_4.Item1, (byte) 0b01010101);
+                    shortTable.SetOutArray(resultInt16_4, 4, (byte) 0b01010101);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_5 = shortTable[5];
+                    Vector128<short> resultInt16_5 = Sse2.ShuffleHigh(valueInt16_5.Item1, (byte) 0b10101010);
+                    shortTable.SetOutArray(resultInt16_5, 5, (byte) 0b10101010);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_6 = shortTable[6];
+                    Vector128<short> resultInt16_6 = Sse2.ShuffleHigh(valueInt16_6.Item1, (byte) 0b11011000);
+                    shortTable.SetOutArray(resultInt16_6, 6, (byte) 0b11011000);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_7 = shortTable[7];
+                    Vector128<short> resultInt16_7 = Sse2.ShuffleHigh(valueInt16_7.Item1, (byte) 0b00100111);
+                    shortTable.SetOutArray(resultInt16_7, 7, (byte) 0b00100111);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_8 = shortTable[8];
+                    Vector128<short> resultInt16_8 = Sse2.ShuffleHigh(valueInt16_8.Item1, (byte) 0b10110001);
+                    shortTable.SetOutArray(resultInt16_8, 8, (byte) 0b10110001);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_9 = shortTable[9];
+                    Vector128<short> resultInt16_9 = Sse2.ShuffleHigh(valueInt16_9.Item1, (byte) 0b11110000);
+                    shortTable.SetOutArray(resultInt16_9, 9, (byte) 0b11110000);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_10 = shortTable[10];
+                    Vector128<short> resultInt16_10 = Sse2.ShuffleHigh(valueInt16_10.Item1, (byte) 0b10100101);
+                    shortTable.SetOutArray(resultInt16_10, 10, (byte) 0b10100101);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_11 = shortTable[11];
+                    Vector128<short> resultInt16_11 = Sse2.ShuffleHigh(valueInt16_11.Item1, (byte) 0b00010100);
+                    shortTable.SetOutArray(resultInt16_11, 11, (byte) 0b00010100);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_12 = shortTable[12];
+                    Vector128<short> resultInt16_12 = Sse2.ShuffleHigh(valueInt16_12.Item1, (byte) 0b10000010);
+                    shortTable.SetOutArray(resultInt16_12, 12, (byte) 0b10000010);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_13 = shortTable[13];
+                    Vector128<short> resultInt16_13 = Sse2.ShuffleHigh(valueInt16_13.Item1, (byte) 0b11001100);
+                    shortTable.SetOutArray(resultInt16_13, 13, (byte) 0b11001100);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_14 = shortTable[14];
+                    Vector128<short> resultInt16_14 = Sse2.ShuffleHigh(valueInt16_14.Item1, (byte) 0b01100110);
+                    shortTable.SetOutArray(resultInt16_14, 14, (byte) 0b01100110);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_15 = shortTable[15];
+                    Vector128<short> resultInt16_15 = Sse2.ShuffleHigh(valueInt16_15.Item1, (byte) 0b10011001);
+                    shortTable.SetOutArray(resultInt16_15, 15, (byte) 0b10011001);
+
+
+                    // Vector128<ushort> tests
+
+                    // Same input setup as for the short table.
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<ushort>(
+                        ushortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (ushort)(i % 8);
+                        });
+
+                    TestUtilities.InitializeWithConstValue<ushort>(0, ushortTable.inArray2);
+
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_0 = ushortTable[0];
+                    Vector128<ushort> resultUInt16_0 = Sse2.ShuffleHigh(valueUInt16_0.Item1, (byte) 0b11100100);
+                    ushortTable.SetOutArray(resultUInt16_0, 0, (byte) 0b11100100);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_1 = ushortTable[1];
+                    Vector128<ushort> resultUInt16_1 = Sse2.ShuffleHigh(valueUInt16_1.Item1, (byte) 0b00011011);
+                    ushortTable.SetOutArray(resultUInt16_1, 1, (byte) 0b00011011);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_2 = ushortTable[2];
+                    Vector128<ushort> resultUInt16_2 = Sse2.ShuffleHigh(valueUInt16_2.Item1, (byte) 0b00000000);
+                    ushortTable.SetOutArray(resultUInt16_2, 2, (byte) 0b00000000);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_3 = ushortTable[3];
+                    Vector128<ushort> resultUInt16_3 = Sse2.ShuffleHigh(valueUInt16_3.Item1, (byte) 0b11111111);
+                    ushortTable.SetOutArray(resultUInt16_3, 3, (byte) 0b11111111);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_4 = ushortTable[4];
+                    Vector128<ushort> resultUInt16_4 = Sse2.ShuffleHigh(valueUInt16_4.Item1, (byte) 0b01010101);
+                    ushortTable.SetOutArray(resultUInt16_4, 4, (byte) 0b01010101);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_5 = ushortTable[5];
+                    Vector128<ushort> resultUInt16_5 = Sse2.ShuffleHigh(valueUInt16_5.Item1, (byte) 0b10101010);
+                    ushortTable.SetOutArray(resultUInt16_5, 5, (byte) 0b10101010);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_6 = ushortTable[6];
+                    Vector128<ushort> resultUInt16_6 = Sse2.ShuffleHigh(valueUInt16_6.Item1, (byte) 0b11011000);
+                    ushortTable.SetOutArray(resultUInt16_6, 6, (byte) 0b11011000);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_7 = ushortTable[7];
+                    Vector128<ushort> resultUInt16_7 = Sse2.ShuffleHigh(valueUInt16_7.Item1, (byte) 0b00100111);
+                    ushortTable.SetOutArray(resultUInt16_7, 7, (byte) 0b00100111);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_8 = ushortTable[8];
+                    Vector128<ushort> resultUInt16_8 = Sse2.ShuffleHigh(valueUInt16_8.Item1, (byte) 0b10110001);
+                    ushortTable.SetOutArray(resultUInt16_8, 8, (byte) 0b10110001);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_9 = ushortTable[9];
+                    Vector128<ushort> resultUInt16_9 = Sse2.ShuffleHigh(valueUInt16_9.Item1, (byte) 0b11110000);
+                    ushortTable.SetOutArray(resultUInt16_9, 9, (byte) 0b11110000);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_10 = ushortTable[10];
+                    Vector128<ushort> resultUInt16_10 = Sse2.ShuffleHigh(valueUInt16_10.Item1, (byte) 0b10100101);
+                    ushortTable.SetOutArray(resultUInt16_10, 10, (byte) 0b10100101);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_11 = ushortTable[11];
+                    Vector128<ushort> resultUInt16_11 = Sse2.ShuffleHigh(valueUInt16_11.Item1, (byte) 0b00010100);
+                    ushortTable.SetOutArray(resultUInt16_11, 11, (byte) 0b00010100);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_12 = ushortTable[12];
+                    Vector128<ushort> resultUInt16_12 = Sse2.ShuffleHigh(valueUInt16_12.Item1, (byte) 0b10000010);
+                    ushortTable.SetOutArray(resultUInt16_12, 12, (byte) 0b10000010);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_13 = ushortTable[13];
+                    Vector128<ushort> resultUInt16_13 = Sse2.ShuffleHigh(valueUInt16_13.Item1, (byte) 0b11001100);
+                    ushortTable.SetOutArray(resultUInt16_13, 13, (byte) 0b11001100);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_14 = ushortTable[14];
+                    Vector128<ushort> resultUInt16_14 = Sse2.ShuffleHigh(valueUInt16_14.Item1, (byte) 0b01100110);
+                    ushortTable.SetOutArray(resultUInt16_14, 14, (byte) 0b01100110);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_15 = ushortTable[15];
+                    Vector128<ushort> resultUInt16_15 = Sse2.ShuffleHigh(valueUInt16_15.Item1, (byte) 0b10011001);
+                    ushortTable.SetOutArray(resultUInt16_15, 15, (byte) 0b10011001);
+
+
+                    // Scalar reference for one row of the short table. As used below: x holds the
+                    // input elements, imm the control byte, a receives the expected elements, and z
+                    // is compared against them (presumably the intrinsic's output; confirm in
+                    // TestTableSse2.cs).
+                    CheckMethodFive<short, short, byte> checkInt16 = (Span<short> x, byte imm, Span<short> z, Span<short> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i < halfLength)
+                            {
+                                // Lower half is expected to pass through unchanged.
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                // Upper half: the low two bits of imm pick one of x[4]..x[7];
+                                // then the next 2-bit selector is moved into the low bits.
+                                a[i] = x[(imm & 0x03) + 4];
+                                imm = (byte) (imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                // No early exit: a is filled for every element even after a mismatch.
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!shortTable.CheckResultShuffle(checkInt16))
+                    {
+                        PrintError8(shortTable, methodUnderTestName, "CheckResultShuffleHigh", checkInt16);
+                        testResult = Fail;
+                    }
+
+                    // Same reference computation for the ushort table.
+                    CheckMethodFive<ushort, ushort, byte> checkUInt16 = (Span<ushort> x, byte imm, Span<ushort> z, Span<ushort> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i < halfLength)
+                            {
+                                // Lower half is expected to pass through unchanged.
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                // Upper half: the low two bits of imm pick one of x[4]..x[7];
+                                // then the next 2-bit selector is moved into the low bits.
+                                a[i] = x[(imm & 0x03) + 4];
+                                imm = (byte) (imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                // No early exit: a is filled for every element even after a mismatch.
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!ushortTable.CheckResultShuffle(checkUInt16))
+                    {
+                        PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffleHigh", checkUInt16);
+                        testResult = Fail;
+                    }
+                }
+            }
+            else
+            {
+                Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}");
+            }
+            return testResult;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.tt b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh.tt
new file mode 100644 (file)
index 0000000..b94c24c
--- /dev/null
@@ -0,0 +1,190 @@
+<#@ template debug="false" hostspecific="false" language="C#" #>
+<#@ assembly name="System.Core" #>
+<#@ import namespace="System.Linq" #>
+<#@ import namespace="System.Text" #>
+<#@ import namespace="System.Collections.Generic" #>
+<#@ output extension=".cs"  encoding="utf-8" #>
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace IntelHardwareIntrinsicTest
+{
+    // Test driver for Sse2.ShuffleHigh on Vector128<short> and Vector128<ushort>.
+    // Generated from ShuffleHigh.tt (see the project files); lasting changes belong in the template.
+    internal static partial class Program
+    {
+        // Values Main returns: Pass while no table check has failed, Fail once one has.
+        const short Pass = 100;
+        const short Fail = 0;
+
+        /// <summary>
+        /// Calls Sse2.ShuffleHigh with a list of literal control bytes for both short and ushort
+        /// vectors, records every result in a test table, and hands each table a scalar
+        /// reference lambda through CheckResultShuffle.
+        /// </summary>
+        /// <param name="args">Command-line arguments; not read.</param>
+        /// <returns>
+        /// Pass (100) if no table check fails or if Sse2 is not supported (the tests are then
+        /// skipped with a console message); Fail (0) if any table check returns false.
+        /// </returns>
+        static unsafe int Main(string[] args)
+        {
+<#
+            // Template-time table, one entry per generated test row: { control byte literal, label }.
+            // It exists only while the template runs; the generated code receives the literals
+            // inline, the labels as a comment list, and the entry count as testsCount.
+            string[][] permuteData = new string[][]
+            {
+                new[] { "0b11100100", "identity" },
+                new[] { "0b00011011", "invert" },
+                new[] { "0b00000000", "broadcast element 0" },
+                new[] { "0b11111111", "broadcast element 3" },
+                new[] { "0b01010101", "broadcast element 1" },
+                new[] { "0b10101010", "broadcast element 2" },
+                new[] { "0b11011000", "swap middle elements" },
+                new[] { "0b00100111", "swap external elements" },
+                new[] { "0b10110001", "swap internal with external elements" },
+                new[] { "0b11110000", "divide everything between external elements" },
+                new[] { "0b10100101", "divide everything between internal elements" },
+                new[] { "0b00010100", "pattern (0, 1, 1, 0)" },
+                new[] { "0b10000010", "pattern (2, 0, 0, 2)" },
+                new[] { "0b11001100", "pattern (3, 0, 3, 0)" },
+                new[] { "0b01100110", "pattern (1, 2, 1, 2)" },
+                new[] { "0b10011001", "pattern (2, 1, 2, 1)" }
+            };
+#>
+            short testResult = Pass;
+            short testsCount = <#= permuteData.Length #>;
+            string methodUnderTestName = nameof(Sse2.ShuffleHigh);
+
+            if (Sse2.IsSupported)
+            {
+                // Immediate control bytes exercised below; the position in this list is the table
+                // row that receives the result. Each byte holds four 2-bit selectors; the labels
+                // list them from the most significant bit pair down.
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                //   <#= permuteData[i][0] #>  <#= permuteData[i][1] #>
+<#
+       }
+#>
+
+                using (var shortTable = TestTableTuvImmSse2<short, short, byte>.Create(testsCount))
+                using (var ushortTable = TestTableTuvImmSse2<ushort, ushort, byte>.Create(testsCount))
+                {
+
+                    // Vector128<short> tests
+
+                    // The first input array is filled through a callback that returns i % 8; how the
+                    // helper walks the array is defined in TestUtilities (not shown here).
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<short>(
+                        shortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (short)(i % 8);
+                        });
+
+                    // Only Item1 of each row is passed to ShuffleHigh below, so the second input
+                    // array just gets a constant.
+                    TestUtilities.InitializeWithConstValue<short>(0, shortTable.inArray2);
+
+                    // Per row: read the inputs, shuffle Item1 with the row's control byte, then
+                    // record the result together with that control byte.
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                    (Vector128<short>, Vector128<short>) valueInt16_<#= i #> = shortTable[<#= i #>];
+                    Vector128<short> resultInt16_<#= i #> = Sse2.ShuffleHigh(valueInt16_<#= i #>.Item1, (byte) <#= permuteData[i][0] #>);
+                    shortTable.SetOutArray(resultInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i][0] #>);
+
+<#
+
+       }
+#>
+
+                    // Vector128<ushort> tests
+
+                    // Same input setup as for the short table.
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<ushort>(
+                        ushortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (ushort)(i % 8);
+                        });
+
+                    TestUtilities.InitializeWithConstValue<ushort>(0, ushortTable.inArray2);
+
+
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_<#= i #> = ushortTable[<#= i #>];
+                    Vector128<ushort> resultUInt16_<#= i #> = Sse2.ShuffleHigh(valueUInt16_<#= i #>.Item1, (byte) <#= permuteData[i][0] #>);
+                    ushortTable.SetOutArray(resultUInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i][0] #>);
+
+<#
+
+       }
+#>
+
+                    // Scalar reference for one row of the short table. As used below: x holds the
+                    // input elements, imm the control byte, a receives the expected elements, and z
+                    // is compared against them (presumably the intrinsic's output; confirm in
+                    // TestTableSse2.cs).
+                    CheckMethodFive<short, short, byte> checkInt16 = (Span<short> x, byte imm, Span<short> z, Span<short> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i < halfLength)
+                            {
+                                // Lower half is expected to pass through unchanged.
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                // Upper half: the low two bits of imm pick one of x[4]..x[7];
+                                // then the next 2-bit selector is moved into the low bits.
+                                a[i] = x[(imm & 0x03) + 4];
+                                imm = (byte) (imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                // No early exit: a is filled for every element even after a mismatch.
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!shortTable.CheckResultShuffle(checkInt16))
+                    {
+                        PrintError8(shortTable, methodUnderTestName, "CheckResultShuffleHigh", checkInt16);
+                        testResult = Fail;
+                    }
+
+                    // Same reference computation for the ushort table.
+                    CheckMethodFive<ushort, ushort, byte> checkUInt16 = (Span<ushort> x, byte imm, Span<ushort> z, Span<ushort> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i < halfLength)
+                            {
+                                // Lower half is expected to pass through unchanged.
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                // Upper half: the low two bits of imm pick one of x[4]..x[7];
+                                // then the next 2-bit selector is moved into the low bits.
+                                a[i] = x[(imm & 0x03) + 4];
+                                imm = (byte) (imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                // No early exit: a is filled for every element even after a mismatch.
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!ushortTable.CheckResultShuffle(checkUInt16))
+                    {
+                        PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffleHigh", checkUInt16);
+                        testResult = Fail;
+                    }
+                }
+            }
+            else
+            {
+                Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}");
+            }
+            return testResult;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_r.csproj
new file mode 100644 (file)
index 0000000..208548c
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{5B060A38-515A-4C2C-9DFB-2EC23B20FCA1}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup> <!-- Build flavor of this project: no debug info, and Optimize is left empty here (ShuffleHigh_ro.csproj sets it to True); presumably that leaves compiler optimizations off, confirm against dir.props -->
+    <DebugType>None</DebugType>
+    <Optimize>
+    </Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="ShuffleHigh.cs"> <!-- Marked as generated output of ShuffleHigh.tt (AutoGen / DependentUpon) -->
+      <AutoGen>True</AutoGen>
+      <DesignTime>True</DesignTime>
+      <DependentUpon>ShuffleHigh.tt</DependentUpon>
+    </Compile>
+    <Compile Include="TestTableSse2.cs" /> <!-- Compiled into the same executable; presumably supplies the test-table helpers ShuffleHigh.cs uses -->
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="ShuffleHigh.tt"> <!-- T4 template; its last generated output is ShuffleHigh.cs -->
+      <Generator>TextTemplatingFileGenerator</Generator>
+      <LastGenOutput>ShuffleHigh.cs</LastGenOutput>
+    </Content>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleHigh_ro.csproj
new file mode 100644 (file)
index 0000000..a9a3a47
--- /dev/null
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{47111150-B83B-44A1-AB94-807BEF42E5E0}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup> <!-- Build flavor of this project: no debug info, compiler optimizations requested (ShuffleHigh_r.csproj leaves Optimize empty) -->
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="ShuffleHigh.cs"> <!-- Marked as generated output of ShuffleHigh.tt (AutoGen / DependentUpon) -->
+      <AutoGen>True</AutoGen>
+      <DesignTime>True</DesignTime>
+      <DependentUpon>ShuffleHigh.tt</DependentUpon>
+    </Compile>
+    <Compile Include="TestTableSse2.cs" /> <!-- Compiled into the same executable; presumably supplies the test-table helpers ShuffleHigh.cs uses -->
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="ShuffleHigh.tt"> <!-- T4 template; its last generated output is ShuffleHigh.cs -->
+      <Generator>TextTemplatingFileGenerator</Generator>
+      <LastGenOutput>ShuffleHigh.cs</LastGenOutput>
+    </Content>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.cs
new file mode 100644 (file)
index 0000000..cfbf02b
--- /dev/null
@@ -0,0 +1,255 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace IntelHardwareIntrinsicTest
+{
+    internal static partial class Program
+    {
+        // Exit codes returned from Main.
+        const int Pass = 100;
+        const int Fail = 0;
+
+        /// <summary>
+        /// Exercises Sse2.ShuffleLow on Vector128&lt;short&gt; and Vector128&lt;ushort&gt; inputs with sixteen
+        /// constant control bytes and checks every result against a scalar reference computation.
+        /// </summary>
+        /// <param name="args">Command line arguments; not used.</param>
+        /// <returns>Pass when every check succeeds or Sse2 is not supported; otherwise Fail.</returns>
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+            short testsCount = 16;
+            string methodUnderTestName = nameof(Sse2.ShuffleLow);
+
+            if (Sse2.IsSupported)
+            {
+                using (var shortTable = TestTableTuvImmSse2<short, short, byte>.Create(testsCount))
+                using (var ushortTable = TestTableTuvImmSse2<ushort, ushort, byte>.Create(testsCount))
+                {
+                    // Vector128<short> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<short>(
+                        shortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (short)(i % 8);
+                        });
+
+                    TestUtilities.InitializeWithConstValue<short>(0, shortTable.inArray2);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_0 = shortTable[0];
+                    Vector128<short> resultInt16_0 = Sse2.ShuffleLow(valueInt16_0.Item1, (byte) 0b11100100);
+                    shortTable.SetOutArray(resultInt16_0, 0, (byte) 0b11100100);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_1 = shortTable[1];
+                    Vector128<short> resultInt16_1 = Sse2.ShuffleLow(valueInt16_1.Item1, (byte) 0b00011011);
+                    shortTable.SetOutArray(resultInt16_1, 1, (byte) 0b00011011);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_2 = shortTable[2];
+                    Vector128<short> resultInt16_2 = Sse2.ShuffleLow(valueInt16_2.Item1, (byte) 0b00000000);
+                    shortTable.SetOutArray(resultInt16_2, 2, (byte) 0b00000000);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_3 = shortTable[3];
+                    Vector128<short> resultInt16_3 = Sse2.ShuffleLow(valueInt16_3.Item1, (byte) 0b11111111);
+                    shortTable.SetOutArray(resultInt16_3, 3, (byte) 0b11111111);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_4 = shortTable[4];
+                    Vector128<short> resultInt16_4 = Sse2.ShuffleLow(valueInt16_4.Item1, (byte) 0b01010101);
+                    shortTable.SetOutArray(resultInt16_4, 4, (byte) 0b01010101);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_5 = shortTable[5];
+                    Vector128<short> resultInt16_5 = Sse2.ShuffleLow(valueInt16_5.Item1, (byte) 0b10101010);
+                    shortTable.SetOutArray(resultInt16_5, 5, (byte) 0b10101010);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_6 = shortTable[6];
+                    Vector128<short> resultInt16_6 = Sse2.ShuffleLow(valueInt16_6.Item1, (byte) 0b11011000);
+                    shortTable.SetOutArray(resultInt16_6, 6, (byte) 0b11011000);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_7 = shortTable[7];
+                    Vector128<short> resultInt16_7 = Sse2.ShuffleLow(valueInt16_7.Item1, (byte) 0b00100111);
+                    shortTable.SetOutArray(resultInt16_7, 7, (byte) 0b00100111);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_8 = shortTable[8];
+                    Vector128<short> resultInt16_8 = Sse2.ShuffleLow(valueInt16_8.Item1, (byte) 0b10110001);
+                    shortTable.SetOutArray(resultInt16_8, 8, (byte) 0b10110001);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_9 = shortTable[9];
+                    Vector128<short> resultInt16_9 = Sse2.ShuffleLow(valueInt16_9.Item1, (byte) 0b11110000);
+                    shortTable.SetOutArray(resultInt16_9, 9, (byte) 0b11110000);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_10 = shortTable[10];
+                    Vector128<short> resultInt16_10 = Sse2.ShuffleLow(valueInt16_10.Item1, (byte) 0b10100101);
+                    shortTable.SetOutArray(resultInt16_10, 10, (byte) 0b10100101);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_11 = shortTable[11];
+                    Vector128<short> resultInt16_11 = Sse2.ShuffleLow(valueInt16_11.Item1, (byte) 0b00010100);
+                    shortTable.SetOutArray(resultInt16_11, 11, (byte) 0b00010100);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_12 = shortTable[12];
+                    Vector128<short> resultInt16_12 = Sse2.ShuffleLow(valueInt16_12.Item1, (byte) 0b10000010);
+                    shortTable.SetOutArray(resultInt16_12, 12, (byte) 0b10000010);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_13 = shortTable[13];
+                    Vector128<short> resultInt16_13 = Sse2.ShuffleLow(valueInt16_13.Item1, (byte) 0b11001100);
+                    shortTable.SetOutArray(resultInt16_13, 13, (byte) 0b11001100);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_14 = shortTable[14];
+                    Vector128<short> resultInt16_14 = Sse2.ShuffleLow(valueInt16_14.Item1, (byte) 0b01100110);
+                    shortTable.SetOutArray(resultInt16_14, 14, (byte) 0b01100110);
+
+                    (Vector128<short>, Vector128<short>) valueInt16_15 = shortTable[15];
+                    Vector128<short> resultInt16_15 = Sse2.ShuffleLow(valueInt16_15.Item1, (byte) 0b10011001);
+                    shortTable.SetOutArray(resultInt16_15, 15, (byte) 0b10011001);
+
+                    // Vector128<ushort> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<ushort>(
+                        ushortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (ushort)(i % 8);
+                        });
+
+                    TestUtilities.InitializeWithConstValue<ushort>(0, ushortTable.inArray2);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_0 = ushortTable[0];
+                    Vector128<ushort> resultUInt16_0 = Sse2.ShuffleLow(valueUInt16_0.Item1, (byte) 0b11100100);
+                    ushortTable.SetOutArray(resultUInt16_0, 0, (byte) 0b11100100);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_1 = ushortTable[1];
+                    Vector128<ushort> resultUInt16_1 = Sse2.ShuffleLow(valueUInt16_1.Item1, (byte) 0b00011011);
+                    ushortTable.SetOutArray(resultUInt16_1, 1, (byte) 0b00011011);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_2 = ushortTable[2];
+                    Vector128<ushort> resultUInt16_2 = Sse2.ShuffleLow(valueUInt16_2.Item1, (byte) 0b00000000);
+                    ushortTable.SetOutArray(resultUInt16_2, 2, (byte) 0b00000000);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_3 = ushortTable[3];
+                    Vector128<ushort> resultUInt16_3 = Sse2.ShuffleLow(valueUInt16_3.Item1, (byte) 0b11111111);
+                    ushortTable.SetOutArray(resultUInt16_3, 3, (byte) 0b11111111);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_4 = ushortTable[4];
+                    Vector128<ushort> resultUInt16_4 = Sse2.ShuffleLow(valueUInt16_4.Item1, (byte) 0b01010101);
+                    ushortTable.SetOutArray(resultUInt16_4, 4, (byte) 0b01010101);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_5 = ushortTable[5];
+                    Vector128<ushort> resultUInt16_5 = Sse2.ShuffleLow(valueUInt16_5.Item1, (byte) 0b10101010);
+                    ushortTable.SetOutArray(resultUInt16_5, 5, (byte) 0b10101010);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_6 = ushortTable[6];
+                    Vector128<ushort> resultUInt16_6 = Sse2.ShuffleLow(valueUInt16_6.Item1, (byte) 0b11011000);
+                    ushortTable.SetOutArray(resultUInt16_6, 6, (byte) 0b11011000);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_7 = ushortTable[7];
+                    Vector128<ushort> resultUInt16_7 = Sse2.ShuffleLow(valueUInt16_7.Item1, (byte) 0b00100111);
+                    ushortTable.SetOutArray(resultUInt16_7, 7, (byte) 0b00100111);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_8 = ushortTable[8];
+                    Vector128<ushort> resultUInt16_8 = Sse2.ShuffleLow(valueUInt16_8.Item1, (byte) 0b10110001);
+                    ushortTable.SetOutArray(resultUInt16_8, 8, (byte) 0b10110001);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_9 = ushortTable[9];
+                    Vector128<ushort> resultUInt16_9 = Sse2.ShuffleLow(valueUInt16_9.Item1, (byte) 0b11110000);
+                    ushortTable.SetOutArray(resultUInt16_9, 9, (byte) 0b11110000);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_10 = ushortTable[10];
+                    Vector128<ushort> resultUInt16_10 = Sse2.ShuffleLow(valueUInt16_10.Item1, (byte) 0b10100101);
+                    ushortTable.SetOutArray(resultUInt16_10, 10, (byte) 0b10100101);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_11 = ushortTable[11];
+                    Vector128<ushort> resultUInt16_11 = Sse2.ShuffleLow(valueUInt16_11.Item1, (byte) 0b00010100);
+                    ushortTable.SetOutArray(resultUInt16_11, 11, (byte) 0b00010100);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_12 = ushortTable[12];
+                    Vector128<ushort> resultUInt16_12 = Sse2.ShuffleLow(valueUInt16_12.Item1, (byte) 0b10000010);
+                    ushortTable.SetOutArray(resultUInt16_12, 12, (byte) 0b10000010);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_13 = ushortTable[13];
+                    Vector128<ushort> resultUInt16_13 = Sse2.ShuffleLow(valueUInt16_13.Item1, (byte) 0b11001100);
+                    ushortTable.SetOutArray(resultUInt16_13, 13, (byte) 0b11001100);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_14 = ushortTable[14];
+                    Vector128<ushort> resultUInt16_14 = Sse2.ShuffleLow(valueUInt16_14.Item1, (byte) 0b01100110);
+                    ushortTable.SetOutArray(resultUInt16_14, 14, (byte) 0b01100110);
+
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_15 = ushortTable[15];
+                    Vector128<ushort> resultUInt16_15 = Sse2.ShuffleLow(valueUInt16_15.Item1, (byte) 0b10011001);
+                    ushortTable.SetOutArray(resultUInt16_15, 15, (byte) 0b10011001);
+
+                    // Scalar reference for ShuffleLow: element i of the lower half of x is read from the index
+                    // held in the next two bits of imm (lowest bits first); the upper half of x is copied as is.
+                    // The expected values are stored in a and compared element by element with z.
+                    CheckMethodFive<short, short, byte> checkInt16 = (Span<short> x, byte imm, Span<short> z, Span<short> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i >= halfLength)
+                            {
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                a[i] = x[imm & 0x03];
+                                imm = (byte)(imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!shortTable.CheckResultShuffle(checkInt16))
+                    {
+                        PrintError8(shortTable, methodUnderTestName, "CheckResultShuffle", checkInt16);
+                        testResult = Fail;
+                    }
+
+                    // Same reference computation for the ushort table.
+                    CheckMethodFive<ushort, ushort, byte> checkUInt16 = (Span<ushort> x, byte imm, Span<ushort> z, Span<ushort> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i >= halfLength)
+                            {
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                a[i] = x[imm & 0x03];
+                                imm = (byte)(imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!ushortTable.CheckResultShuffle(checkUInt16))
+                    {
+                        PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffle", checkUInt16);
+                        testResult = Fail;
+                    }
+                }
+            }
+            else
+            {
+                Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}");
+            }
+            return testResult;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.tt b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow.tt
new file mode 100644 (file)
index 0000000..04327da
--- /dev/null
@@ -0,0 +1,177 @@
+<#@ template debug="false" hostspecific="false" language="C#" #>
+<#@ assembly name="System.Core" #>
+<#@ import namespace="System.Linq" #>
+<#@ import namespace="System.Text" #>
+<#@ import namespace="System.Collections.Generic" #>
+<#@ output extension=".cs"  encoding="utf-8" #>
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace IntelHardwareIntrinsicTest
+{
+    internal static partial class Program
+    {
+        // Exit codes returned from Main.
+        const int Pass = 100;
+        const int Fail = 0;
+
+        /// <summary>
+        /// Exercises Sse2.ShuffleLow on Vector128&lt;short&gt; and Vector128&lt;ushort&gt; inputs with sixteen
+        /// constant control bytes and checks every result against a scalar reference computation.
+        /// </summary>
+        /// <param name="args">Command line arguments; not used.</param>
+        /// <returns>Pass when every check succeeds or Sse2 is not supported; otherwise Fail.</returns>
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+            short testsCount = 16;
+            string methodUnderTestName = nameof(Sse2.ShuffleLow);
+
+<#
+            // Shuffle control bytes; the loops below emit one ShuffleLow call per entry for each element type.
+            string[] permuteData = new string[]
+            {
+                "0b11100100",         // identity
+                "0b00011011",         // invert
+                "0b00000000",         // broadcast element 0
+                "0b11111111",         // broadcast element 3
+                "0b01010101",         // broadcast element 1
+                "0b10101010",         // broadcast element 2
+                "0b11011000",         // swap middle elements
+                "0b00100111",         // swap external elements
+                "0b10110001",         // swap internal with external elements
+                "0b11110000",         // divide everything between external elements
+                "0b10100101",         // divide everything between internal elements
+                "0b00010100",         // pattern (0, 1, 1, 0)
+                "0b10000010",         // pattern (2, 0, 0, 2)
+                "0b11001100",         // pattern (3, 0, 3, 0)
+                "0b01100110",         // pattern (1, 2, 1, 2)
+                "0b10011001"          // pattern (2, 1, 2, 1)
+            };
+#>
+            if (Sse2.IsSupported)
+            {
+                using (var shortTable = TestTableTuvImmSse2<short, short, byte>.Create(testsCount))
+                using (var ushortTable = TestTableTuvImmSse2<ushort, ushort, byte>.Create(testsCount))
+                {
+                    // Vector128<short> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<short>(
+                        shortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (short)(i % 8);
+                        });
+
+                    TestUtilities.InitializeWithConstValue<short>(0, shortTable.inArray2);
+
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                    (Vector128<short>, Vector128<short>) valueInt16_<#= i #> = shortTable[<#= i #>];
+                    Vector128<short> resultInt16_<#= i #> = Sse2.ShuffleLow(valueInt16_<#= i #>.Item1, (byte) <#= permuteData[i] #>);
+                    shortTable.SetOutArray(resultInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>);
+
+<#
+       }
+#>
+                    // Vector128<ushort> tests
+
+                    TestUtilities.InitializeWithElementNumberingModuloVectorLength<ushort>(
+                        ushortTable.inArray1, 16, (int i, int elNo) =>
+                        {
+                            return (ushort)(i % 8);
+                        });
+
+                    TestUtilities.InitializeWithConstValue<ushort>(0, ushortTable.inArray2);
+
+<#
+       for (int i = 0; i < permuteData.Length; i++)
+       {
+#>
+                    (Vector128<ushort>, Vector128<ushort>) valueUInt16_<#= i #> = ushortTable[<#= i #>];
+                    Vector128<ushort> resultUInt16_<#= i #> = Sse2.ShuffleLow(valueUInt16_<#= i #>.Item1, (byte) <#= permuteData[i] #>);
+                    ushortTable.SetOutArray(resultUInt16_<#= i #>, <#= i #>, (byte) <#= permuteData[i] #>);
+
+<#
+       }
+#>
+                    // Scalar reference for ShuffleLow: element i of the lower half of x is read from the index
+                    // held in the next two bits of imm (lowest bits first); the upper half of x is copied as is.
+                    // The expected values are stored in a and compared element by element with z.
+                    CheckMethodFive<short, short, byte> checkInt16 = (Span<short> x, byte imm, Span<short> z, Span<short> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i >= halfLength)
+                            {
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                a[i] = x[imm & 0x03];
+                                imm = (byte)(imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!shortTable.CheckResultShuffle(checkInt16))
+                    {
+                        PrintError8(shortTable, methodUnderTestName, "CheckResultShuffle", checkInt16);
+                        testResult = Fail;
+                    }
+
+                    // Same reference computation for the ushort table.
+                    CheckMethodFive<ushort, ushort, byte> checkUInt16 = (Span<ushort> x, byte imm, Span<ushort> z, Span<ushort> a) =>
+                    {
+                        bool result = true;
+                        int halfLength = x.Length / 2;
+                        for (int i = 0; i < x.Length; i++)
+                        {
+                            if (i >= halfLength)
+                            {
+                                a[i] = x[i];
+                            }
+                            else
+                            {
+                                a[i] = x[imm & 0x03];
+                                imm = (byte)(imm >> 2);
+                            }
+
+                            if (z[i] != a[i])
+                            {
+                                result = false;
+                            }
+                        }
+                        return result;
+                    };
+
+                    if (!ushortTable.CheckResultShuffle(checkUInt16))
+                    {
+                        PrintError8(ushortTable, methodUnderTestName, "CheckResultShuffle", checkUInt16);
+                        testResult = Fail;
+                    }
+                }
+            }
+            else
+            {
+                Console.WriteLine($"Sse2.IsSupported: {Sse2.IsSupported}, skipped tests of {typeof(Sse2)}.{methodUnderTestName}");
+            }
+            return testResult;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_r.csproj
new file mode 100644 (file)
index 0000000..8369d6a
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> <!-- shared properties from the dir.props located at or above this directory -->
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <!-- defaults to Debug when no configuration is supplied -->
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <!-- defaults to AnyCPU when no platform is supplied -->
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{3DCFB777-8A32-443E-ABD9-4636600D2B4F}</ProjectGuid>
+    <OutputType>Exe</OutputType> <!-- the test is built as an executable -->
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- ShuffleLow.cs declares Main as unsafe -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>
+    </Optimize> <!-- left empty in this project; the sibling ShuffleLow_ro.csproj sets it to True -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="ShuffleLow.cs"> <!-- test source; marked as generated from ShuffleLow.tt (see DependentUpon) -->
+      <AutoGen>True</AutoGen>
+      <DesignTime>True</DesignTime>
+      <DependentUpon>ShuffleLow.tt</DependentUpon>
+    </Compile>
+    <Compile Include="TestTableSse2.cs" /> <!-- second source compiled into the same executable; presumably the shared test-table helpers - confirm -->
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="ShuffleLow.tt"> <!-- T4 template whose last generated output is ShuffleLow.cs -->
+      <Generator>TextTemplatingFileGenerator</Generator>
+      <LastGenOutput>ShuffleLow.cs</LastGenOutput>
+    </Content>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> <!-- shared targets from the dir.targets located at or above this directory -->
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/ShuffleLow_ro.csproj
new file mode 100644 (file)
index 0000000..721cd05
--- /dev/null
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> <!-- shared properties from the dir.props located at or above this directory -->
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <!-- defaults to Debug when no configuration is supplied -->
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <!-- defaults to AnyCPU when no platform is supplied -->
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{A9DAC473-F5CB-4DA9-ADE4-2F9EB53FC4A8}</ProjectGuid>
+    <OutputType>Exe</OutputType> <!-- the test is built as an executable -->
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- ShuffleLow.cs declares Main as unsafe -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize> <!-- compiler optimizations on; the sibling ShuffleLow_r.csproj leaves this empty -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="ShuffleLow.cs"> <!-- test source; marked as generated from ShuffleLow.tt (see DependentUpon) -->
+      <AutoGen>True</AutoGen>
+      <DesignTime>True</DesignTime>
+      <DependentUpon>ShuffleLow.tt</DependentUpon>
+    </Compile>
+    <Compile Include="TestTableSse2.cs" /> <!-- second source compiled into the same executable; presumably the shared test-table helpers - confirm -->
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="ShuffleLow.tt"> <!-- T4 template whose last generated output is ShuffleLow.cs -->
+      <Generator>TextTemplatingFileGenerator</Generator>
+      <LastGenOutput>ShuffleLow.cs</LastGenOutput>
+    </Content>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> <!-- shared targets from the dir.targets located at or above this directory -->
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_r.csproj
new file mode 100644 (file)
index 0000000..6cfebb7
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> <!-- shared properties from the dir.props located at or above this directory -->
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <!-- defaults to Debug when no configuration is supplied -->
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <!-- defaults to AnyCPU when no platform is supplied -->
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{D25DF7E1-96B0-454A-A5BE-70C26BE49559}</ProjectGuid>
+    <OutputType>Exe</OutputType> <!-- the test is built as an executable -->
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>
+    </Optimize> <!-- left empty in this project; the sibling Shuffle_ro.csproj sets it to True -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Shuffle.cs"> <!-- test source; marked as generated from Shuffle.tt (see DependentUpon) -->
+      <AutoGen>True</AutoGen>
+      <DesignTime>True</DesignTime>
+      <DependentUpon>Shuffle.tt</DependentUpon>
+    </Compile>
+    <Compile Include="TestTableSse2.cs" /> <!-- second source compiled into the same executable; presumably the shared test-table helpers - confirm -->
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="Shuffle.tt"> <!-- T4 template whose last generated output is Shuffle.cs -->
+      <Generator>TextTemplatingFileGenerator</Generator>
+      <LastGenOutput>Shuffle.cs</LastGenOutput>
+    </Content>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> <!-- shared targets from the dir.targets located at or above this directory -->
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/Shuffle_ro.csproj
new file mode 100644 (file)
index 0000000..46f64bc
--- /dev/null
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> <!-- Pulls in the nearest dir.props found by walking up from this project's directory. -->
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{695760F3-DA13-4227-9ED6-AD8C5E5D88C6}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- The compiled sources declare unsafe types and use raw pointers. -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize> <!-- This project builds the Shuffle test with compiler optimizations switched on. -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{508349B6-6B84-4DF5-91F0-309BEEBAD82D}" />
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Shuffle.cs"> <!-- Marked as generated output of the Shuffle.tt template (see DependentUpon). -->
+      <AutoGen>True</AutoGen>
+      <DesignTime>True</DesignTime>
+      <DependentUpon>Shuffle.tt</DependentUpon>
+    </Compile>
+    <Compile Include="TestTableSse2.cs" /> <!-- Test table and error printing helpers compiled into the same executable. -->
+  </ItemGroup>
+  <ItemGroup>
+    <Content Include="Shuffle.tt"> <!-- Text template whose last generated output is recorded as Shuffle.cs. -->
+      <Generator>TextTemplatingFileGenerator</Generator>
+      <LastGenOutput>Shuffle.cs</LastGenOutput>
+    </Content>
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> <!-- Pulls in the nearest dir.targets found by walking up from this project's directory. -->
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file
index e3fc3e8..00b7e00 100644 (file)
@@ -14,16 +14,24 @@ namespace IntelHardwareIntrinsicTest
 {
     public delegate bool CheckMethod<T>(T x, T y, T z, ref T c);
 
+    public delegate bool CheckMethodSpan<T>(Span<T> x, Span<T> y, Span<T> z, Span<T> a); // Span-based check callback taking four windows of T; invoked once per window by TestTableSse2<T>.CheckResult, which passes inArray1 as x and inArray2 as y.
+
     public delegate bool CheckMethodTwo<T, U>(T x, T y, U z, ref U c);
 
     public delegate bool CheckMethodTwo<T, U, V>(T x, V y, U z, ref U c);
 
+    public delegate bool CheckMethodTwoSpan<T, U, V>(Span<T> x, V y, U z, ref U c); // Check callback used by TestTableImmSse2.CheckResultExtract: x is a window over inArray1, y an immArray entry, z an outArray entry and c a by-ref checkArray entry.
+
     public delegate bool CheckMethodThree<T, U>(T x1, T x2, T y1, T y2, U z, ref U c);
 
     public delegate bool CheckMethodFour<T, U>(T x1, T x2, U z1, U z2, ref U c1, ref U c2);
 
     public delegate bool CheckMethodFive<T, U>(T x1, T x2, T y1, T y2, U z1, U z2, ref U c1, ref U c2);
 
+    public delegate bool CheckMethodFive<T, U, V>(Span<T> x, V imm, Span<U> z, Span<U> a); // Check callback used by TestTableTuvImmSse2.CheckResultShuffle: x is a window over inArray1, imm the matching immArray entry, z/a windows over outArray/checkArray.
+
+    public delegate bool CheckMethodFiveDouble<T, U, V>(Span<T> x, Span<T> y, V imm, Span<U> z, Span<U> a); // Two-input variant of the shuffle check callback: x/y are windows over inArray1/inArray2, imm the matching immArray entry, z/a windows over outArray/checkArray.
+
     public delegate bool CheckMethodFourTFourU<T, U>(
         ValueTuple<T, T, T, T> x,
         ValueTuple<T, T, T, T> y,
@@ -51,6 +59,9 @@ namespace IntelHardwareIntrinsicTest
     public delegate bool CheckMethodEightImm<T, U, V>(
         Span<T> x, T value, V i, Span<U> z, Span<U> a);
 
+    public delegate bool CheckMethodEightInsert<T, U, V>( // Check callback used by TestTableImmSse2.CheckResultInsert; its parameter list is identical to CheckMethodEightImm.
+        Span<T> x, T value, V i, Span<U> z, Span<U> a); // x: window over inArray1; value: inArray2 entry; i: immArray entry; z/a: windows over outArray/checkArray.
+
     public delegate bool CheckMethodSixteen<T, U>(
         ValueTuple<T, T, T, T, T, T, T, ValueTuple<T>> x,
         ValueTuple<T, T, T, T, T, T, T, ValueTuple<T>> y,
@@ -73,7 +84,9 @@ namespace IntelHardwareIntrinsicTest
     public enum InitMode
     {
         Undefined = 0,
-        NumberFirstVectors = 0b00000001
+        NumberFirstVectors = 0b00000001, // TestUtilities.InitializeData fills both input arrays with consecutive numbers for this mode.
+        NumberAllVectors = 0b00000010, // TestUtilities.InitializeData currently performs no initialization for this mode.
+        UseConstValue = 0b00000100, // TestUtilities.InitializeData fills the first input array with one caller-supplied value for this mode.
     }
 
     public unsafe struct TestTableSse2<T> : IDisposable where T : struct
@@ -129,6 +142,18 @@ namespace IntelHardwareIntrinsicTest
             return (inArray1[index], inArray2[index], outArray[index], checkArray[index]);
         }
 
+        public Memory<T> GetAssignmentData(int index) // Returns a window of _stepSize elements over inArray2 that starts at element index * _stepSize.
+        {
+            _index = index; // Side effect: the requested index becomes the table's current index.
+            return new Memory<T>(inArray2, index * _stepSize, _stepSize);
+        }
+
+        public ValueTuple<Memory<T>, Memory<T>, Memory<T>, Memory<T>> GetAssignmentDataPoint(int index) // Returns _stepSize-element windows starting at element `index` over (inArray1, inArray2, outArray, checkArray).
+        {
+            return (new Memory<T>(inArray1, index, _stepSize), new Memory<T>(inArray2, index, _stepSize),
+                    new Memory<T>(outArray, index, _stepSize), new Memory<T>(checkArray, index, _stepSize)); // Fourth window is checkArray, mirroring the (inArray1, inArray2, outArray, checkArray) element getter; it used to repeat outArray.
+        }
+
         public static TestTableSse2<T> Create(int lengthInVectors)
         {
             int length = _stepSize / Marshal.SizeOf<T>() * lengthInVectors;
@@ -195,6 +220,24 @@ namespace IntelHardwareIntrinsicTest
             return result;
         }
 
+        public bool CheckResult(CheckMethodSpan<T> check) // Calls `check` for every window of the table; returns true only when every call returned true.
+        {
+            bool result = true;
+            for (int i = 0; i < inArray1.Length; i += _stepSize) // NOTE(review): _stepSize is used here as an element count per window - confirm it matches the element type in use.
+            {
+                var x = new Span<T>(inArray1, i, _stepSize);
+                var y = new Span<T>(inArray2, i, _stepSize);
+                var z = new Span<T>(outArray, i, _stepSize); // Window over the output array (was wrongly built over inArray2).
+                var a = new Span<T>(checkArray, i, _stepSize); // Window over the check array (was wrongly built over inArray2).
+
+                if (!check(x, y, z, a))
+                {
+                    result = false; // Keep iterating so the callback still sees every remaining window.
+                }
+            }
+            return result;
+        }
+
         public void Dispose()
         {
             _inHandle1.Free();
@@ -802,6 +845,12 @@ namespace IntelHardwareIntrinsicTest
             Unsafe.Write((byte*)OutArrayPtr + (_index * _stepSize), value);
         }
 
+        public void SetOutArray(U value, int index = -1) // Stores `value` in outArray at `index`; a negative index (the default) selects the current index.
+        {
+            index = index < 0 ? _index : index;
+            outArray[index] = value; // Write at the resolved index; the explicit argument used to be ignored in favour of _index.
+        }
+
         public Vector128<T> this[int index]
         {
             get
@@ -1050,7 +1099,8 @@ namespace IntelHardwareIntrinsicTest
     {
         private const int _stepSize = 16;
         private static int s_tSize;
-        private static int s_ElementCount;
+        public static int ElementCount;
+        private int _lengthInVectors;
 
         private GCHandle _inHandle1;
         private GCHandle _inHandle2;
@@ -1073,12 +1123,26 @@ namespace IntelHardwareIntrinsicTest
         public void* CheckArrayPtr => _checkHandle.AddrOfPinnedObject().ToPointer();
 
         public Vector128<T> Vector1 => Unsafe.Read<Vector128<T>>((byte*)InArray1Ptr + (_index * _stepSize));
-        public T Value => Unsafe.Read<T>((byte*)InArray2Ptr + (_index));
-        public V Immediate => Unsafe.Read<V>((byte*)ImmArrayPtr + (_index));
+        public T Value => inArray2[_index]; // Element of inArray2 at the current index (bounds-checked array access).
+        public V Immediate => immArray[_index]; // Element of immArray at the current index (bounds-checked array access).
         public Vector128<U> Vector3 => Unsafe.Read<Vector128<U>>((byte*)OutArrayPtr + (_index * _stepSize));
         public Vector128<U> Vector4 => Unsafe.Read<Vector128<U>>((byte*)CheckArrayPtr + (_index * _stepSize));
 
-        public int Index { get => _index; set => _index = value; }
+        public int Index // Current vector index; the setter only accepts values in [0, _lengthInVectors).
+        {
+            get
+            {
+                return _index;
+            }
+            set
+            {
+                if (!(value >= 0 && value < _lengthInVectors))
+                {
+                    throw new IndexOutOfRangeException();
+                }
+                _index = value;
+            }
+        }
 
         public void SetOutArray(Vector128<T> value, int index = -1)
         {
@@ -1086,12 +1150,26 @@ namespace IntelHardwareIntrinsicTest
             Unsafe.Write((byte*)OutArrayPtr + (_index * _stepSize), value);
         }
 
+        public void SetOutArray(U value, V imm, int index = -1) // Stores `value` in outArray and `imm` in immArray at `index`; a negative index (the default) selects the current index.
+        {
+            index = index < 0 ? _index : index;
+            outArray[index] = value; // Write at the resolved index; the explicit argument used to be ignored in favour of _index.
+            immArray[index] = imm;
+        }
+
+        public void SetOutArray(Vector128<T> value1, int index, V value2) // Makes `index` current (validated by the Index setter), then stores the vector in the output buffer and its immediate in immArray.
+        {
+            Index = index;
+            Unsafe.Write<Vector128<T>>((byte*)OutArrayPtr + (_stepSize * _index), value1); // _index equals `index` once the setter has accepted it.
+            immArray[_index] = value2;
+        }
+
         public (Vector128<T>, T) this[int index]
         {
             get
             {
-                _index = index;
-                return (Vector1, Value);
+                Index = index; // Goes through the validating setter, so an out-of-range index throws IndexOutOfRangeException.
+                return (Vector1, inArray2[index]); // Vector1 is read at the index that was just selected.
             }
         }
 
@@ -1103,6 +1181,12 @@ namespace IntelHardwareIntrinsicTest
                     (checkArray[index], checkArray[index + 1], checkArray[index + 2], checkArray[index + 3], checkArray[index + 4], checkArray[index + 5], checkArray[index + 6], checkArray[index + 7]));
         }
 
+        public (Memory<T>, V, U, U) GetExtractDataPoint(int index) // For element offset `index`: an ElementCount-wide window over inArray1 plus the per-vector immArray/outArray/checkArray entries.
+        {
+            int vectorIndex = index / ElementCount;
+            return ValueTuple.Create(new Memory<T>(inArray1, index, ElementCount), immArray[vectorIndex], outArray[vectorIndex], checkArray[vectorIndex]);
+        }
+
         public static TestTableImmSse2<T, U, V> Create(int lengthInVectors, double tSizeMultiplier = 1.0)
         {
             return new TestTableImmSse2<T, U, V>(lengthInVectors, tSizeMultiplier);
@@ -1111,8 +1195,9 @@ namespace IntelHardwareIntrinsicTest
         public TestTableImmSse2(int lengthInVectors, double tSizeMultiplier = 1.0, bool initialize = true)
         {
             s_tSize = Marshal.SizeOf<T>();
-            s_ElementCount = _stepSize / s_tSize;
-            int length = s_ElementCount * lengthInVectors;
+            ElementCount = _stepSize / s_tSize;
+            _lengthInVectors = lengthInVectors;
+            int length = ElementCount * lengthInVectors;
             inArray1 = new T[length];
             inArray2 = new T[lengthInVectors];
             immArray = new V[lengthInVectors];
@@ -1243,12 +1328,46 @@ namespace IntelHardwareIntrinsicTest
             }
         }
 
+        public bool CheckResultExtract(CheckMethodTwoSpan<T, U, V> check) // Calls `check` once per complete vector of inArray1; returns true only when every call returned true.
+        {
+            bool allPassed = true;
+            int vectorCount = inArray1.Length / ElementCount; // Only complete vectors are visited.
+            for (int vector = 0; vector < vectorCount; vector++)
+            {
+                int start = vector * ElementCount;
+                var input = new Span<T>(inArray1, start, ElementCount);
+                if (!check(input, immArray[vector], outArray[vector], ref checkArray[vector]))
+                {
+                    allPassed = false;
+                }
+            }
+            return allPassed;
+        }
+
+        public bool CheckResultInsert(CheckMethodEightInsert<T, U, V> check) // Calls `check` once per ElementCount-wide window; returns true only when every call returned true.
+        {
+            bool allPassed = true;
+            for (int offset = 0; offset < inArray1.Length - 1; offset += ElementCount)
+            {
+                int vector = offset / ElementCount; // Index of the per-vector entries in inArray2 and immArray.
+                var input = new Span<T>(inArray1, offset, ElementCount);
+                var output = new Span<U>(outArray, offset, ElementCount);
+                var reference = new Span<U>(checkArray, offset, ElementCount);
+
+                if (!check(input, inArray2[vector], immArray[vector], output, reference))
+                {
+                    allPassed = false;
+                }
+            }
+            return allPassed;
+        }
+
         public bool CheckResultImm(CheckMethodEightImm<T, U, V> check)
         {
             bool result = true;
+            int elNo = _stepSize / s_tSize;
             for (int i = 0; i < inArray1.Length; i++)
             {
-                int elNo = _stepSize / s_tSize;
                 if (!check(
                     new Span<T>(inArray1, Index * elNo, elNo),
                     inArray2[i], immArray[i],
@@ -1522,6 +1641,398 @@ namespace IntelHardwareIntrinsicTest
         }
     }
 
+    public unsafe struct TestTableTuvImmSse2<T, U, V> : IDisposable // Pinned test table: two T input arrays, one V immediate per vector, and U output/check arrays.
+        where T : struct
+        where U : struct
+        where V : struct
+    {
+        private const int _vectorSize = 16; // Byte stride between consecutive Vector128 values in the pinned arrays.
+        private static int _tSize; // Marshal.SizeOf<T>(), assigned by the constructor.
+        private static int _elementsNo; // Number of T elements per vector (_vectorSize / _tSize).
+        private int _lengthInVectors; // Per-instance vector count used by the Index range check (no longer shared between tables).
+
+        private GCHandle _inHandle1;
+        private GCHandle _inHandle2;
+        private GCHandle _immHandle;
+        private GCHandle _outHandle;
+        private GCHandle _checkHandle;
+
+        private int _index; // Current vector index used by the Vector*/Immediate accessors.
+
+        public T[] inArray1;
+        public T[] inArray2;
+        public V[] immArray;
+        public U[] outArray;
+        public U[] checkArray;
+
+        public void* InArray1Ptr => _inHandle1.AddrOfPinnedObject().ToPointer();
+        public void* InArray2Ptr => _inHandle2.AddrOfPinnedObject().ToPointer();
+        public void* ImmArrayPtr => _immHandle.AddrOfPinnedObject().ToPointer(); // Address of immArray's own pin (previously returned the inArray2 address).
+        public void* OutArrayPtr => _outHandle.AddrOfPinnedObject().ToPointer();
+        public void* CheckArrayPtr => _checkHandle.AddrOfPinnedObject().ToPointer();
+
+        public Vector128<T> Vector1 => Unsafe.Read<Vector128<T>>((byte*)InArray1Ptr + (_index * _vectorSize));
+        public Vector128<T> Vector2 => Unsafe.Read<Vector128<T>>((byte*)InArray2Ptr + (_index * _vectorSize));
+        public V Immediate => immArray[_index]; // Bounds-checked read; the former pointer read used an unscaled byte offset.
+        public Vector128<U> Vector3 => Unsafe.Read<Vector128<U>>((byte*)OutArrayPtr + (_index * _vectorSize));
+        public Vector128<U> Vector4 => Unsafe.Read<Vector128<U>>((byte*)CheckArrayPtr + (_index * _vectorSize));
+
+        public int Index // Current vector index; the setter throws IndexOutOfRangeException outside [0, _lengthInVectors).
+        {
+            get => _index;
+            set
+            {
+                if (value < 0 || value >= _lengthInVectors)
+                {
+                    throw new IndexOutOfRangeException();
+                }
+                else
+                {
+                    _index = value;
+                }
+            }
+        }
+
+        public void SetOutArray(Vector128<T> value, int index = -1) // Writes `value` into the output buffer at vector `index`; a negative index (the default) selects the current index.
+        {
+            index = index < 0 ? _index : index;
+            Unsafe.Write((byte*)OutArrayPtr + (index * _vectorSize), value); // Use the resolved index; the explicit argument used to be ignored.
+        }
+
+        public void SetOutArray(Vector128<T> value1, int index, V value2) // Makes `index` current (validated), then stores the vector and the immediate used to produce it.
+        {
+            Index = index;
+            Unsafe.Write((byte*)OutArrayPtr + (Index * _vectorSize), value1);
+            immArray[Index] = value2;
+        }
+
+        public (Vector128<T>, Vector128<T>) this[int index] // Selects vector `index` and returns the pair of input vectors stored there.
+        {
+            get
+            {
+                Index = index; // Validate before the unchecked pointer reads below.
+                return (Vector1, Vector2);
+            }
+        }
+
+        public unsafe ValueTuple<T, V, U, U> GetQuad22DataPoint(int index) // Element-wise data point; the immediate is the one belonging to the vector that contains element `index`.
+        {
+            return (inArray1[index], immArray[index / (_vectorSize / _tSize)], outArray[index], checkArray[index]);
+        }
+
+        public ((T, T), (T, T), V, (U, U), (U, U)) GetDoubleImmDataPoint(int index) // Data point for two-element vectors starting at element `index`.
+        {
+            return ((inArray1[index], inArray1[index + 1]),
+                    (inArray2[index], inArray2[index + 1]),
+                    immArray[index / 2],
+                    (outArray[index], outArray[index + 1]),
+                    (checkArray[index], checkArray[index + 1]));
+        }
+
+        public ((T, T, T, T), V, (U, U, U, U), (U, U, U, U)) GetQuadImmDataPoint(int index) // Data point for four-element vectors starting at element `index`.
+        {
+            return ((inArray1[index], inArray1[index + 1], inArray1[index + 2], inArray1[index + 3]),
+                    immArray[index / 4],
+                    (outArray[index], outArray[index + 1], outArray[index + 2], outArray[index + 3]),
+                    (checkArray[index], checkArray[index + 1], checkArray[index + 2], checkArray[index + 3]));
+
+        }
+
+        public ((T, T, T, T, T, T, T, T), T, V, (U, U, U, U, U, U, U, U), (U, U, U, U, U, U, U, U)) GetOctaImmDataPoint(int index) // Data point for eight-element vectors starting at element `index`.
+        {
+            return ((inArray1[index], inArray1[index + 1], inArray1[index + 2], inArray1[index + 3], inArray1[index + 4], inArray1[index + 5], inArray1[index + 6], inArray1[index + 7]),
+                    inArray2[index / 8], immArray[index / 8],
+                    (outArray[index], outArray[index + 1], outArray[index + 2], outArray[index + 3], outArray[index + 4], outArray[index + 5], outArray[index + 6], outArray[index + 7]),
+                    (checkArray[index], checkArray[index + 1], checkArray[index + 2], checkArray[index + 3], checkArray[index + 4], checkArray[index + 5], checkArray[index + 6], checkArray[index + 7]));
+        }
+
+        public static TestTableTuvImmSse2<T, U, V> Create(int lengthInVectors, double tSizeMultiplier = 1.0) // Factory wrapper around the constructor (data is initialized).
+        {
+            return new TestTableTuvImmSse2<T, U, V>(lengthInVectors, tSizeMultiplier);
+        }
+
+        public TestTableTuvImmSse2(int lengthInVectors, double tSizeMultiplier = 1.0, bool initialize = true) // Allocates and pins all five arrays; output/check lengths are the input length divided by tSizeMultiplier.
+        {
+            _lengthInVectors = lengthInVectors;
+            _tSize = Marshal.SizeOf<T>();
+            _elementsNo = _vectorSize / _tSize;
+            int length = _elementsNo * lengthInVectors;
+            inArray1 = new T[length];
+            inArray2 = new T[length];
+            immArray = new V[lengthInVectors];
+            outArray = new U[(int)(length * (1 / tSizeMultiplier))];
+            checkArray = new U[(int)(length * (1 / tSizeMultiplier))];
+            _index = 0;
+            _inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned);
+            _inHandle2 = GCHandle.Alloc(inArray2, GCHandleType.Pinned);
+            _immHandle = GCHandle.Alloc(immArray, GCHandleType.Pinned); // Pin the immediates themselves (previously pinned inArray2 a second time).
+            _outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
+            _checkHandle = GCHandle.Alloc(checkArray, GCHandleType.Pinned);
+            if (initialize)
+            {
+                Initialize();
+            }
+        }
+
+        public void Initialize(InitMode mode = InitMode.Undefined) // Fills the two input arrays according to `mode`; immArray is not touched here.
+        {
+            TestUtilities.InitializeData(inArray1, inArray2, mode, InArray1Ptr, InArray2Ptr);
+        }
+
+        public bool CheckResultImm(CheckMethodTwo<T, U, V> check) // Element-wise check: each input element is paired with its vector's immediate and the matching output/check elements.
+        {
+            bool result = true;
+            for (int i = 0; i < inArray1.Length; i++)
+            {
+                int elNo = _vectorSize / _tSize;
+                if (!check(
+                    inArray1[i], immArray[i / elNo],
+                    outArray[i], ref checkArray[i]))
+                {
+                    result = false;
+                }
+            }
+            return result;
+        }
+
+        public bool CheckResultImm(CheckMethodEightImm<T, U, V> check) // NOTE(review): iterates once per element but always passes the window at Index, and immArray holds only one entry per vector - confirm intent.
+        {
+            bool result = true;
+            for (int i = 0; i < inArray1.Length; i++)
+            {
+                if (!check(
+                    new Span<T>(inArray1, Index * _elementsNo, _elementsNo),
+                    inArray2[i], immArray[i],
+                    new Span<U>(outArray, Index * _elementsNo, _elementsNo),
+                    new Span<U>(checkArray, Index * _elementsNo, _elementsNo)))
+                {
+                    result = false;
+                }
+            }
+            return result;
+        }
+
+        public bool CheckResultShuffle(CheckMethodFive<T, U, V> check) // Calls `check` once per vector with windows over inArray1/outArray/checkArray and that vector's immediate.
+        {
+            bool result = true;
+            for (int i = 0; i < inArray1.Length; i += _elementsNo)
+            {
+                if (!check(
+                    new Span<T>(inArray1, i, _elementsNo),
+                    immArray[i / _elementsNo],
+                    new Span<U>(outArray, i, _elementsNo),
+                    new Span<U>(checkArray, i, _elementsNo)))
+                {
+                    result = false;
+                }
+            }
+            return result;
+        }
+
+        public bool CheckResultShuffle(CheckMethodFiveDouble<T, U, V> check) // Two-input variant: additionally passes the matching window over inArray2.
+        {
+            bool result = true;
+            for (int i = 0; i < inArray1.Length; i += _elementsNo)
+            {
+                if (!check(
+                    new Span<T>(inArray1, i, _elementsNo),
+                    new Span<T>(inArray2, i, _elementsNo),
+                    immArray[i / _elementsNo],
+                    new Span<U>(outArray, i, _elementsNo),
+                    new Span<U>(checkArray, i, _elementsNo)))
+                {
+                    result = false;
+                }
+            }
+            return result;
+        }
+
+        public void Dispose() // Releases the five pinned handles allocated by the constructor.
+        {
+            _inHandle1.Free();
+            _inHandle2.Free();
+            _immHandle.Free();
+            _outHandle.Free();
+            _checkHandle.Free();
+        }
+    }
+
+    public static class TestUtilities // Array initialization helpers shared by the test tables.
+    {
+        public static unsafe void InitializeData<T>( // Dispatches on `mode`: random data, consecutive numbering, or a constant fill of inArray1.
+            T[] inArray1, T[] inArray2, InitMode mode = InitMode.Undefined,
+            void* InArray1Ptr = null, void* InArray2Ptr = null, T value = default(T))
+        {
+            if (mode == InitMode.Undefined)
+            {
+                InitializeWithRandomData(inArray1, inArray2, InArray1Ptr, InArray2Ptr);
+            }
+            else if (mode == InitMode.NumberFirstVectors)
+            {
+                InitializeWithContinuosIndependentNumbering<T>(inArray1, inArray2);
+            }
+            else if (mode == InitMode.UseConstValue)
+            {
+                InitializeWithConstValue(value, inArray1);
+            }
+            else if (mode == InitMode.NumberAllVectors)
+            {
+            } // No initialization is implemented for NumberAllVectors; the arrays are left as they are.
+        }
+
+        public static unsafe void InitializeWithRandomData<T>( // Randomizes both arrays; the pointers are needed for element types other than double/float.
+            T[] inArray1, T[] inArray2, void* InArray1Ptr = null, void* InArray2Ptr = null)
+        {
+            InitializeWithRandomData<T>(inArray1, InArray1Ptr);
+            InitializeWithRandomData<T>(inArray2, InArray2Ptr);
+        }
+
+        public static unsafe void InitializeWithRandomData<T>( // Randomizes one array; throws ArgumentNullException when a non-floating-point array comes without its pinned pointer.
+            T[] array, void* arrayPtr = null)
+        {
+            Random random = new Random(); // Runtime-chosen seed: the former tick-derived seed gave back-to-back calls the same seed and therefore identical arrays.
+            if (array is double[])
+            {
+                var array1 = array as double[];
+                for (int i = 0; i < array1.Length; i++)
+                {
+                    array1[i] = random.NextDouble() * random.Next();
+                }
+            }
+            else if (array is float[])
+            {
+                var arrayFloat1 = array as float[];
+                for (int i = 0; i < arrayFloat1.Length; i++)
+                {
+                    arrayFloat1[i] = (float)(random.NextDouble() * random.Next(ushort.MaxValue));
+                }
+            }
+            else
+            {
+                if (arrayPtr == null)
+                    throw new ArgumentNullException(nameof(arrayPtr));
+
+                int tSize = Marshal.SizeOf<T>();
+                random.NextBytes(new Span<byte>(((byte*)arrayPtr), array.Length * tSize)); // Fill the pinned storage with random bytes.
+            }
+        }
+
+        public static void InitializeWithContinuosIndependentNumbering<T>(T[] array1, T[] array2) // array1 counts up from 0; array2 counts up from 100 (8-bit types) or 10000 (all others). Unsupported element types are left untouched.
+        {
+            if (array1 is double[] doubleArray1)
+            {
+                double[] doubleArray2 = array2 as double[];
+                for (double i = 0.0, j = 10000.0; i < doubleArray1.Length; i++, j++)
+                {
+                    doubleArray1[(int)i] = i;
+                    doubleArray2[(int)i] = j;
+                }
+            }
+            else if (array1 is float[] floatArray1)
+            {
+                float[] floatArray2 = array2 as float[];
+                for (float i = 0.0f, j = 10000.0f; i < floatArray1.Length; i++, j++)
+                {
+                    floatArray1[(int)i] = i;
+                    floatArray2[(int)i] = j;
+                }
+            }
+            else if (array1 is byte[] byteArray1)
+            {
+                byte[] byteArray2 = array2 as byte[];
+                for (int i = 0; i < byteArray1.Length; i++) // int counter: a byte counter wraps at 255 and never terminates for arrays of 256+ elements.
+                {
+                    byteArray1[i] = unchecked((byte)i);
+                    byteArray2[i] = unchecked((byte)(i + 100));
+                }
+            }
+            else if (array1 is sbyte[] sbyteArray1)
+            {
+                sbyte[] sbyteArray2 = array2 as sbyte[];
+                for (int i = 0; i < sbyteArray1.Length; i++) // int counter: an sbyte counter turns negative after 127 and indexes out of range.
+                {
+                    sbyteArray1[i] = unchecked((sbyte)i);
+                    sbyteArray2[i] = unchecked((sbyte)(i + 100));
+                }
+            }
+            else if (array1 is short[] shortArray1)
+            {
+                short[] shortArray2 = array2 as short[];
+                for (int i = 0; i < shortArray1.Length; i++) // int counter; stored values wrap exactly as the 16-bit counters did.
+                {
+                    shortArray1[i] = unchecked((short)i);
+                    shortArray2[i] = unchecked((short)(i + 10000));
+                }
+
+            }
+            else if (array1 is ushort[] ushortArray1)
+            {
+                ushort[] ushortArray2 = array2 as ushort[];
+                for (int i = 0; i < ushortArray1.Length; i++) // int counter; stored values wrap exactly as the 16-bit counters did.
+                {
+                    ushortArray1[i] = unchecked((ushort)i);
+                    ushortArray2[i] = unchecked((ushort)(i + 10000));
+                }
+            }
+            else if (array1 is int[] intArray1)
+            {
+                int[] intArray2 = array2 as int[];
+                for (int i = 0, j = 10000; i < intArray1.Length; i++, j++)
+                {
+                    intArray1[i] = i;
+                    intArray2[i] = j;
+                }
+            }
+            else if (array1 is uint[] uintArray1)
+            {
+                uint[] uintArray2 = array2 as uint[];
+                for (uint i = 0, j = 10000; i < uintArray1.Length; i++, j++)
+                {
+                    uintArray1[i] = i;
+                    uintArray2[i] = j;
+                }
+            }
+            else if (array1 is long[] longArray1)
+            {
+                long[] longArray2 = array2 as long[];
+                for (long i = 0, j = 10000; i < longArray1.Length; i++, j++)
+                {
+                    longArray1[i] = i;
+                    longArray2[i] = j;
+                }
+            }
+            else if (array1 is ulong[] ulongArray1)
+            {
+                ulong[] ulongArray2 = array2 as ulong[];
+                for (uint i = 0, j = 10000; i < ulongArray1.Length; i++, j++) // uint counters are implicitly widened to ulong on assignment.
+                {
+                    ulongArray1[i] = i;
+                    ulongArray2[i] = j;
+                }
+            }
+        }
+
+        public static void InitializeWithConstValue<T>(T value, T[] array) // Sets every element of `array` to `value`; throws ArgumentNullException for a null array.
+        {
+            if (array == null)
+                throw new ArgumentNullException(nameof(array));
+
+            for (int i = 0; i < array.Length; i++)
+            {
+                array[i] = value;
+            }
+        }
+
+        public static void InitializeWithElementNumberingModuloVectorLength<T>(T[] array, int vectorSize, Func<int, int, T> function) // Sets array[i] = function(i, elementsPerVector), where elementsPerVector = vectorSize / Marshal.SizeOf<T>().
+        {
+            int elNo = vectorSize / Marshal.SizeOf<T>();
+            for (int i = 0; i < array.Length; i++)
+            {
+                array[i] = function(i, elNo);
+            }
+        }
+    }
+
     public enum SpecialCheck
     {
         Undefined = 0,
@@ -1571,6 +2082,20 @@ namespace IntelHardwareIntrinsicTest
             Console.WriteLine("\n");
         }
 
+        private static void PrintError<T>(TestTableSse2<T> testTable, string functionName = "", string testFuncString = "", // Prints the header, then one formatted data point per output element, with the check verdict when `check` is supplied.
+            CheckMethodSpan<T> check = null) where T : struct
+        {
+            PrintErrorHeaderTu<T>(functionName, testFuncString);
+            for (int position = 0; position < testTable.outArray.Length; position++)
+            {
+                var point = testTable.GetAssignmentDataPoint(position);
+                string rendered = $"({(PrintMemory<T>(point.Item1), PrintMemory<T>(point.Item2), PrintMemory<T>(point.Item3), PrintMemory<T>(point.Item4))})"; // Rendered before the check runs, as before.
+                string verdict = check == null ? ", " : $"->{check(point.Item1.Span, point.Item2.Span, point.Item3.Span, point.Item4.Span)}, ";
+                Console.Write(rendered + verdict);
+            }
+            Console.WriteLine("\n");
+        }
+
         private static void PrintError<T, U>(TestTableSse2<T, U> testTable, string functionName = "", string testFuncString = "",
             CheckMethodTwo<T, U> check = null) where T : struct where U : struct
         {
@@ -1595,6 +2120,18 @@ namespace IntelHardwareIntrinsicTest
             Console.WriteLine();
         }
 
+        private static void PrintError<T, U, V>(TestTableImmSse2<T, U, V> testTable, string functionName = "", string testFuncString = "", // Prints the header, then one extract data point per vector, with the check verdict when `check` is supplied.
+            CheckMethodTwoSpan<T, U, V> check = null) where T : struct where U : struct where V : struct
+        {
+            PrintErrorHeaderTuv<T, V>(functionName, testFuncString);
+            for (int offset = 0; offset < testTable.inArray1.Length; offset += TestTableImmSse2<T, U, V>.ElementCount)
+            {
+                var point = testTable.GetExtractDataPoint(offset);
+                Console.Write(string.Concat($"({point})", check == null ? ", " : $"->{check(point.Item1.Span, point.Item2, point.Item3, ref point.Item4)}, ")); // The point is formatted before the check can modify it through the ref argument.
+            }
+            Console.WriteLine();
+        }
+
         private static void PrintError<T, U>(TestTableSse2<T, U> testTable, string functionName = "", string testFuncString = "",
             CheckMethodThree<T, U> check = null) where T : struct where U : struct
         {
@@ -1653,6 +2190,61 @@ namespace IntelHardwareIntrinsicTest
             Console.WriteLine();
         }
 
+        private static void PrintError<T, U, V>(TestTableTuvImmSse2<T, U, V> testTable, string functionName = "", string testFuncString = "", // Prints the header and every four-element data point; `check` is accepted but not invoked.
+            CheckMethodFive<T, U, V> check = null) where T : struct where U : struct where V : struct
+        {
+            PrintErrorHeaderTu<T>(functionName, testFuncString);
+            int lastStart = testTable.inArray1.Length - 4;
+            for (int start = 0; start <= lastStart; start += 4)
+            {
+                // ((T, T, T, T), V, (U, U, U, U), (U, U, U, U))
+                Console.Write($"({testTable.GetQuadImmDataPoint(start)}), ");
+            }
+            Console.WriteLine();
+        }
+
+        private static void PrintError8<T, U, V>(TestTableTuvImmSse2<T, U, V> testTable, string functionName = "", string testFuncString = "", // Prints the header and every eight-element data point, with the immediate shown in base 2; `check` is accepted but not invoked.
+            CheckMethodFive<T, U, V> check = null) where T : struct where U : struct where V : struct
+        {
+            byte ConvertToByte(V value) => value is byte result ? result : (byte)0; // Immediates that are not bytes are shown as 0.
+
+            PrintErrorHeaderTu<T>(functionName, testFuncString);
+            int lastStart = testTable.inArray1.Length - 8;
+            for (int start = 0; start <= lastStart; start += 8)
+            {
+                // ((T, T, T, T, T, T, T, T), T, V, (U, U, U, U, U, U, U, U), (U, U, U, U, U, U, U, U))
+                var point = testTable.GetOctaImmDataPoint(start);
+                string immBits = Convert.ToString(ConvertToByte(point.Item3), 2);
+                Console.Write(
+                    $"((x{point.Item1}, " +
+                    $"y({point.Item2}), " +
+                    $"imm({immBits}), " +
+                    $"z{point.Item4}, a{point.Item5})), ");
+            }
+            Console.WriteLine();
+        }
+
+        private static void PrintError8<T, U, V>(TestTableTuvImmSse2<T, U, V> testTable, string functionName = "", string testFuncString = "", // Prints the header and every two-element data point, with the immediate shown in base 2; `check` is accepted but not invoked.
+            CheckMethodFiveDouble<T, U, V> check = null) where T : struct where U : struct where V : struct
+        {
+            byte ConvertToByte(V value) // Immediates that are not bytes are shown as 0.
+            {
+                if (value is byte result)
+                    return result;
+                else
+                    return 0;
+            }
+
+            PrintErrorHeaderTu<T>(functionName, testFuncString);
+            for (int i = 0; i < testTable.inArray1.Length - 1; i += 2) // Stride matches the two-element data point; the former stride of 8 skipped vectors and printed nothing for short tables.
+            {
+                // ((T, T), (T, T), V, (U, U), (U, U))
+                var item = testTable.GetDoubleImmDataPoint(i);
+                Console.Write($"((x{item.Item1}, y({item.Item2}), imm({Convert.ToString(ConvertToByte(item.Item3), 2)}), z{item.Item4}, a{item.Item5})), ");
+            }
+            Console.WriteLine();
+        }
+
         private static void PrintError<T, U>(TestTableSse2<T, U> testTable, string functionName = "", string testFuncString = "",
             CheckMethodSix<T, U> check = null) where T : struct where U : struct
         {
@@ -1716,6 +2308,19 @@ namespace IntelHardwareIntrinsicTest
             Console.WriteLine();
         }
 
+        private static void PrintError<T, U, V>(TestTableImmSse2<T, U, V> testTable, string functionName = "", string testFuncString = "", // Prints the header and every eight-element data point that also has an inArray2 entry; `check` is accepted but not invoked.
+            CheckMethodEightInsert<T, U, V> check = null) where T : struct where U : struct where V : struct
+        {
+            PrintErrorHeaderTu<T>(functionName, testFuncString);
+            int vectorCount = testTable.inArray2.Length;
+            for (int start = 0, vector = 0; start < testTable.inArray1.Length - 7 && vector < vectorCount; start += 8, vector++)
+            {
+                // ((T, T, T, T, T, T, T, T), T, V, (U, U, U, U, U, U, U, U), (U, U, U, U, U, U, U, U))
+                Console.Write($"({testTable.GetOctaImmDataPoint(start)})");
+            }
+            Console.WriteLine();
+        }
+
         private static void PrintError<T, U>(TestTableSse2<T, U> testTable, string functionName = "", string testFuncString = "",
             CheckMethodEightOfTEightOfU<T, U> check = null) where T : struct where U : struct
         {