Implement SSE4.1 LoadAlignedVector128NonTemporal
author Fei Peng <fei.peng@intel.com>
Thu, 15 Feb 2018 23:25:09 +0000 (15:25 -0800)
committer Fei Peng <fei.peng@intel.com>
Thu, 15 Feb 2018 23:25:09 +0000 (15:25 -0800)
src/jit/hwintrinsiclistxarch.h
tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal_ro.csproj [new file with mode: 0644]

index 54b1ac0..ccdd883 100644 (file)
@@ -236,6 +236,7 @@ HARDWARE_INTRINSIC(SSSE3_IsSupported,                                "get_IsSupp
 HARDWARE_INTRINSIC(SSE41_IsSupported,                                "get_IsSupported",                                  SSE41,      -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_BlendVariable,                              "BlendVariable",                                    SSE41,      -1,           16,           3,           {INS_pblendvb,  INS_pblendvb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_blendvps,  INS_blendvpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_CompareEqual,                               "CompareEqual",                                     SSE41,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pcmpeqq,   INS_pcmpeqq,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal,            "LoadAlignedVector128NonTemporal",                  SSE41,      -1,           16,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)                 
 HARDWARE_INTRINSIC(SSE41_Multiply,                                   "Multiply",                                         SSE41,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuldq,    INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 
 //  SSE42 Intrinsics
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal.cs
new file mode 100644 (file)
index 0000000..cddebe3
--- /dev/null
@@ -0,0 +1,231 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+    class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+
+        static unsafe int Main(string[] args) // returns Pass (100) or Fail (0); the same load/compare check is repeated per element type
+        {
+            int testResult = Pass; // stays Pass unless a comparison below fails (also when Sse41 is unsupported)
+
+            if (Sse41.IsSupported) // every check is skipped when the intrinsic class reports no support
+            {
+                { // int: 4 elements
+                    byte* inBuffer = stackalloc byte[32]; // 32 bytes: Align may advance by up to 16, leaving 16 bytes of data. NOTE(review): never explicitly initialized here
+                    int* inArray = (int*)Align(inBuffer, 16); // 16-byte-aligned source pointer inside inBuffer
+                    int* outArray = stackalloc int[4];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray); // intrinsic under test: load from the aligned address
+                    Unsafe.Write(outArray, vf); // spill the loaded vector so it can be compared element by element
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i]) // loaded elements must equal the source memory
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on int:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break; // report only the first mismatch for this type
+                        }
+                    }
+                }
+
+                { // long: 2 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    long* inArray = (long*)Align(inBuffer, 16);
+                    long* outArray = stackalloc long[2];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 2; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on long:");
+                            for (var n = 0; n < 2; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // uint: 4 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    uint* inArray = (uint*)Align(inBuffer, 16);
+                    uint* outArray = stackalloc uint[4];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on uint:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // ulong: 2 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    ulong* inArray = (ulong*)Align(inBuffer, 16);
+                    ulong* outArray = stackalloc ulong[2];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 2; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on ulong:");
+                            for (var n = 0; n < 2; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // short: 8 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    short* inArray = (short*)Align(inBuffer, 16);
+                    short* outArray = stackalloc short[8];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on short:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // ushort: 8 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    ushort* inArray = (ushort*)Align(inBuffer, 16);
+                    ushort* outArray = stackalloc ushort[8];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on ushort:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // sbyte: 16 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    sbyte* inArray = (sbyte*)Align(inBuffer, 16);
+                    sbyte* outArray = stackalloc sbyte[16];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on sbyte:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // byte: 16 elements
+                    byte* inBuffer = stackalloc byte[32];
+                    byte* inArray = (byte*)Align(inBuffer, 16);
+                    byte* outArray = stackalloc byte[16];
+                    var vf = Sse41.LoadAlignedVector128NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Sse41 LoadAlignedVector128NonTemporal failed on byte:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            return testResult;
+        }
+
+        static unsafe void* Align(byte* buffer, byte expectedAlignment)
+        {
+            // Returns the next address past `buffer` that is a multiple of expectedAlignment.
+            // The offset added is in the range [1, expectedAlignment]: an already-aligned buffer is
+            // advanced by a full expectedAlignment, so callers must over-allocate by that many bytes.
+
+            var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment);
+            return (void*)(buffer + misalignment);
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal_r.csproj
new file mode 100644 (file)
index 0000000..87293f5
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize></Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="LoadAlignedVector128NonTemporal.cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/LoadAlignedVector128NonTemporal_ro.csproj
new file mode 100644 (file)
index 0000000..5573316
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="LoadAlignedVector128NonTemporal.cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>