Implement AVX2 LoadAlignedVector256/128NonTemporal
author Fei Peng <fei.peng@intel.com>
Mon, 5 Feb 2018 20:14:52 +0000 (12:14 -0800)
committer Fei Peng <fei.peng@intel.com>
Tue, 6 Feb 2018 21:44:21 +0000 (13:44 -0800)
src/jit/hwintrinsiclistxarch.h
src/jit/instrsxarch.h
src/jit/lowerxarch.cpp
tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal_ro.csproj [new file with mode: 0644]

index 2b44ad7..05f00d2 100644 (file)
@@ -214,6 +214,7 @@ HARDWARE_INTRINSIC(AVX2_IsSupported,                                 "get_IsSupp
 HARDWARE_INTRINSIC(AVX2_Add,                                         "Add",                                              AVX2,       -1,           32,           2,           {INS_paddb,     INS_paddb,     INS_paddw,     INS_paddw,     INS_paddd,     INS_paddd,     INS_paddq,     INS_paddq,     INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Multiply,                                    "Multiply",                                         AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuldq,    INS_pmuludq,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_BlendVariable,                               "BlendVariable",                                    AVX2,       -1,           32,           3,           {INS_vpblendvb, INS_vpblendvb, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,             "LoadAlignedVector256NonTemporal",                  AVX2,       -1,           32,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
 
 //  AES Intrinsics
 HARDWARE_INTRINSIC(AES_IsSupported,                                  "get_IsSupported",                                  AES,        -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
index 5d934ae..eedfd63 100644 (file)
@@ -415,6 +415,7 @@ INST3( blendvpd,     "blendvpd"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SS
 INST3( pblendvb,     "pblendvb"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x10))   // Variable Blend Packed Bytes
 INST3( lddqu,        "lddqu"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEDBL(0xF0))  // Load Unaligned integer
 INST3( movntdqa,     "movntdqa"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x2A))   // Load Double Quadword Non-Temporal Aligned Hint
+INST3( movddup,      "movddup"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEDBL(0x12))   // Replicate Double FP Values
 
 INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
 
index 28ce7a8..0c2f2a2 100644 (file)
@@ -2344,6 +2344,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
         // VEX encoding supports unaligned memory ops, so we can fold them
         case NI_SSE_LoadVector128:
         case NI_SSE2_LoadVector128:
+        case NI_AVX_LoadVector256:
+        case NI_AVX_LoadAlignedVector256:
             isContainable = (containingCategory == HW_Category_SimpleSIMD) && comp->canUseVexEncoding();
             break;
 
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal.cs
new file mode 100644 (file)
index 0000000..cf0c29a
--- /dev/null
@@ -0,0 +1,231 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+    class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+
+        static unsafe int Main(string[] args) // Returns Pass (100) if every per-type check succeeds or AVX2 is unsupported, otherwise Fail (0).
+        {
+            int testResult = Pass; // stays Pass unless a comparison below fails
+
+            if (Avx2.IsSupported) // every check is skipped when AVX2 is unavailable
+            {
+                { // int: 8 elements
+                    byte* inBuffer = stackalloc byte[64]; // Align advances by at most 32 bytes, leaving room for the 32 bytes compared below
+                    int* inArray = (int*)Align(inBuffer, 32); // 32-byte-aligned pointer inside inBuffer
+                    int* outArray = stackalloc int[8];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray); // NOTE(review): inArray is never written before this load, so the check only sees whatever stackalloc provided
+                    Unsafe.Write(outArray, vf); // copy the loaded vector into outArray for element-wise comparison
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i]) // any differing element fails the whole test
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on int:");
+                            for (var n = 0; n < 8; n++) // dump the loaded values for diagnosis
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break; // stop at the first mismatch
+                        }
+                    }
+                }
+
+                { // long: 4 elements, same procedure as the int case
+                    byte* inBuffer = stackalloc byte[64];
+                    long* inArray = (long*)Align(inBuffer, 32);
+                    long* outArray = stackalloc long[4];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on long:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // uint: 8 elements
+                    byte* inBuffer = stackalloc byte[64];
+                    uint* inArray = (uint*)Align(inBuffer, 32);
+                    uint* outArray = stackalloc uint[8];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 8; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on uint:");
+                            for (var n = 0; n < 8; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // ulong: 4 elements
+                    byte* inBuffer = stackalloc byte[64];
+                    ulong* inArray = (ulong*)Align(inBuffer, 32);
+                    ulong* outArray = stackalloc ulong[4];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 4; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on ulong:");
+                            for (var n = 0; n < 4; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // short: 16 elements
+                    byte* inBuffer = stackalloc byte[64];
+                    short* inArray = (short*)Align(inBuffer, 32);
+                    short* outArray = stackalloc short[16];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on short:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // ushort: 16 elements
+                    byte* inBuffer = stackalloc byte[64];
+                    ushort* inArray = (ushort*)Align(inBuffer, 32);
+                    ushort* outArray = stackalloc ushort[16];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 16; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on ushort:");
+                            for (var n = 0; n < 16; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // sbyte: 32 elements
+                    byte* inBuffer = stackalloc byte[64];
+                    sbyte* inArray = (sbyte*)Align(inBuffer, 32);
+                    sbyte* outArray = stackalloc sbyte[32];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 32; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on sbyte:");
+                            for (var n = 0; n < 32; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+
+                { // byte: 32 elements
+                    byte* inBuffer = stackalloc byte[64];
+                    byte* inArray = (byte*)Align(inBuffer, 32);
+                    byte* outArray = stackalloc byte[32];
+                    var vf = Avx2.LoadAlignedVector256NonTemporal(inArray);
+                    Unsafe.Write(outArray, vf);
+
+                    for (var i = 0; i < 32; i++)
+                    {
+                        if (inArray[i] != outArray[i])
+                        {
+                            Console.WriteLine("Avx2 LoadAlignedVector256NonTemporal failed on byte:");
+                            for (var n = 0; n < 32; n++)
+                            {
+                                Console.Write(outArray[n] + ", ");
+                            }
+                            Console.WriteLine();
+
+                            testResult = Fail;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            return testResult;
+        }
+
+        static unsafe void* Align(byte* buffer, byte expectedAlignment) // Returns the next expectedAlignment-aligned address strictly above buffer; expectedAlignment is used as a divisor, so it must be non-zero.
+        {
+            // Take the address modulo expectedAlignment (0 .. expectedAlignment - 1) and subtract it
+            // from expectedAlignment, giving an offset of 1 .. expectedAlignment bytes to add to buffer.
+            // An already-aligned buffer is therefore still advanced by a full expectedAlignment bytes.
+
+            var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); // never 0: callers must reserve expectedAlignment spare bytes
+            return (void*)(buffer + misalignment);
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal_r.csproj
new file mode 100644 (file)
index 0000000..bd210dc
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): identical to the GUID in LoadAlignedVector256NonTemporal_ro.csproj -->
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- the test source uses pointers and stackalloc -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize></Optimize> <!-- left empty in this project; the _ro project sets it to True -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="LoadAlignedVector256NonTemporal.cs" /> <!-- the single test source, shared with the _ro project -->
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/LoadAlignedVector256NonTemporal_ro.csproj
new file mode 100644 (file)
index 0000000..164b3f8
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): identical to the GUID in LoadAlignedVector256NonTemporal_r.csproj -->
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- the test source uses pointers and stackalloc -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize> <!-- set to True in this project; the _r project leaves it empty -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="LoadAlignedVector256NonTemporal.cs" /> <!-- the single test source, shared with the _r project -->
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>