Updating XML doc comments for hardware intrinsics to include VEX and EVEX forms ...

author Tanner Gooding <tagoo@outlook.com>

Wed, 19 Apr 2023 03:23:49 +0000 (20:23 -0700)

committer GitHub <noreply@github.com>

Wed, 19 Apr 2023 03:23:49 +0000 (20:23 -0700)
author Tanner Gooding <tagoo@outlook.com>
Wed, 19 Apr 2023 03:23:49 +0000 (20:23 -0700)
committer GitHub <noreply@github.com>
Wed, 19 Apr 2023 03:23:49 +0000 (20:23 -0700)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs

index 72da97c9c4b77df37e2d6d33cd03dd521881afb1..b4a56e0b9d60a7ac0ec3141a37a6313893283c61 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs
@@ -27,40 +27,44 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_aesdec_si128 (__m128i a, __m128i RoundKey)
-        ///   AESDEC xmm, xmm/m128
+        ///    AESDEC xmm1,       xmm2/m128
+        ///   VAESDEC xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Decrypt(Vector128<byte> value, Vector128<byte> roundKey) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_aesdeclast_si128 (__m128i a, __m128i RoundKey)
-        ///   AESDECLAST xmm, xmm/m128
+        ///    AESDECLAST xmm1,       xmm2/m128
+        ///   VAESDECLAST xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> DecryptLast(Vector128<byte> value, Vector128<byte> roundKey) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_aesenc_si128 (__m128i a, __m128i RoundKey)
-        ///   AESENC xmm, xmm/m128
+        ///    AESENC xmm1,       xmm2/m128
+        ///   VAESENC xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Encrypt(Vector128<byte> value, Vector128<byte> roundKey) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_aesenclast_si128 (__m128i a, __m128i RoundKey)
-        ///   AESENCLAST xmm, xmm/m128
+        ///    AESENCLAST xmm1,       xmm2/m128
+        ///   VAESENCLAST xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> EncryptLast(Vector128<byte> value, Vector128<byte> roundKey) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_aesimc_si128 (__m128i a)
-        ///   AESIMC xmm, xmm/m128
+        ///    AESIMC xmm1, xmm2/m128
+        ///   VAESIMC xmm1, xmm2/m128
          /// </summary>
          public static Vector128<byte> InverseMixColumns(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_aeskeygenassist_si128 (__m128i a, const int imm8)
-        ///   AESKEYGENASSIST xmm, xmm/m128, imm8
+        ///    AESKEYGENASSIST xmm1, xmm2/m128, imm8
+        ///   VAESKEYGENASSIST xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<byte> KeygenAssist(Vector128<byte> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
      }
-
  }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs

index 77008b678c92db401fc8105fa561fc69aa04d689..1424a66dc83521239a500a519990085aadaebe1e 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs
@@ -27,37 +27,43 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_aesdec_si128 (__m128i a, __m128i RoundKey)
-        ///   AESDEC xmm, xmm/m128
+        ///    AESDEC xmm1,       xmm2/m128
+        ///   VAESDEC xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Decrypt(Vector128<byte> value, Vector128<byte> roundKey) => Decrypt(value, roundKey);
  
          /// <summary>
          /// __m128i _mm_aesdeclast_si128 (__m128i a, __m128i RoundKey)
-        ///   AESDECLAST xmm, xmm/m128
+        ///    AESDECLAST xmm1,       xmm2/m128
+        ///   VAESDECLAST xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> DecryptLast(Vector128<byte> value, Vector128<byte> roundKey) => DecryptLast(value, roundKey);
  
          /// <summary>
          /// __m128i _mm_aesenc_si128 (__m128i a, __m128i RoundKey)
-        ///   AESENC xmm, xmm/m128
+        ///    AESENC xmm1,       xmm2/m128
+        ///   VAESENC xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Encrypt(Vector128<byte> value, Vector128<byte> roundKey) => Encrypt(value, roundKey);
  
          /// <summary>
          /// __m128i _mm_aesenclast_si128 (__m128i a, __m128i RoundKey)
-        ///   AESENCLAST xmm, xmm/m128
+        ///    AESENCLAST xmm1,       xmm2/m128
+        ///   VAESENCLAST xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> EncryptLast(Vector128<byte> value, Vector128<byte> roundKey) => EncryptLast(value, roundKey);
  
          /// <summary>
          /// __m128i _mm_aesimc_si128 (__m128i a)
-        ///   AESIMC xmm, xmm/m128
+        ///    AESIMC xmm1, xmm2/m128
+        ///   VAESIMC xmm1, xmm2/m128
          /// </summary>
          public static Vector128<byte> InverseMixColumns(Vector128<byte> value) => InverseMixColumns(value);
  
          /// <summary>
          /// __m128i _mm_aeskeygenassist_si128 (__m128i a, const int imm8)
-        ///   AESKEYGENASSIST xmm, xmm/m128, imm8
+        ///    AESKEYGENASSIST xmm1, xmm2/m128, imm8
+        ///   VAESKEYGENASSIST xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<byte> KeygenAssist(Vector128<byte> value, [ConstantExpected] byte control) => KeygenAssist(value, control);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs

index 715cebdfe192fc90bd4e701bac4e24c030b7b675..663071e37e669881ded6635649747c9bbb03f5c2 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs
@@ -27,347 +27,359 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m256 _mm256_add_ps (__m256 a, __m256 b)
-        ///   VADDPS ymm, ymm, ymm/m256
+        ///   VADDPS ymm1,         ymm2, ymm3/m256
+        ///   VADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Add(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_add_pd (__m256d a, __m256d b)
-        ///   VADDPD ymm, ymm, ymm/m256
+        ///   VADDPD ymm1,         ymm2, ymm3/m256
+        ///   VADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Add(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_addsub_ps (__m256 a, __m256 b)
-        ///   VADDSUBPS ymm, ymm, ymm/m256
+        ///   VADDSUBPS ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<float> AddSubtract(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_addsub_pd (__m256d a, __m256d b)
-        ///   VADDSUBPD ymm, ymm, ymm/m256
+        ///   VADDSUBPD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<double> AddSubtract(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_and_ps (__m256 a, __m256 b)
-        ///   VANDPS ymm, ymm, ymm/m256
+        ///   VANDPS ymm1,         ymm2, ymm3/m256
+        ///   VANDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> And(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_and_pd (__m256d a, __m256d b)
-        ///   VANDPD ymm, ymm, ymm/m256
+        ///   VANDPD ymm1,         ymm2, ymm3/m256
+        ///   VANDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> And(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_andnot_ps (__m256 a, __m256 b)
-        ///   VANDNPS ymm, ymm, ymm/m256
+        ///   VANDNPS ymm1,         ymm2, ymm3/m256
+        ///   VANDNPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> AndNot(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_andnot_pd (__m256d a, __m256d b)
-        ///   VANDNPD ymm, ymm, ymm/m256
+        ///   VANDNPD ymm1,         ymm2, ymm3/m256
+        ///   VANDNPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> AndNot(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8)
-        ///   VBLENDPS ymm, ymm, ymm/m256, imm8
+        ///   VBLENDPS ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> Blend(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_blend_pd (__m256d a, __m256d b, const int imm8)
-        ///   VBLENDPD ymm, ymm, ymm/m256, imm8
+        ///   VBLENDPD ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<double> Blend(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_blendv_ps (__m256 a, __m256 b, __m256 mask)
-        ///   VBLENDVPS ymm, ymm, ymm/m256, ymm
+        ///   VBLENDVPS ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<float> BlendVariable(Vector256<float> left, Vector256<float> right, Vector256<float> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask)
-        ///   VBLENDVPD ymm, ymm, ymm/m256, ymm
+        ///   VBLENDVPD ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<double> BlendVariable(Vector256<double> left, Vector256<double> right, Vector256<double> mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_broadcast_ss (float const * mem_addr)
-        ///   VBROADCASTSS xmm, m32
+        ///   VBROADCASTSS xmm1,         m32
+        ///   VBROADCASTSS xmm1 {k1}{z}, m32
          /// </summary>
          public static unsafe Vector128<float> BroadcastScalarToVector128(float* source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_broadcast_ss (float const * mem_addr)
-        ///   VBROADCASTSS ymm, m32
+        ///   VBROADCASTSS ymm1,         m32
+        ///   VBROADCASTSS ymm1 {k1}{z}, m32
          /// </summary>
          public static unsafe Vector256<float> BroadcastScalarToVector256(float* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_broadcast_sd (double const * mem_addr)
-        ///   VBROADCASTSD ymm, m64
+        ///   VBROADCASTSD ymm1,         m64
+        ///   VBROADCASTSD ymm1 {k1}{z}, m64
          /// </summary>
          public static unsafe Vector256<double> BroadcastScalarToVector256(double* source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_broadcast_ps (__m128 const * mem_addr)
-        ///   VBROADCASTF128, ymm, m128
+        ///   VBROADCASTF128  ymm1,         m128
+        ///   VBROADCASTF32x4 ymm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector256<float> BroadcastVector128ToVector256(float* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_broadcast_pd (__m128d const * mem_addr)
-        ///   VBROADCASTF128, ymm, m128
+        ///   VBROADCASTF128  ymm1,         m128
+        ///   VBROADCASTF64x2 ymm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector256<double> BroadcastVector128ToVector256(double* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m256 _mm256_ceil_ps (__m256 a)
-        ///   VROUNDPS ymm, ymm/m256, imm8(10)
+        /// __m256 _mm256_ceil_ps (__m256 a)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(10)
          /// </summary>
          public static Vector256<float> Ceiling(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_ceil_pd (__m256d a)
-        ///   VROUNDPD ymm, ymm/m256, imm8(10)
+        /// __m256d _mm256_ceil_pd (__m256d a)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(10)
          /// </summary>
          public static Vector256<double> Ceiling(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_cmp_ps (__m128 a, __m128 b, const int imm8)
-        ///   VCMPPS xmm, xmm, xmm/m128, imm8
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<float> Compare(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8)
-        ///   VCMPPD xmm, xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<double> Compare(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
-        /// <summary>
          /// __m256 _mm256_cmp_ps (__m256 a, __m256 b, const int imm8)
-        ///   VCMPPS ymm, ymm, ymm/m256, imm8
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> Compare(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8)
-        ///   VCMPPD ymm, ymm, ymm/m256, imm8
-        /// </summary>
-        public static Vector256<double> Compare(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_cmpeq_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(0)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(0)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<float> CompareEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256d _mm256_cmpeq_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(0)
-        /// The above native signature does not exist. We provide this additional overload for completeness.
-        /// </summary>
-        public static Vector256<double> CompareEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_cmpgt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(14)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(14)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<float> CompareGreaterThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256d _mm256_cmpgt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(14)
-        /// The above native signature does not exist. We provide this additional overload for completeness.
-        /// </summary>
-        public static Vector256<double> CompareGreaterThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_cmpge_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(13)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(13)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<float> CompareGreaterThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpge_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(13)
+        /// __m256 _mm256_cmplt_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(1)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<float> CompareLessThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmplt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(1)
+        /// __m256 _mm256_cmple_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(2)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareLessThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> CompareLessThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmplt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(1)
+        /// __m256 _mm256_cmpneq_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareLessThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<float> CompareNotEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmple_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(2)
+        /// __m256 _mm256_cmpngt_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareLessThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> CompareNotGreaterThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmple_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(2)
+        /// __m256 _mm256_cmpnge_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareLessThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<float> CompareNotGreaterThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmpneq_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(4)
+        /// __m256 _mm256_cmpnlt_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(5)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> CompareNotLessThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpneq_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(4)
+        /// __m256 _mm256_cmpnle_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(6)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<float> CompareNotLessThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmpngt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(10)
+        /// __m256 _mm256_cmpord_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(7)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotGreaterThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> CompareOrdered(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpngt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(10)
+        /// __m256 _mm256_cmpunord_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(3)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotGreaterThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> CompareUnordered(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m256 _mm256_cmpnge_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(9)
+        /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8
+        /// </summary>
+        public static Vector128<double> Compare(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8
+        /// </summary>
+        public static Vector256<double> Compare(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256d _mm256_cmpeq_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(0)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotGreaterThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> CompareEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpnge_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(9)
+        /// __m256d _mm256_cmpgt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(14)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<double> CompareGreaterThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmpnlt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(5)
+        /// __m256d _mm256_cmpge_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(13)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotLessThan(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> CompareGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpnlt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(5)
+        /// __m256d _mm256_cmplt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(1)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotLessThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<double> CompareLessThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmpnle_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(6)
+        /// __m256d _mm256_cmple_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(2)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotLessThanOrEqual(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> CompareLessThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpnle_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(6)
+        /// __m256d _mm256_cmpneq_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotLessThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<double> CompareNotEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmpord_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(7)
+        /// __m256d _mm256_cmpngt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareOrdered(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> CompareNotGreaterThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cmpord_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(7)
+        /// __m256d _mm256_cmpnge_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareOrdered(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<double> CompareNotGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8)
-        ///   VCMPSS xmm, xmm, xmm/m32, imm8
+        /// __m256d _mm256_cmpnlt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(5)
+        /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector128<double> CompareScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> CompareNotLessThan(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8)
-        ///   VCMPSD xmm, xmm, xmm/m64, imm8
+        /// __m256d _mm256_cmpnle_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(6)
+        /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector128<float> CompareScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<double> CompareNotLessThanOrEqual(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_cmpunord_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(3)
+        /// __m256d _mm256_cmpord_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2/m256, imm8(7)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareUnordered(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> CompareOrdered(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_cmpunord_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(3)
+        ///   VCMPPD ymm1, ymm2/m256, imm8(3)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<double> CompareUnordered(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8
+        /// </summary>
+        public static Vector128<float> CompareScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8
+        /// </summary>
+        public static Vector128<double> CompareScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128i _mm256_cvtpd_epi32 (__m256d a)
-        ///   VCVTPD2DQ xmm, ymm/m256
+        ///   VCVTPD2DQ xmm1,         ymm2/m256
+        ///   VCVTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128 _mm256_cvtpd_ps (__m256d a)
-        ///   VCVTPD2PS xmm, ymm/m256
+        ///   VCVTPD2PS xmm1,         ymm2/m256
+        ///   VCVTPD2PS xmm1 {k1}{z}, ymm2/m256/m64bcst
          /// </summary>
          public static Vector128<float> ConvertToVector128Single(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m256i _mm256_cvtps_epi32 (__m256 a)
-        ///   VCVTPS2DQ ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256 _mm256_cvtepi32_ps (__m256i a)
-        ///   VCVTDQ2PS ymm, ymm/m256
+        /// __m256d _mm256_cvtepi32_pd (__m128i a)
+        ///   VCVTDQ2PD ymm1,         xmm2/m128
+        ///   VCVTDQ2PD ymm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static Vector256<float> ConvertToVector256Single(Vector256<int> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> ConvertToVector256Double(Vector128<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_cvtps_pd (__m128 a)
-        ///   VCVTPS2PD ymm, xmm/m128
+        ///   VCVTPS2PD ymm1,         xmm2/m128
+        ///   VCVTPS2PD ymm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
          public static Vector256<double> ConvertToVector256Double(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_cvtepi32_pd (__m128i a)
-        ///   VCVTDQ2PD ymm, xmm/m128
+        /// __m256i _mm256_cvtps_epi32 (__m256 a)
+        ///   VCVTPS2DQ ymm1,         ymm2/m256
+        ///   VCVTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
-        public static Vector256<double> ConvertToVector256Double(Vector128<int> value) { throw new PlatformNotSupportedException(); }
-
+        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm256_cvttpd_epi32 (__m256d a)
-        ///   VCVTTPD2DQ xmm, ymm/m256
+        /// __m256 _mm256_cvtepi32_ps (__m256i a)
+        ///   VCVTDQ2PS ymm1,         ymm2/m256
+        ///   VCVTDQ2PS ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> ConvertToVector256Single(Vector256<int> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m256i _mm256_cvttps_epi32 (__m256 a)
-        ///   VCVTTPS2DQ ymm, ymm/m256
+        ///   VCVTTPS2DQ ymm1,         ymm2/m256
+        ///   VCVTTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32WithTruncation(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm256_cvttpd_epi32 (__m256d a)
+        ///   VCVTTPD2DQ xmm1,         ymm2/m256
+        ///   VCVTTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst
+        /// </summary>
+        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_div_ps (__m256 a, __m256 b)
-        ///   VDIVPS ymm, ymm, ymm/m256
+        ///   VDIVPS ymm1,         ymm2, ymm3/m256
+        ///   VDIVPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Divide(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_div_pd (__m256d a, __m256d b)
-        ///   VDIVPD ymm, ymm, ymm/m256
+        ///   VDIVPD ymm1,         ymm2, ymm3/m256
+        ///   VDIVPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Divide(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
@@ -379,1011 +391,1056 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m256 _mm256_moveldup_ps (__m256 a)
-        ///   VMOVSLDUP ymm, ymm/m256
+        ///   VMOVSLDUP ymm1,         ymm2/m256
+        ///   VMOVSLDUP ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<float> DuplicateEvenIndexed(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_movedup_pd (__m256d a)
-        ///   VMOVDDUP ymm, ymm/m256
+        ///   VMOVDDUP ymm1,         ymm2/m256
+        ///   VMOVDDUP ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<double> DuplicateEvenIndexed(Vector256<double> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_movehdup_ps (__m256 a)
-        ///   VMOVSHDUP ymm, ymm/m256
+        ///   VMOVSHDUP ymm1,         ymm2/m256
+        ///   VMOVSHDUP ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<byte> ExtractVector128(Vector256<byte> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<sbyte> ExtractVector128(Vector256<sbyte> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<short> ExtractVector128(Vector256<short> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<ushort> ExtractVector128(Vector256<ushort> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<int> ExtractVector128(Vector256<int> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<uint> ExtractVector128(Vector256<uint> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<long> ExtractVector128(Vector256<long> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<ulong> ExtractVector128(Vector256<ulong> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm256_extractf128_ps (__m256 a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<float> ExtractVector128(Vector256<float> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm256_extractf128_pd (__m256d a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<double> ExtractVector128(Vector256<double> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_floor_ps (__m256 a)
-        ///   VROUNDPS ymm, ymm/m256, imm8(9)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(9)
          /// </summary>
          public static Vector256<float> Floor(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_floor_pd (__m256d a)
-        ///   VROUNDPS ymm, ymm/m256, imm8(9)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(9)
          /// </summary>
          public static Vector256<double> Floor(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_hadd_ps (__m256 a, __m256 b)
-        ///   VHADDPS ymm, ymm, ymm/m256
+        ///   VHADDPS ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<float> HorizontalAdd(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_hadd_pd (__m256d a, __m256d b)
-        ///   VHADDPD ymm, ymm, ymm/m256
+        ///   VHADDPD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<double> HorizontalAdd(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_hsub_ps (__m256 a, __m256 b)
-        ///   VHSUBPS ymm, ymm, ymm/m256
+        ///   VHSUBPS ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<float> HorizontalSubtract(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_hsub_pd (__m256d a, __m256d b)
-        ///   VHSUBPD ymm, ymm, ymm/m256
+        ///   VHSUBPD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<double> HorizontalSubtract(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<byte> InsertVector128(Vector256<byte> value, Vector128<byte> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<sbyte> InsertVector128(Vector256<sbyte> value, Vector128<sbyte> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<short> InsertVector128(Vector256<short> value, Vector128<short> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<ushort> InsertVector128(Vector256<ushort> value, Vector128<ushort> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<int> InsertVector128(Vector256<int> value, Vector128<int> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<uint> InsertVector128(Vector256<uint> value, Vector128<uint> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<long> InsertVector128(Vector256<long> value, Vector128<long> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<ulong> InsertVector128(Vector256<ulong> value, Vector128<ulong> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<float> InsertVector128(Vector256<float> value, Vector128<float> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256d _mm256_insertf128_pd (__m256d a, __m128d b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<double> InsertVector128(Vector256<double> value, Vector128<double> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<sbyte> LoadVector256(sbyte* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<byte> LoadVector256(byte* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<short> LoadVector256(short* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<ushort> LoadVector256(ushort* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<int> LoadVector256(int* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<uint> LoadVector256(uint* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<long> LoadVector256(long* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<ulong> LoadVector256(ulong* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256 _mm256_loadu_ps (float const * mem_addr)
-        ///   VMOVUPS ymm, ymm/m256
-        /// </summary>
-        public static unsafe Vector256<float> LoadVector256(float* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256d _mm256_loadu_pd (double const * mem_addr)
-        ///   VMOVUPD ymm, ymm/m256
-        /// </summary>
-        public static unsafe Vector256<double> LoadVector256(double* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<sbyte> LoadAlignedVector256(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<byte> LoadAlignedVector256(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<short> LoadAlignedVector256(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<ushort> LoadAlignedVector256(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<int> LoadAlignedVector256(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<uint> LoadAlignedVector256(uint* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA64 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<long> LoadAlignedVector256(long* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA64 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<ulong> LoadAlignedVector256(ulong* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_load_ps (float const * mem_addr)
-        ///   VMOVAPS ymm, ymm/m256
+        ///   VMOVAPS ymm1,         m256
+        ///   VMOVAPS ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<float> LoadAlignedVector256(float* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_load_pd (double const * mem_addr)
-        ///   VMOVAPD ymm, ymm/m256
+        ///   VMOVAPD ymm1,         m256
+        ///   VMOVAPD ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<double> LoadAlignedVector256(double* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<sbyte> LoadDquVector256(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<byte> LoadDquVector256(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<short> LoadDquVector256(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<ushort> LoadDquVector256(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<int> LoadDquVector256(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<uint> LoadDquVector256(uint* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<long> LoadDquVector256(long* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<ulong> LoadDquVector256(ulong* address) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU  ymm1,         m256
+        ///   VMOVDQU8 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<sbyte> LoadVector256(sbyte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU  ymm1,         m256
+        ///   VMOVDQU8 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<byte> LoadVector256(byte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU16 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<short> LoadVector256(short* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU16 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<ushort> LoadVector256(ushort* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU32 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<int> LoadVector256(int* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU32 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<uint> LoadVector256(uint* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU64 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<long> LoadVector256(long* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU64 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<ulong> LoadVector256(ulong* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256 _mm256_loadu_ps (float const * mem_addr)
+        ///   VMOVUPS ymm1,         m256
+        ///   VMOVUPS ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<float> LoadVector256(float* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256d _mm256_loadu_pd (double const * mem_addr)
+        ///   VMOVUPD ymm1,         m256
+        ///   VMOVUPD ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<double> LoadVector256(double* address) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
-        ///   VMASKMOVPS xmm, xmm, m128
+        ///   VMASKMOVPS xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
-        ///   VMASKMOVPD xmm, xmm, m128
+        ///   VMASKMOVPD xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
-        ///   VMASKMOVPS ymm, ymm, m256
+        ///   VMASKMOVPS ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
-        ///   VMASKMOVPD ymm, ymm, m256
+        ///   VMASKMOVPD ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
-        ///   VMASKMOVPS m128, xmm, xmm
+        ///   VMASKMOVPS m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
-        ///   VMASKMOVPD m128, xmm, xmm
+        ///   VMASKMOVPD m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
-        ///   VMASKMOVPS m256, ymm, ymm
+        ///   VMASKMOVPS m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
-        ///   VMASKMOVPD m256, ymm, ymm
+        ///   VMASKMOVPD m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_max_ps (__m256 a, __m256 b)
-        ///   VMAXPS ymm, ymm, ymm/m256
+        ///   VMAXPS ymm1,         ymm2, ymm3/m256
+        ///   VMAXPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Max(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_max_pd (__m256d a, __m256d b)
-        ///   VMAXPD ymm, ymm, ymm/m256
+        ///   VMAXPD ymm1,         ymm2, ymm3/m256
+        ///   VMAXPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Max(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_min_ps (__m256 a, __m256 b)
-        ///   VMINPS ymm, ymm, ymm/m256
+        ///   VMINPS ymm1,         ymm2, ymm3/m256
+        ///   VMINPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Min(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_min_pd (__m256d a, __m256d b)
-        ///   VMINPD ymm, ymm, ymm/m256
+        ///   VMINPD ymm1,         ymm2, ymm3/m256
+        ///   VMINPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Min(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm256_movemask_ps (__m256 a)
-        ///   VMOVMSKPS reg, ymm
+        ///   VMOVMSKPS r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm256_movemask_pd (__m256d a)
-        ///   VMOVMSKPD reg, ymm
+        ///   VMOVMSKPD r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_mul_ps (__m256 a, __m256 b)
-        ///   VMULPS ymm, ymm, ymm/m256
+        ///   VMULPS ymm1,         ymm2, ymm3/m256
+        ///   VMULPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Multiply(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_mul_pd (__m256d a, __m256d b)
-        ///   VMULPD ymm, ymm, ymm/m256
+        ///   VMULPD ymm1,         ymm2, ymm3/m256
+        ///   VMULPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Multiply(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_or_ps (__m256 a, __m256 b)
-        ///   VORPS ymm, ymm, ymm/m256
+        ///   VORPS ymm1,         ymm2, ymm3/m256
+        ///   VORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Or(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_or_pd (__m256d a, __m256d b)
-        ///   VORPD ymm, ymm, ymm/m256
+        ///   VORPD ymm1,         ymm2, ymm3/m256
+        ///   VORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Or(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_permute_ps (__m128 a, int imm8)
-        ///   VPERMILPS xmm, xmm, imm8
+        ///   VPERMILPS xmm1,         xmm2/m128,         imm8
+        ///   VPERMILPS xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8
          /// </summary>
          public static Vector128<float> Permute(Vector128<float> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_permute_pd (__m128d a, int imm8)
-        ///   VPERMILPD xmm, xmm, imm8
+        ///   VPERMILPD xmm1,         xmm2/m128,         imm8
+        ///   VPERMILPD xmm1 {k1}{z}, xmm2/m128/m64bcst, imm8
          /// </summary>
          public static Vector128<double> Permute(Vector128<double> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_permute_ps (__m256 a, int imm8)
-        ///   VPERMILPS ymm, ymm, imm8
+        ///   VPERMILPS ymm1,         ymm2/m256,         imm8
+        ///   VPERMILPS ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<float> Permute(Vector256<float> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_permute_pd (__m256d a, int imm8)
-        ///   VPERMILPD ymm, ymm, imm8
+        ///   VPERMILPD ymm1,         ymm2/m256,         imm8
+        ///   VPERMILPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<double> Permute(Vector256<double> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<byte> Permute2x128(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<sbyte> Permute2x128(Vector256<sbyte> left, Vector256<sbyte> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<short> Permute2x128(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ushort> Permute2x128(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<int> Permute2x128(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<uint> Permute2x128(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<long> Permute2x128(Vector256<long> left, Vector256<long> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ulong> Permute2x128(Vector256<ulong> left, Vector256<ulong> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> Permute2x128(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256d _mm256_permute2f128_pd (__m256d a, __m256d b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<double> Permute2x128(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_permutevar_ps (__m128 a, __m128i b)
-        ///   VPERMILPS xmm, xmm, xmm/m128
+        ///   VPERMILPS xmm1,         xmm2, xmm3/m128
+        ///   VPERMILPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> PermuteVar(Vector128<float> left, Vector128<int> control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_permutevar_pd (__m128d a, __m128i b)
-        ///   VPERMILPD xmm, xmm, xmm/m128
+        ///   VPERMILPD xmm1,         xmm2, xmm3/m128
+        ///   VPERMILPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> PermuteVar(Vector128<double> left, Vector128<long> control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_permutevar_ps (__m256 a, __m256i b)
-        ///   VPERMILPS ymm, ymm, ymm/m256
+        ///   VPERMILPS ymm1,         ymm2, ymm3/m256
+        ///   VPERMILPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> PermuteVar(Vector256<float> left, Vector256<int> control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_permutevar_pd (__m256d a, __m256i b)
-        ///   VPERMILPD ymm, ymm, ymm/m256
+        ///   VPERMILPD ymm1,         ymm2, ymm3/m256
+        ///   VPERMILPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> PermuteVar(Vector256<double> left, Vector256<long> control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_rcp_ps (__m256 a)
-        ///   VRCPPS ymm, ymm/m256
+        ///   VRCPPS ymm1, ymm2/m256
          /// </summary>
          public static Vector256<float> Reciprocal(Vector256<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_rsqrt_ps (__m256 a)
-        ///   VRSQRTPS ymm, ymm/m256
+        ///   VRSQRTPS ymm1, ymm2/m256
          /// </summary>
          public static Vector256<float> ReciprocalSqrt(Vector256<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(8)
-        /// </summary>
-        public static Vector256<float> RoundToNearestInteger(Vector256<float> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(9)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundToNegativeInfinity(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> RoundCurrentDirection(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(10)
+        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundToPositiveInfinity(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> RoundCurrentDirection(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(11)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundToZero(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> RoundToNearestInteger(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION)
-        ///   VROUNDPS ymm, ymm/m256, imm8(4)
+        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundCurrentDirection(Vector256<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> RoundToNearestInteger(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(8)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(9)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<double> RoundToNearestInteger(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> RoundToNegativeInfinity(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(9)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(9)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector256<double> RoundToNegativeInfinity(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
+        /// </summary>
+        public static Vector256<float> RoundToPositiveInfinity(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(10)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector256<double> RoundToPositiveInfinity(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(11)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<double> RoundToZero(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<float> RoundToZero(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION)
-        ///   VROUNDPD ymm, ymm/m256, imm8(4)
+        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<double> RoundCurrentDirection(Vector256<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector256<double> RoundToZero(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_shuffle_ps (__m256 a, __m256 b, const int imm8)
-        ///   VSHUFPS ymm, ymm, ymm/m256, imm8
+        ///   VSHUFPS ymm1,         ymm2, ymm3/m256,         imm8
+        ///   VSHUFPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<float> Shuffle(Vector256<float> value, Vector256<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_shuffle_pd (__m256d a, __m256d b, const int imm8)
-        ///   VSHUFPD ymm, ymm, ymm/m256, imm8
+        ///   VSHUFPD ymm1,         ymm2, ymm3/m256,         imm8
+        ///   VSHUFPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<double> Shuffle(Vector256<double> value, Vector256<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_sqrt_ps (__m256 a)
-        ///   VSQRTPS ymm, ymm/m256
+        ///   VSQRTPS ymm1,         ymm2/m256
+        ///   VSQRTPS ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
          public static Vector256<float> Sqrt(Vector256<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_sqrt_pd (__m256d a)
-        ///   VSQRTPD ymm, ymm/m256
+        ///   VSQRTPD ymm1,         ymm2/m256
+        ///   VSQRTPD ymm1 {k1}{z}, ymm2/m256/m64bcst
          /// </summary>
          public static Vector256<double> Sqrt(Vector256<double> value) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU  m256,         ymm1
+        ///   VMOVDQU8 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(sbyte* address, Vector256<sbyte> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU  m256,         ymm1
+        ///   VMOVDQU8 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(byte* address, Vector256<byte> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU16 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(short* address, Vector256<short> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU16 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(ushort* address, Vector256<ushort> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU32 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(int* address, Vector256<int> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU32 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(uint* address, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU64 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(long* address, Vector256<long> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU64 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(ulong* address, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_ps (float * mem_addr, __m256 a)
+        ///   VMOVUPS m256,         ymm1
+        ///   VMOVUPS m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(float* address, Vector256<float> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm256_storeu_pd (double * mem_addr, __m256d a)
+        ///   VMOVUPD m256,         ymm1
+        ///   VMOVUPD m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(double* address, Vector256<double> source) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(sbyte* address, Vector256<sbyte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(byte* address, Vector256<byte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(short* address, Vector256<short> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(ushort* address, Vector256<ushort> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(int* address, Vector256<int> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(uint* address, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA64 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(long* address, Vector256<long> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA64 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(ulong* address, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_ps (float * mem_addr, __m256 a)
-        ///   VMOVAPS m256, ymm
+        ///   VMOVAPS m256,         ymm1
+        ///   VMOVAPS m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(float* address, Vector256<float> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_store_pd (double * mem_addr, __m256d a)
-        ///   VMOVAPD m256, ymm
+        ///   VMOVAPD m256,         ymm1
+        ///   VMOVAPD m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(double* address, Vector256<double> source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector256<sbyte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(byte* address, Vector256<byte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(short* address, Vector256<short> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector256<ushort> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(int* address, Vector256<int> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(uint* address, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(long* address, Vector256<long> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_ps (float * mem_addr, __m256 a)
-        ///   MOVNTPS m256, ymm
+        ///   VMOVNTPS m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(float* address, Vector256<float> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_stream_pd (double * mem_addr, __m256d a)
-        ///   MOVNTPD m256, ymm
+        ///   VMOVNTPD m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(double* address, Vector256<double> source) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(sbyte* address, Vector256<sbyte> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(byte* address, Vector256<byte> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(short* address, Vector256<short> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(ushort* address, Vector256<ushort> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(int* address, Vector256<int> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(uint* address, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(long* address, Vector256<long> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(ulong* address, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_ps (float * mem_addr, __m256 a)
-        ///   MOVUPS m256, ymm
-        /// </summary>
-        public static unsafe void Store(float* address, Vector256<float> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm256_storeu_pd (double * mem_addr, __m256d a)
-        ///   MOVUPD m256, ymm
-        /// </summary>
-        public static unsafe void Store(double* address, Vector256<double> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_sub_ps (__m256 a, __m256 b)
-        ///   VSUBPS ymm, ymm, ymm/m256
+        ///   VSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Subtract(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_sub_pd (__m256d a, __m256d b)
-        ///   VSUBPD ymm, ymm, ymm/m256
+        ///   VSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Subtract(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_testc_ps (__m128 a, __m128 b)
-        ///   VTESTPS xmm, xmm/m128
+        ///   VTESTPS xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_pd (__m128d a, __m128d b)
-        ///   VTESTPD xmm, xmm/m128
+        ///   VTESTPD xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_ps (__m256 a, __m256 b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPS ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testc_pd (__m256d a, __m256d b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPD ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_testnzc_ps (__m128 a, __m128 b)
-        ///   VTESTPS xmm, xmm/m128
+        ///   VTESTPS xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_pd (__m128d a, __m128d b)
-        ///   VTESTPD xmm, xmm/m128
+        ///   VTESTPD xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_ps (__m256 a, __m256 b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPS ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testnzc_pd (__m256d a, __m256d b)
-        ///   VTESTPD ymm, ymm/m256
+        ///   VTESTPD ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_testz_ps (__m128 a, __m128 b)
-        ///   VTESTPS xmm, xmm/m128
+        ///   VTESTPS xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_pd (__m128d a, __m128d b)
-        ///   VTESTPD xmm, xmm/m128
+        ///   VTESTPD xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_ps (__m256 a, __m256 b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPS ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm256_testz_pd (__m256d a, __m256d b)
-        ///   VTESTPD ymm, ymm/m256
+        ///   VTESTPD ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_unpackhi_ps (__m256 a, __m256 b)
-        ///   VUNPCKHPS ymm, ymm, ymm/m256
+        ///   VUNPCKHPS ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKHPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> UnpackHigh(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_unpackhi_pd (__m256d a, __m256d b)
-        ///   VUNPCKHPD ymm, ymm, ymm/m256
+        ///   VUNPCKHPD ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKHPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> UnpackHigh(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_unpacklo_ps (__m256 a, __m256 b)
-        ///   VUNPCKLPS ymm, ymm, ymm/m256
+        ///   VUNPCKLPS ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKLPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> UnpackLow(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_unpacklo_pd (__m256d a, __m256d b)
-        ///   VUNPCKLPD ymm, ymm, ymm/m256
+        ///   VUNPCKLPD ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKLPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> UnpackLow(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256 _mm256_xor_ps (__m256 a, __m256 b)
-        ///   VXORPS ymm, ymm, ymm/m256
+        ///   VXORPS ymm1,         ymm2, ymm3/m256
+        ///   VXORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Xor(Vector256<float> left, Vector256<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_xor_pd (__m256d a, __m256d b)
-        ///   VXORPS ymm, ymm, ymm/m256
+        ///   VXORPD ymm1,         ymm2, ymm3/m256
+        ///   VXORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Xor(Vector256<double> left, Vector256<double> right) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs

index 0d13a2bf8607ef95194bedd11bfe4992b4c1bb41..c096d893ae019f0ef6f376115a8593f50db1f307 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs
@@ -28,1363 +28,1419 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m256 _mm256_add_ps (__m256 a, __m256 b)
-        ///   VADDPS ymm, ymm, ymm/m256
+        ///   VADDPS ymm1,         ymm2, ymm3/m256
+        ///   VADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Add(Vector256<float> left, Vector256<float> right) => Add(left, right);
          /// <summary>
          /// __m256d _mm256_add_pd (__m256d a, __m256d b)
-        ///   VADDPD ymm, ymm, ymm/m256
+        ///   VADDPD ymm1,         ymm2, ymm3/m256
+        ///   VADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Add(Vector256<double> left, Vector256<double> right) => Add(left, right);
  
          /// <summary>
          /// __m256 _mm256_addsub_ps (__m256 a, __m256 b)
-        ///   VADDSUBPS ymm, ymm, ymm/m256
+        ///   VADDSUBPS ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<float> AddSubtract(Vector256<float> left, Vector256<float> right) => AddSubtract(left, right);
          /// <summary>
          /// __m256d _mm256_addsub_pd (__m256d a, __m256d b)
-        ///   VADDSUBPD ymm, ymm, ymm/m256
+        ///   VADDSUBPD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<double> AddSubtract(Vector256<double> left, Vector256<double> right) => AddSubtract(left, right);
  
          /// <summary>
          /// __m256 _mm256_and_ps (__m256 a, __m256 b)
-        ///   VANDPS ymm, ymm, ymm/m256
+        ///   VANDPS ymm1,         ymm2, ymm3/m256
+        ///   VANDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> And(Vector256<float> left, Vector256<float> right) => And(left, right);
          /// <summary>
          /// __m256d _mm256_and_pd (__m256d a, __m256d b)
-        ///   VANDPD ymm, ymm, ymm/m256
+        ///   VANDPD ymm1,         ymm2, ymm3/m256
+        ///   VANDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> And(Vector256<double> left, Vector256<double> right) => And(left, right);
  
          /// <summary>
          /// __m256 _mm256_andnot_ps (__m256 a, __m256 b)
-        ///   VANDNPS ymm, ymm, ymm/m256
+        ///   VANDNPS ymm1,         ymm2, ymm3/m256
+        ///   VANDNPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> AndNot(Vector256<float> left, Vector256<float> right) => AndNot(left, right);
          /// <summary>
          /// __m256d _mm256_andnot_pd (__m256d a, __m256d b)
-        ///   VANDNPD ymm, ymm, ymm/m256
+        ///   VANDNPD ymm1,         ymm2, ymm3/m256
+        ///   VANDNPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> AndNot(Vector256<double> left, Vector256<double> right) => AndNot(left, right);
  
          /// <summary>
          /// __m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8)
-        ///   VBLENDPS ymm, ymm, ymm/m256, imm8
+        ///   VBLENDPS ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> Blend(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) => Blend(left, right, control);
          /// <summary>
          /// __m256d _mm256_blend_pd (__m256d a, __m256d b, const int imm8)
-        ///   VBLENDPD ymm, ymm, ymm/m256, imm8
+        ///   VBLENDPD ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<double> Blend(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) => Blend(left, right, control);
  
          /// <summary>
          /// __m256 _mm256_blendv_ps (__m256 a, __m256 b, __m256 mask)
-        ///   VBLENDVPS ymm, ymm, ymm/m256, ymm
+        ///   VBLENDVPS ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<float> BlendVariable(Vector256<float> left, Vector256<float> right, Vector256<float> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask)
-        ///   VBLENDVPD ymm, ymm, ymm/m256, ymm
+        ///   VBLENDVPD ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<double> BlendVariable(Vector256<double> left, Vector256<double> right, Vector256<double> mask) => BlendVariable(left, right, mask);
  
          /// <summary>
          /// __m128 _mm_broadcast_ss (float const * mem_addr)
-        ///   VBROADCASTSS xmm, m32
+        ///   VBROADCASTSS xmm1,         m32
+        ///   VBROADCASTSS xmm1 {k1}{z}, m32
          /// </summary>
          public static unsafe Vector128<float> BroadcastScalarToVector128(float* source) => BroadcastScalarToVector128(source);
  
          /// <summary>
          /// __m256 _mm256_broadcast_ss (float const * mem_addr)
-        ///   VBROADCASTSS ymm, m32
+        ///   VBROADCASTSS ymm1,         m32
+        ///   VBROADCASTSS ymm1 {k1}{z}, m32
          /// </summary>
          public static unsafe Vector256<float> BroadcastScalarToVector256(float* source) => BroadcastScalarToVector256(source);
          /// <summary>
          /// __m256d _mm256_broadcast_sd (double const * mem_addr)
-        ///   VBROADCASTSD ymm, m64
+        ///   VBROADCASTSD ymm1,         m64
+        ///   VBROADCASTSD ymm1 {k1}{z}, m64
          /// </summary>
          public static unsafe Vector256<double> BroadcastScalarToVector256(double* source) => BroadcastScalarToVector256(source);
  
          /// <summary>
          /// __m256 _mm256_broadcast_ps (__m128 const * mem_addr)
-        ///   VBROADCASTF128, ymm, m128
+        ///   VBROADCASTF128  ymm1,         m128
+        ///   VBROADCASTF32x4 ymm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector256<float> BroadcastVector128ToVector256(float* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256d _mm256_broadcast_pd (__m128d const * mem_addr)
-        ///   VBROADCASTF128, ymm, m128
+        ///   VBROADCASTF128  ymm1,         m128
+        ///   VBROADCASTF64x2 ymm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector256<double> BroadcastVector128ToVector256(double* address) => BroadcastVector128ToVector256(address);
  
          /// <summary>
-        /// __m256 _mm256_ceil_ps (__m256 a)
-        ///   VROUNDPS ymm, ymm/m256, imm8(10)
+        /// __m256 _mm256_ceil_ps (__m256 a)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(10)
          /// </summary>
          public static Vector256<float> Ceiling(Vector256<float> value) => Ceiling(value);
          /// <summary>
-        /// __m256d _mm256_ceil_pd (__m256d a)
-        ///   VROUNDPD ymm, ymm/m256, imm8(10)
+        /// __m256d _mm256_ceil_pd (__m256d a)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(10)
          /// </summary>
          public static Vector256<double> Ceiling(Vector256<double> value) => Ceiling(value);
  
          /// <summary>
          /// __m128 _mm_cmp_ps (__m128 a, __m128 b, const int imm8)
-        ///   VCMPPS xmm, xmm, xmm/m128, imm8
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<float> Compare(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode);
          /// <summary>
-        /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8)
-        ///   VCMPPD xmm, xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<double> Compare(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode);
-        /// <summary>
          /// __m256 _mm256_cmp_ps (__m256 a, __m256 b, const int imm8)
-        ///   VCMPPS ymm, ymm, ymm/m256, imm8
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> Compare(Vector256<float> left, Vector256<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode);
-        /// <summary>
-        /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8)
-        ///   VCMPPD ymm, ymm, ymm/m256, imm8
-        /// </summary>
-        public static Vector256<double> Compare(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode);
-
          /// <summary>
          /// __m256 _mm256_cmpeq_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(0)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(0)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<float> CompareEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling);
-        /// <summary>
-        /// __m256d _mm256_cmpeq_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(0)
-        /// The above native signature does not exist. We provide this additional overload for completeness.
-        /// </summary>
-        public static Vector256<double> CompareEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling);
-
          /// <summary>
          /// __m256 _mm256_cmpgt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(14)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(14)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<float> CompareGreaterThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanSignaling);
-        /// <summary>
-        /// __m256d _mm256_cmpgt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(14)
-        /// The above native signature does not exist. We provide this additional overload for completeness.
-        /// </summary>
-        public static Vector256<double> CompareGreaterThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanSignaling);
-
          /// <summary>
          /// __m256 _mm256_cmpge_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(13)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(13)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<float> CompareGreaterThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanOrEqualSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpge_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(13)
+        /// __m256 _mm256_cmplt_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(1)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanOrEqualSignaling);
-
+        public static Vector256<float> CompareLessThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling);
          /// <summary>
-        /// __m256 _mm256_cmplt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(1)
+        /// __m256 _mm256_cmple_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(2)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareLessThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling);
+        public static Vector256<float> CompareLessThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling);
          /// <summary>
-        /// __m256d _mm256_cmplt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(1)
+        /// __m256 _mm256_cmpneq_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareLessThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling);
-
+        public static Vector256<float> CompareNotEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling);
          /// <summary>
-        /// __m256 _mm256_cmple_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(2)
+        /// __m256 _mm256_cmpngt_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareLessThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling);
+        public static Vector256<float> CompareNotGreaterThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling);
          /// <summary>
-        /// __m256d _mm256_cmple_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(2)
+        /// __m256 _mm256_cmpnge_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareLessThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling);
-
+        public static Vector256<float> CompareNotGreaterThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling);
          /// <summary>
-        /// __m256 _mm256_cmpneq_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(4)
+        /// __m256 _mm256_cmpnlt_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(5)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling);
+        public static Vector256<float> CompareNotLessThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpneq_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(4)
+        /// __m256 _mm256_cmpnle_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(6)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling);
-
+        public static Vector256<float> CompareNotLessThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling);
          /// <summary>
-        /// __m256 _mm256_cmpngt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(10)
+        /// __m256 _mm256_cmpord_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(7)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotGreaterThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling);
+        public static Vector256<float> CompareOrdered(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpngt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(10)
+        /// __m256 _mm256_cmpunord_ps (__m256 a,  __m256 b)
+        ///   VCMPPS ymm1, ymm2, ymm3/m256, imm8(3)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotGreaterThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling);
+        public static Vector256<float> CompareUnordered(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling);
  
          /// <summary>
-        /// __m256 _mm256_cmpnge_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(9)
+        /// __m128d _mm_cmp_pd (__m128d a, __m128d b, const int imm8)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8
+        /// </summary>
+        public static Vector128<double> Compare(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode);
+        /// <summary>
+        /// __m256d _mm256_cmp_pd (__m256d a, __m256d b, const int imm8)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8
+        /// </summary>
+        public static Vector256<double> Compare(Vector256<double> left, Vector256<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => Compare(left, right, mode);
+        /// <summary>
+        /// __m256d _mm256_cmpeq_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(0)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotGreaterThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling);
+        public static Vector256<double> CompareEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpnge_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(9)
+        /// __m256d _mm256_cmpgt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(14)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling);
-
+        public static Vector256<double> CompareGreaterThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanSignaling);
          /// <summary>
-        /// __m256 _mm256_cmpnlt_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(5)
+        /// __m256d _mm256_cmpge_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(13)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotLessThan(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling);
+        public static Vector256<double> CompareGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedGreaterThanOrEqualSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpnlt_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(5)
+        /// __m256d _mm256_cmplt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(1)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotLessThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling);
-
+        public static Vector256<double> CompareLessThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanSignaling);
          /// <summary>
-        /// __m256 _mm256_cmpnle_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(6)
+        /// __m256d _mm256_cmple_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(2)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareNotLessThanOrEqual(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling);
+        public static Vector256<double> CompareLessThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpnle_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(6)
+        /// __m256d _mm256_cmpneq_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareNotLessThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling);
-
+        public static Vector256<double> CompareNotEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling);
          /// <summary>
-        /// __m256 _mm256_cmpord_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(7)
+        /// __m256d _mm256_cmpngt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareOrdered(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling);
+        public static Vector256<double> CompareNotGreaterThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanSignaling);
          /// <summary>
-        /// __m256d _mm256_cmpord_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(7)
+        /// __m256d _mm256_cmpnge_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<double> CompareOrdered(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling);
-
+        public static Vector256<double> CompareNotGreaterThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotGreaterThanOrEqualSignaling);
          /// <summary>
-        /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8)
-        ///   VCMPSS xmm, xmm, xmm/m32, imm8
+        /// __m256d _mm256_cmpnlt_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(5)
+        /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector128<double> CompareScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode);
+        public static Vector256<double> CompareNotLessThan(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanSignaling);
          /// <summary>
-        /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8)
-        ///   VCMPSD xmm, xmm, xmm/m64, imm8
+        /// __m256d _mm256_cmpnle_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(6)
+        /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector128<float> CompareScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode);
-
+        public static Vector256<double> CompareNotLessThanOrEqual(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNotLessThanOrEqualSignaling);
          /// <summary>
-        /// __m256 _mm256_cmpunord_ps (__m256 a,  __m256 b)
-        ///   CMPPS ymm, ymm/m256, imm8(3)
+        /// __m256d _mm256_cmpord_pd (__m256d a,  __m256d b)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(7)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
-        public static Vector256<float> CompareUnordered(Vector256<float> left, Vector256<float> right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling);
+        public static Vector256<double> CompareOrdered(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.OrderedNonSignaling);
          /// <summary>
          /// __m256d _mm256_cmpunord_pd (__m256d a,  __m256d b)
-        ///   CMPPD ymm, ymm/m256, imm8(3)
+        ///   VCMPPD ymm1, ymm2, ymm3/m256, imm8(3)
          /// The above native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static Vector256<double> CompareUnordered(Vector256<double> left, Vector256<double> right) => Compare(left, right, FloatComparisonMode.UnorderedNonSignaling);
  
+        /// <summary>
+        /// __m128 _mm_cmp_ss (__m128 a, __m128 b, const int imm8)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8
+        /// </summary>
+        public static Vector128<float> CompareScalar(Vector128<float> left, Vector128<float> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode);
+        /// <summary>
+        /// __m128d _mm_cmp_sd (__m128d a, __m128d b, const int imm8)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8
+        /// </summary>
+        public static Vector128<double> CompareScalar(Vector128<double> left, Vector128<double> right, [ConstantExpected(Max = FloatComparisonMode.UnorderedTrueSignaling)] FloatComparisonMode mode) => CompareScalar(left, right, mode);
+
          /// <summary>
          /// __m128i _mm256_cvtpd_epi32 (__m256d a)
-        ///   VCVTPD2DQ xmm, ymm/m256
+        ///   VCVTPD2DQ xmm1,         ymm2/m256
+        ///   VCVTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector256<double> value) => ConvertToVector128Int32(value);
+
          /// <summary>
          /// __m128 _mm256_cvtpd_ps (__m256d a)
-        ///   VCVTPD2PS xmm, ymm/m256
+        ///   VCVTPD2PS xmm1,         ymm2/m256
+        ///   VCVTPD2PS xmm1 {k1}{z}, ymm2/m256/m64bcst
          /// </summary>
          public static Vector128<float> ConvertToVector128Single(Vector256<double> value) => ConvertToVector128Single(value);
+
          /// <summary>
-        /// __m256i _mm256_cvtps_epi32 (__m256 a)
-        ///   VCVTPS2DQ ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value) => ConvertToVector256Int32(value);
-        /// <summary>
-        /// __m256 _mm256_cvtepi32_ps (__m256i a)
-        ///   VCVTDQ2PS ymm, ymm/m256
+        /// __m256d _mm256_cvtepi32_pd (__m128i a)
+        ///   VCVTDQ2PD ymm1,         xmm2/m128
+        ///   VCVTDQ2PD ymm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static Vector256<float> ConvertToVector256Single(Vector256<int> value) => ConvertToVector256Single(value);
+        public static Vector256<double> ConvertToVector256Double(Vector128<int> value) => ConvertToVector256Double(value);
          /// <summary>
          /// __m256d _mm256_cvtps_pd (__m128 a)
-        ///   VCVTPS2PD ymm, xmm/m128
+        ///   VCVTPS2PD ymm1,         xmm2/m128
+        ///   VCVTPS2PD ymm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
          public static Vector256<double> ConvertToVector256Double(Vector128<float> value) => ConvertToVector256Double(value);
          /// <summary>
-        /// __m256d _mm256_cvtepi32_pd (__m128i a)
-        ///   VCVTDQ2PD ymm, xmm/m128
+        /// __m256i _mm256_cvtps_epi32 (__m256 a)
+        ///   VCVTPS2DQ ymm1,         ymm2/m256
+        ///   VCVTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
-        public static Vector256<double> ConvertToVector256Double(Vector128<int> value) => ConvertToVector256Double(value);
-
+        public static Vector256<int> ConvertToVector256Int32(Vector256<float> value) => ConvertToVector256Int32(value);
          /// <summary>
-        /// __m128i _mm256_cvttpd_epi32 (__m256d a)
-        ///   VCVTTPD2DQ xmm, ymm/m256
+        /// __m256 _mm256_cvtepi32_ps (__m256i a)
+        ///   VCVTDQ2PS ymm1,         ymm2/m256
+        ///   VCVTDQ2PS ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector256<double> value) => ConvertToVector128Int32WithTruncation(value);
+        public static Vector256<float> ConvertToVector256Single(Vector256<int> value) => ConvertToVector256Single(value);
+
          /// <summary>
          /// __m256i _mm256_cvttps_epi32 (__m256 a)
-        ///   VCVTTPS2DQ ymm, ymm/m256
+        ///   VCVTTPS2DQ ymm1,         ymm2/m256
+        ///   VCVTTPS2DQ ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32WithTruncation(Vector256<float> value) => ConvertToVector256Int32WithTruncation(value);
+        /// <summary>
+        /// __m128i _mm256_cvttpd_epi32 (__m256d a)
+        ///   VCVTTPD2DQ xmm1,         ymm2/m256
+        ///   VCVTTPD2DQ xmm1 {k1}{z}, ymm2/m256/m64bcst
+        /// </summary>
+        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector256<double> value) => ConvertToVector128Int32WithTruncation(value);
  
          /// <summary>
          /// __m256 _mm256_div_ps (__m256 a, __m256 b)
-        ///   VDIVPS ymm, ymm, ymm/m256
+        ///   VDIVPS ymm1,         ymm2, ymm3/m256
+        ///   VDIVPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Divide(Vector256<float> left, Vector256<float> right) => Divide(left, right);
          /// <summary>
          /// __m256d _mm256_div_pd (__m256d a, __m256d b)
-        ///   VDIVPD ymm, ymm, ymm/m256
+        ///   VDIVPD ymm1,         ymm2, ymm3/m256
+        ///   VDIVPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Divide(Vector256<double> left, Vector256<double> right) => Divide(left, right);
  
          /// <summary>
          /// __m256 _mm256_dp_ps (__m256 a, __m256 b, const int imm8)
-        ///   VDPPS ymm, ymm, ymm/m256, imm8
+        ///   VDPPS ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> DotProduct(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) => DotProduct(left, right, control);
  
          /// <summary>
          /// __m256 _mm256_moveldup_ps (__m256 a)
-        ///   VMOVSLDUP ymm, ymm/m256
+        ///   VMOVSLDUP ymm1,         ymm2/m256
+        ///   VMOVSLDUP ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<float> DuplicateEvenIndexed(Vector256<float> value) => DuplicateEvenIndexed(value);
          /// <summary>
          /// __m256d _mm256_movedup_pd (__m256d a)
-        ///   VMOVDDUP ymm, ymm/m256
+        ///   VMOVDDUP ymm1,         ymm2/m256
+        ///   VMOVDDUP ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<double> DuplicateEvenIndexed(Vector256<double> value) => DuplicateEvenIndexed(value);
-
          /// <summary>
          /// __m256 _mm256_movehdup_ps (__m256 a)
-        ///   VMOVSHDUP ymm, ymm/m256
+        ///   VMOVSHDUP ymm1,         ymm2/m256
+        ///   VMOVSHDUP ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<float> DuplicateOddIndexed(Vector256<float> value) => DuplicateOddIndexed(value);
  
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<byte> ExtractVector128(Vector256<byte> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<sbyte> ExtractVector128(Vector256<sbyte> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<short> ExtractVector128(Vector256<short> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<ushort> ExtractVector128(Vector256<ushort> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<int> ExtractVector128(Vector256<int> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<uint> ExtractVector128(Vector256<uint> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<long> ExtractVector128(Vector256<long> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<ulong> ExtractVector128(Vector256<ulong> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128 _mm256_extractf128_ps (__m256 a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<float> ExtractVector128(Vector256<float> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128d _mm256_extractf128_pd (__m256d a, const int imm8)
-        ///   VEXTRACTF128 xmm/m128, ymm, imm8
+        ///   VEXTRACTF128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTF64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector128<double> ExtractVector128(Vector256<double> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
  
          /// <summary>
-        /// __m256 _mm256_floor_ps (__m256 a)
-        ///   VROUNDPS ymm, ymm/m256, imm8(9)
+        /// __m256 _mm256_floor_ps (__m256 a)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(9)
          /// </summary>
          public static Vector256<float> Floor(Vector256<float> value) => Floor(value);
          /// <summary>
-        /// __m256d _mm256_floor_pd (__m256d a)
-        ///   VROUNDPS ymm, ymm/m256, imm8(9)
+        /// __m256d _mm256_floor_pd (__m256d a)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(9)
          /// </summary>
          public static Vector256<double> Floor(Vector256<double> value) => Floor(value);
  
          /// <summary>
          /// __m256 _mm256_hadd_ps (__m256 a, __m256 b)
-        ///   VHADDPS ymm, ymm, ymm/m256
+        ///   VHADDPS ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<float> HorizontalAdd(Vector256<float> left, Vector256<float> right) => HorizontalAdd(left, right);
          /// <summary>
          /// __m256d _mm256_hadd_pd (__m256d a, __m256d b)
-        ///   VHADDPD ymm, ymm, ymm/m256
+        ///   VHADDPD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<double> HorizontalAdd(Vector256<double> left, Vector256<double> right) => HorizontalAdd(left, right);
  
          /// <summary>
          /// __m256 _mm256_hsub_ps (__m256 a, __m256 b)
-        ///   VHSUBPS ymm, ymm, ymm/m256
+        ///   VHSUBPS ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<float> HorizontalSubtract(Vector256<float> left, Vector256<float> right) => HorizontalSubtract(left, right);
          /// <summary>
          /// __m256d _mm256_hsub_pd (__m256d a, __m256d b)
-        ///   VHSUBPD ymm, ymm, ymm/m256
+        ///   VHSUBPD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<double> HorizontalSubtract(Vector256<double> left, Vector256<double> right) => HorizontalSubtract(left, right);
  
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<byte> InsertVector128(Vector256<byte> value, Vector128<byte> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<sbyte> InsertVector128(Vector256<sbyte> value, Vector128<sbyte> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<short> InsertVector128(Vector256<short> value, Vector128<short> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<ushort> InsertVector128(Vector256<ushort> value, Vector128<ushort> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<int> InsertVector128(Vector256<int> value, Vector128<int> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<uint> InsertVector128(Vector256<uint> value, Vector128<uint> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<long> InsertVector128(Vector256<long> value, Vector128<long> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<ulong> InsertVector128(Vector256<ulong> value, Vector128<ulong> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<float> InsertVector128(Vector256<float> value, Vector128<float> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256d _mm256_insertf128_pd (__m256d a, __m128d b, int imm8)
-        ///   VINSERTF128 ymm, ymm, xmm/m128, imm8
+        ///   VINSERTF128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTF64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static Vector256<double> InsertVector128(Vector256<double> value, Vector128<double> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
  
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<sbyte> LoadVector256(sbyte* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<byte> LoadVector256(byte* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<short> LoadVector256(short* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<ushort> LoadVector256(ushort* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<int> LoadVector256(int* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<uint> LoadVector256(uint* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<long> LoadVector256(long* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
-        ///   VMOVDQU ymm, m256
-        /// </summary>
-        public static unsafe Vector256<ulong> LoadVector256(ulong* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256 _mm256_loadu_ps (float const * mem_addr)
-        ///   VMOVUPS ymm, ymm/m256
-        /// </summary>
-        public static unsafe Vector256<float> LoadVector256(float* address) => LoadVector256(address);
-        /// <summary>
-        /// __m256d _mm256_loadu_pd (double const * mem_addr)
-        ///   VMOVUPD ymm, ymm/m256
-        /// </summary>
-        public static unsafe Vector256<double> LoadVector256(double* address) => LoadVector256(address);
-
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<sbyte> LoadAlignedVector256(sbyte* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<byte> LoadAlignedVector256(byte* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<short> LoadAlignedVector256(short* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<ushort> LoadAlignedVector256(ushort* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<int> LoadAlignedVector256(int* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA32 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<uint> LoadAlignedVector256(uint* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA64 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<long> LoadAlignedVector256(long* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256i _mm256_load_si256 (__m256i const * mem_addr)
-        ///   VMOVDQA ymm, m256
+        ///   VMOVDQA   ymm1,         m256
+        ///   VMOVDQA64 ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<ulong> LoadAlignedVector256(ulong* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256 _mm256_load_ps (float const * mem_addr)
-        ///   VMOVAPS ymm, ymm/m256
+        ///   VMOVAPS ymm1,         m256
+        ///   VMOVAPS ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<float> LoadAlignedVector256(float* address) => LoadAlignedVector256(address);
          /// <summary>
          /// __m256d _mm256_load_pd (double const * mem_addr)
-        ///   VMOVAPD ymm, ymm/m256
+        ///   VMOVAPD ymm1,         m256
+        ///   VMOVAPD ymm1 {k1}{z}, m256
          /// </summary>
          public static unsafe Vector256<double> LoadAlignedVector256(double* address) => LoadAlignedVector256(address);
  
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<sbyte> LoadDquVector256(sbyte* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<byte> LoadDquVector256(byte* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<short> LoadDquVector256(short* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<ushort> LoadDquVector256(ushort* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<int> LoadDquVector256(int* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<uint> LoadDquVector256(uint* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<long> LoadDquVector256(long* address) => LoadDquVector256(address);
          /// <summary>
          /// __m256i _mm256_lddqu_si256 (__m256i const * mem_addr)
-        ///   VLDDQU ymm, m256
+        ///   VLDDQU ymm1, m256
          /// </summary>
          public static unsafe Vector256<ulong> LoadDquVector256(ulong* address) => LoadDquVector256(address);
  
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU  ymm1,         m256
+        ///   VMOVDQU8 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<sbyte> LoadVector256(sbyte* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU  ymm1,         m256
+        ///   VMOVDQU8 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<byte> LoadVector256(byte* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU16 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<short> LoadVector256(short* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU16 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<ushort> LoadVector256(ushort* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU32 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<int> LoadVector256(int* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU32 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<uint> LoadVector256(uint* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU64 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<long> LoadVector256(long* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+        ///   VMOVDQU   ymm1,         m256
+        ///   VMOVDQU64 ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<ulong> LoadVector256(ulong* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256 _mm256_loadu_ps (float const * mem_addr)
+        ///   VMOVUPS ymm1,         m256
+        ///   VMOVUPS ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<float> LoadVector256(float* address) => LoadVector256(address);
+        /// <summary>
+        /// __m256d _mm256_loadu_pd (double const * mem_addr)
+        ///   VMOVUPD ymm1,         m256
+        ///   VMOVUPD ymm1 {k1}{z}, m256
+        /// </summary>
+        public static unsafe Vector256<double> LoadVector256(double* address) => LoadVector256(address);
+
          /// <summary>
          /// __m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
-        ///   VMASKMOVPS xmm, xmm, m128
+        ///   VMASKMOVPS xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<float> MaskLoad(float* address, Vector128<float> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m128d _mm_maskload_pd (double const * mem_addr, __m128i mask)
-        ///   VMASKMOVPD xmm, xmm, m128
+        ///   VMASKMOVPD xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<double> MaskLoad(double* address, Vector128<double> mask) => MaskLoad(address, mask);
-
          /// <summary>
          /// __m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
-        ///   VMASKMOVPS ymm, ymm, m256
+        ///   VMASKMOVPS ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<float> MaskLoad(float* address, Vector256<float> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m256d _mm256_maskload_pd (double const * mem_addr, __m256i mask)
-        ///   VMASKMOVPD ymm, ymm, m256
+        ///   VMASKMOVPD ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<double> MaskLoad(double* address, Vector256<double> mask) => MaskLoad(address, mask);
  
          /// <summary>
          /// void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
-        ///   VMASKMOVPS m128, xmm, xmm
+        ///   VMASKMOVPS m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(float* address, Vector128<float> mask, Vector128<float> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm_maskstore_pd (double * mem_addr, __m128i mask, __m128d a)
-        ///   VMASKMOVPD m128, xmm, xmm
+        ///   VMASKMOVPD m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(double* address, Vector128<double> mask, Vector128<double> source) => MaskStore(address, mask, source);
-
          /// <summary>
          /// void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
-        ///   VMASKMOVPS m256, ymm, ymm
+        ///   VMASKMOVPS m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(float* address, Vector256<float> mask, Vector256<float> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm256_maskstore_pd (double * mem_addr, __m256i mask, __m256d a)
-        ///   VMASKMOVPD m256, ymm, ymm
+        ///   VMASKMOVPD m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(double* address, Vector256<double> mask, Vector256<double> source) => MaskStore(address, mask, source);
  
          /// <summary>
          /// __m256 _mm256_max_ps (__m256 a, __m256 b)
-        ///   VMAXPS ymm, ymm, ymm/m256
+        ///   VMAXPS ymm1,         ymm2, ymm3/m256
+        ///   VMAXPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Max(Vector256<float> left, Vector256<float> right) => Max(left, right);
          /// <summary>
          /// __m256d _mm256_max_pd (__m256d a, __m256d b)
-        ///   VMAXPD ymm, ymm, ymm/m256
+        ///   VMAXPD ymm1,         ymm2, ymm3/m256
+        ///   VMAXPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Max(Vector256<double> left, Vector256<double> right) => Max(left, right);
  
          /// <summary>
          /// __m256 _mm256_min_ps (__m256 a, __m256 b)
-        ///   VMINPS ymm, ymm, ymm/m256
+        ///   VMINPS ymm1,         ymm2, ymm3/m256
+        ///   VMINPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Min(Vector256<float> left, Vector256<float> right) => Min(left, right);
          /// <summary>
          /// __m256d _mm256_min_pd (__m256d a, __m256d b)
-        ///   VMINPD ymm, ymm, ymm/m256
+        ///   VMINPD ymm1,         ymm2, ymm3/m256
+        ///   VMINPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Min(Vector256<double> left, Vector256<double> right) => Min(left, right);
  
          /// <summary>
          /// int _mm256_movemask_ps (__m256 a)
-        ///   VMOVMSKPS reg, ymm
+        ///   VMOVMSKPS r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<float> value) => MoveMask(value);
          /// <summary>
          /// int _mm256_movemask_pd (__m256d a)
-        ///   VMOVMSKPD reg, ymm
+        ///   VMOVMSKPD r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<double> value) => MoveMask(value);
  
          /// <summary>
          /// __m256 _mm256_mul_ps (__m256 a, __m256 b)
-        ///   VMULPS ymm, ymm, ymm/m256
+        ///   VMULPS ymm1,         ymm2, ymm3/m256
+        ///   VMULPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Multiply(Vector256<float> left, Vector256<float> right) => Multiply(left, right);
          /// <summary>
          /// __m256d _mm256_mul_pd (__m256d a, __m256d b)
-        ///   VMULPD ymm, ymm, ymm/m256
+        ///   VMULPD ymm1,         ymm2, ymm3/m256
+        ///   VMULPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Multiply(Vector256<double> left, Vector256<double> right) => Multiply(left, right);
  
          /// <summary>
          /// __m256 _mm256_or_ps (__m256 a, __m256 b)
-        ///   VORPS ymm, ymm, ymm/m256
+        ///   VORPS ymm1,         ymm2, ymm3/m256
+        ///   VORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Or(Vector256<float> left, Vector256<float> right) => Or(left, right);
          /// <summary>
          /// __m256d _mm256_or_pd (__m256d a, __m256d b)
-        ///   VORPD ymm, ymm, ymm/m256
+        ///   VORPD ymm1,         ymm2, ymm3/m256
+        ///   VORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Or(Vector256<double> left, Vector256<double> right) => Or(left, right);
  
          /// <summary>
          /// __m128 _mm_permute_ps (__m128 a, int imm8)
-        ///   VPERMILPS xmm, xmm, imm8
+        ///   VPERMILPS xmm1,         xmm2/m128,         imm8
+        ///   VPERMILPS xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8
          /// </summary>
          public static Vector128<float> Permute(Vector128<float> value, [ConstantExpected] byte control) => Permute(value, control);
          /// <summary>
          /// __m128d _mm_permute_pd (__m128d a, int imm8)
-        ///   VPERMILPD xmm, xmm, imm8
+        ///   VPERMILPD xmm1,         xmm2/m128,         imm8
+        ///   VPERMILPD xmm1 {k1}{z}, xmm2/m128/m64bcst, imm8
          /// </summary>
          public static Vector128<double> Permute(Vector128<double> value, [ConstantExpected] byte control) => Permute(value, control);
-
          /// <summary>
          /// __m256 _mm256_permute_ps (__m256 a, int imm8)
-        ///   VPERMILPS ymm, ymm, imm8
+        ///   VPERMILPS ymm1,         ymm2/m256,         imm8
+        ///   VPERMILPS ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<float> Permute(Vector256<float> value, [ConstantExpected] byte control) => Permute(value, control);
          /// <summary>
          /// __m256d _mm256_permute_pd (__m256d a, int imm8)
-        ///   VPERMILPD ymm, ymm, imm8
+        ///   VPERMILPD ymm1,         ymm2/m256,         imm8
+        ///   VPERMILPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<double> Permute(Vector256<double> value, [ConstantExpected] byte control) => Permute(value, control);
  
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<byte> Permute2x128(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<sbyte> Permute2x128(Vector256<sbyte> left, Vector256<sbyte> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<short> Permute2x128(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ushort> Permute2x128(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<int> Permute2x128(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<uint> Permute2x128(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<long> Permute2x128(Vector256<long> left, Vector256<long> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ulong> Permute2x128(Vector256<ulong> left, Vector256<ulong> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<float> Permute2x128(Vector256<float> left, Vector256<float> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
-
          /// <summary>
          /// __m256d _mm256_permute2f128_pd (__m256d a, __m256d b, int imm8)
-        ///   VPERM2F128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<double> Permute2x128(Vector256<double> left, Vector256<double> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
  
          /// <summary>
          /// __m128 _mm_permutevar_ps (__m128 a, __m128i b)
-        ///   VPERMILPS xmm, xmm, xmm/m128
+        ///   VPERMILPS xmm1,         xmm2, xmm3/m128
+        ///   VPERMILPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> PermuteVar(Vector128<float> left, Vector128<int> control) => PermuteVar(left, control);
          /// <summary>
          /// __m128d _mm_permutevar_pd (__m128d a, __m128i b)
-        ///   VPERMILPD xmm, xmm, xmm/m128
+        ///   VPERMILPD xmm1,         xmm2, xmm3/m128
+        ///   VPERMILPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> PermuteVar(Vector128<double> left, Vector128<long> control) => PermuteVar(left, control);
          /// <summary>
          /// __m256 _mm256_permutevar_ps (__m256 a, __m256i b)
-        ///   VPERMILPS ymm, ymm, ymm/m256
+        ///   VPERMILPS ymm1,         ymm2, ymm3/m256
+        ///   VPERMILPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> PermuteVar(Vector256<float> left, Vector256<int> control) => PermuteVar(left, control);
          /// <summary>
          /// __m256d _mm256_permutevar_pd (__m256d a, __m256i b)
-        ///   VPERMILPD ymm, ymm, ymm/m256
+        ///   VPERMILPD ymm1,         ymm2, ymm3/m256
+        ///   VPERMILPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> PermuteVar(Vector256<double> left, Vector256<long> control) => PermuteVar(left, control);
  
          /// <summary>
          /// __m256 _mm256_rcp_ps (__m256 a)
-        ///   VRCPPS ymm, ymm/m256
+        ///   VRCPPS ymm1, ymm2/m256
          /// </summary>
          public static Vector256<float> Reciprocal(Vector256<float> value) => Reciprocal(value);
  
          /// <summary>
          /// __m256 _mm256_rsqrt_ps (__m256 a)
-        ///   VRSQRTPS ymm, ymm/m256
+        ///   VRSQRTPS ymm1, ymm2/m256
          /// </summary>
          public static Vector256<float> ReciprocalSqrt(Vector256<float> value) => ReciprocalSqrt(value);
  
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(8)
-        /// </summary>
-        public static Vector256<float> RoundToNearestInteger(Vector256<float> value) => RoundToNearestInteger(value);
-        /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(9)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundToNegativeInfinity(Vector256<float> value) => RoundToNegativeInfinity(value);
+        public static Vector256<float> RoundCurrentDirection(Vector256<float> value) => RoundCurrentDirection(value);
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(10)
+        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundToPositiveInfinity(Vector256<float> value) => RoundToPositiveInfinity(value);
+        public static Vector256<double> RoundCurrentDirection(Vector256<double> value) => RoundCurrentDirection(value);
+
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   VROUNDPS ymm, ymm/m256, imm8(11)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundToZero(Vector256<float> value) => RoundToZero(value);
+        public static Vector256<float> RoundToNearestInteger(Vector256<float> value) => RoundToNearestInteger(value);
          /// <summary>
-        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_CUR_DIRECTION)
-        ///   VROUNDPS ymm, ymm/m256, imm8(4)
+        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<float> RoundCurrentDirection(Vector256<float> value) => RoundCurrentDirection(value);
+        public static Vector256<double> RoundToNearestInteger(Vector256<double> value) => RoundToNearestInteger(value);
  
          /// <summary>
-        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(8)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(9)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<double> RoundToNearestInteger(Vector256<double> value) => RoundToNearestInteger(value);
+        public static Vector256<float> RoundToNegativeInfinity(Vector256<float> value) => RoundToNegativeInfinity(value);
          /// <summary>
          /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(9)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(9)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector256<double> RoundToNegativeInfinity(Vector256<double> value) => RoundToNegativeInfinity(value);
+
+        /// <summary>
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
+        /// </summary>
+        public static Vector256<float> RoundToPositiveInfinity(Vector256<float> value) => RoundToPositiveInfinity(value);
          /// <summary>
          /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(10)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector256<double> RoundToPositiveInfinity(Vector256<double> value) => RoundToPositiveInfinity(value);
+
          /// <summary>
-        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   VROUNDPD ymm, ymm/m256, imm8(11)
+        /// __m256 _mm256_round_ps (__m256 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///   VROUNDPS ymm1, ymm2/m256, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<double> RoundToZero(Vector256<double> value) => RoundToZero(value);
+        public static Vector256<float> RoundToZero(Vector256<float> value) => RoundToZero(value);
          /// <summary>
-        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_CUR_DIRECTION)
-        ///   VROUNDPD ymm, ymm/m256, imm8(4)
+        /// __m256d _mm256_round_pd (__m256d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///   VROUNDPD ymm1, ymm2/m256, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector256<double> RoundCurrentDirection(Vector256<double> value) => RoundCurrentDirection(value);
+        public static Vector256<double> RoundToZero(Vector256<double> value) => RoundToZero(value);
  
          /// <summary>
          /// __m256 _mm256_shuffle_ps (__m256 a, __m256 b, const int imm8)
-        ///   VSHUFPS ymm, ymm, ymm/m256, imm8
+        ///   VSHUFPS ymm1,         ymm2, ymm3/m256,         imm8
+        ///   VSHUFPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<float> Shuffle(Vector256<float> value, Vector256<float> right, [ConstantExpected] byte control) => Shuffle(value, right, control);
          /// <summary>
          /// __m256d _mm256_shuffle_pd (__m256d a, __m256d b, const int imm8)
-        ///   VSHUFPD ymm, ymm, ymm/m256, imm8
+        ///   VSHUFPD ymm1,         ymm2, ymm3/m256,         imm8
+        ///   VSHUFPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<double> Shuffle(Vector256<double> value, Vector256<double> right, [ConstantExpected] byte control) => Shuffle(value, right, control);
  
          /// <summary>
          /// __m256 _mm256_sqrt_ps (__m256 a)
-        ///   VSQRTPS ymm, ymm/m256
+        ///   VSQRTPS ymm1,         ymm2/m256
+        ///   VSQRTPS ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
          public static Vector256<float> Sqrt(Vector256<float> value) => Sqrt(value);
          /// <summary>
          /// __m256d _mm256_sqrt_pd (__m256d a)
-        ///   VSQRTPD ymm, ymm/m256
+        ///   VSQRTPD ymm1,         ymm2/m256
+        ///   VSQRTPD ymm1 {k1}{z}, ymm2/m256/m64bcst
          /// </summary>
          public static Vector256<double> Sqrt(Vector256<double> value) => Sqrt(value);
  
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU  m256,         ymm1
+        ///   VMOVDQU8 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(sbyte* address, Vector256<sbyte> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU  m256,         ymm1
+        ///   VMOVDQU8 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(byte* address, Vector256<byte> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU16 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(short* address, Vector256<short> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU16 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(ushort* address, Vector256<ushort> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU32 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(int* address, Vector256<int> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU32 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(uint* address, Vector256<uint> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU64 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(long* address, Vector256<long> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+        ///   VMOVDQU   m256,         ymm1
+        ///   VMOVDQU64 m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(ulong* address, Vector256<ulong> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_ps (float * mem_addr, __m256 a)
+        ///   VMOVUPS m256,         ymm1
+        ///   VMOVUPS m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(float* address, Vector256<float> source) => Store(address, source);
+        /// <summary>
+        /// void _mm256_storeu_pd (double * mem_addr, __m256d a)
+        ///   VMOVUPD m256,         ymm1
+        ///   VMOVUPD m256 {k1}{z}, ymm1
+        /// </summary>
+        public static unsafe void Store(double* address, Vector256<double> source) => Store(address, source);
+
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(sbyte* address, Vector256<sbyte> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(byte* address, Vector256<byte> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(short* address, Vector256<short> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(ushort* address, Vector256<ushort> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(int* address, Vector256<int> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA32 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(uint* address, Vector256<uint> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA64 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(long* address, Vector256<long> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQA m256, ymm
+        ///   VMOVDQA   m256,         ymm1
+        ///   VMOVDQA64 m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(ulong* address, Vector256<ulong> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_ps (float * mem_addr, __m256 a)
-        ///   VMOVAPS m256, ymm
+        ///   VMOVAPS m256,         ymm1
+        ///   VMOVAPS m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(float* address, Vector256<float> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm256_store_pd (double * mem_addr, __m256d a)
-        ///   VMOVAPD m256, ymm
+        ///   VMOVAPD m256,         ymm1
+        ///   VMOVAPD m256 {k1}{z}, ymm1
          /// </summary>
          public static unsafe void StoreAligned(double* address, Vector256<double> source) => StoreAligned(address, source);
  
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector256<sbyte> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(byte* address, Vector256<byte> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(short* address, Vector256<short> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector256<ushort> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(int* address, Vector256<int> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(uint* address, Vector256<uint> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(long* address, Vector256<long> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_si256 (__m256i * mem_addr, __m256i a)
-        ///   VMOVNTDQ m256, ymm
+        ///   VMOVNTDQ m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector256<ulong> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_ps (float * mem_addr, __m256 a)
-        ///   MOVNTPS m256, ymm
+        ///   VMOVNTPS m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(float* address, Vector256<float> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm256_stream_pd (double * mem_addr, __m256d a)
-        ///   MOVNTPD m256, ymm
+        ///   VMOVNTPD m256, ymm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(double* address, Vector256<double> source) => StoreAlignedNonTemporal(address, source);
  
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(sbyte* address, Vector256<sbyte> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(byte* address, Vector256<byte> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(short* address, Vector256<short> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(ushort* address, Vector256<ushort> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(int* address, Vector256<int> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(uint* address, Vector256<uint> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(long* address, Vector256<long> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
-        ///   MOVDQU m256, ymm
-        /// </summary>
-        public static unsafe void Store(ulong* address, Vector256<ulong> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_ps (float * mem_addr, __m256 a)
-        ///   MOVUPS m256, ymm
-        /// </summary>
-        public static unsafe void Store(float* address, Vector256<float> source) => Store(address, source);
-        /// <summary>
-        /// void _mm256_storeu_pd (double * mem_addr, __m256d a)
-        ///   MOVUPD m256, ymm
-        /// </summary>
-        public static unsafe void Store(double* address, Vector256<double> source) => Store(address, source);
-
          /// <summary>
          /// __m256 _mm256_sub_ps (__m256 a, __m256 b)
-        ///   VSUBPS ymm, ymm, ymm/m256
+        ///   VSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Subtract(Vector256<float> left, Vector256<float> right) => Subtract(left, right);
          /// <summary>
          /// __m256d _mm256_sub_pd (__m256d a, __m256d b)
-        ///   VSUBPD ymm, ymm, ymm/m256
+        ///   VSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Subtract(Vector256<double> left, Vector256<double> right) => Subtract(left, right);
  
          /// <summary>
          /// int _mm_testc_ps (__m128 a, __m128 b)
-        ///   VTESTPS xmm, xmm/m128
+        ///   VTESTPS xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<float> left, Vector128<float> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_pd (__m128d a, __m128d b)
-        ///   VTESTPD xmm, xmm/m128
+        ///   VTESTPD xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<double> left, Vector128<double> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<byte> left, Vector256<byte> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<sbyte> left, Vector256<sbyte> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<short> left, Vector256<short> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<ushort> left, Vector256<ushort> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<int> left, Vector256<int> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<uint> left, Vector256<uint> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<long> left, Vector256<long> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<ulong> left, Vector256<ulong> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_ps (__m256 a, __m256 b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPS ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<float> left, Vector256<float> right) => TestC(left, right);
-
          /// <summary>
          /// int _mm256_testc_pd (__m256d a, __m256d b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPD ymm1, ymm2/m256    ; CF=1
          /// </summary>
          public static bool TestC(Vector256<double> left, Vector256<double> right) => TestC(left, right);
  
          /// <summary>
          /// int _mm_testnzc_ps (__m128 a, __m128 b)
-        ///   VTESTPS xmm, xmm/m128
+        ///   VTESTPS xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<float> left, Vector128<float> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_pd (__m128d a, __m128d b)
-        ///   VTESTPD xmm, xmm/m128
+        ///   VTESTPD xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<double> left, Vector128<double> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<byte> left, Vector256<byte> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<sbyte> left, Vector256<sbyte> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<short> left, Vector256<short> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<ushort> left, Vector256<ushort> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<int> left, Vector256<int> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<uint> left, Vector256<uint> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<long> left, Vector256<long> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<ulong> left, Vector256<ulong> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_ps (__m256 a, __m256 b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPS ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<float> left, Vector256<float> right) => TestNotZAndNotC(left, right);
-
          /// <summary>
          /// int _mm256_testnzc_pd (__m256d a, __m256d b)
-        ///   VTESTPD ymm, ymm/m256
+        ///   VTESTPD ymm1, ymm2/m256    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector256<double> left, Vector256<double> right) => TestNotZAndNotC(left, right);
  
          /// <summary>
          /// int _mm_testz_ps (__m128 a, __m128 b)
-        ///   VTESTPS xmm, xmm/m128
+        ///   VTESTPS xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<float> left, Vector128<float> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_pd (__m128d a, __m128d b)
-        ///   VTESTPD xmm, xmm/m128
+        ///   VTESTPD xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<double> left, Vector128<double> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<byte> left, Vector256<byte> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<sbyte> left, Vector256<sbyte> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<short> left, Vector256<short> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<ushort> left, Vector256<ushort> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<int> left, Vector256<int> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<uint> left, Vector256<uint> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<long> left, Vector256<long> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_si256 (__m256i a, __m256i b)
-        ///   VPTEST ymm, ymm/m256
+        ///   VPTEST ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<ulong> left, Vector256<ulong> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_ps (__m256 a, __m256 b)
-        ///   VTESTPS ymm, ymm/m256
+        ///   VTESTPS ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<float> left, Vector256<float> right) => TestZ(left, right);
-
          /// <summary>
          /// int _mm256_testz_pd (__m256d a, __m256d b)
-        ///   VTESTPD ymm, ymm/m256
+        ///   VTESTPD ymm1, ymm2/m256    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector256<double> left, Vector256<double> right) => TestZ(left, right);
  
          /// <summary>
          /// __m256 _mm256_unpackhi_ps (__m256 a, __m256 b)
-        ///   VUNPCKHPS ymm, ymm, ymm/m256
+        ///   VUNPCKHPS ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKHPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> UnpackHigh(Vector256<float> left, Vector256<float> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256d _mm256_unpackhi_pd (__m256d a, __m256d b)
-        ///   VUNPCKHPD ymm, ymm, ymm/m256
+        ///   VUNPCKHPD ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKHPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> UnpackHigh(Vector256<double> left, Vector256<double> right) => UnpackHigh(left, right);
  
          /// <summary>
          /// __m256 _mm256_unpacklo_ps (__m256 a, __m256 b)
-        ///   VUNPCKLPS ymm, ymm, ymm/m256
+        ///   VUNPCKLPS ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKLPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> UnpackLow(Vector256<float> left, Vector256<float> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256d _mm256_unpacklo_pd (__m256d a, __m256d b)
-        ///   VUNPCKLPD ymm, ymm, ymm/m256
+        ///   VUNPCKLPD ymm1,         ymm2, ymm3/m256
+        ///   VUNPCKLPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> UnpackLow(Vector256<double> left, Vector256<double> right) => UnpackLow(left, right);
  
          /// <summary>
          /// __m256 _mm256_xor_ps (__m256 a, __m256 b)
-        ///   VXORPS ymm, ymm, ymm/m256
+        ///   VXORPS ymm1,         ymm2, ymm3/m256
+        ///   VXORPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> Xor(Vector256<float> left, Vector256<float> right) => Xor(left, right);
          /// <summary>
          /// __m256d _mm256_xor_pd (__m256d a, __m256d b)
-        ///   VXORPS ymm, ymm, ymm/m256
+        ///   VXORPD ymm1,         ymm2, ymm3/m256
+        ///   VXORPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> Xor(Vector256<double> left, Vector256<double> right) => Xor(left, right);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs

index 9e8a0f8e017166b1da0e87b03a55e43920a772cc..f85e14928be0e06db4c6a87d47f8c0b7674becda 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs
@@ -27,2171 +27,2368 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m256i _mm256_abs_epi8 (__m256i a)
-        ///   VPABSB ymm, ymm/m256
+        ///   VPABSB ymm1,         ymm2/m256
+        ///   VPABSB ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<byte> Abs(Vector256<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_abs_epi16 (__m256i a)
-        ///   VPABSW ymm, ymm/m256
+        ///   VPABSW ymm1,         ymm2/m256
+        ///   VPABSW ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<ushort> Abs(Vector256<short> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_abs_epi32 (__m256i a)
-        ///   VPABSD ymm, ymm/m256
+        ///   VPABSD ymm1,         ymm2/m256
+        ///   VPABSD ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Abs(Vector256<int> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_add_epi8 (__m256i a, __m256i b)
-        ///   VPADDB ymm, ymm, ymm/m256
+        ///   VPADDB ymm1,         ymm2, ymm3/m256
+        ///   VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Add(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi8 (__m256i a, __m256i b)
-        ///   VPADDB ymm, ymm, ymm/m256
+        ///   VPADDB ymm1,         ymm2, ymm3/m256
+        ///   VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Add(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi16 (__m256i a, __m256i b)
-        ///   VPADDW ymm, ymm, ymm/m256
+        ///   VPADDW ymm1,         ymm2, ymm3/m256
+        ///   VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Add(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi16 (__m256i a, __m256i b)
-        ///   VPADDW ymm, ymm, ymm/m256
+        ///   VPADDW ymm1,         ymm2, ymm3/m256
+        ///   VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Add(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi32 (__m256i a, __m256i b)
-        ///   VPADDD ymm, ymm, ymm/m256
+        ///   VPADDD ymm1,         ymm2, ymm3/m256
+        ///   VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Add(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi32 (__m256i a, __m256i b)
-        ///   VPADDD ymm, ymm, ymm/m256
+        ///   VPADDD ymm1,         ymm2, ymm3/m256
+        ///   VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Add(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi64 (__m256i a, __m256i b)
-        ///   VPADDQ ymm, ymm, ymm/m256
+        ///   VPADDQ ymm1,         ymm2, ymm3/m256
+        ///   VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Add(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_add_epi64 (__m256i a, __m256i b)
-        ///   VPADDQ ymm, ymm, ymm/m256
+        ///   VPADDQ ymm1,         ymm2, ymm3/m256
+        ///   VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Add(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_adds_epi8 (__m256i a, __m256i b)
-        ///   VPADDSB ymm, ymm, ymm/m256
+        ///   VPADDSB ymm1,         ymm2, ymm3/m256
+        ///   VPADDSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> AddSaturate(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_adds_epu8 (__m256i a, __m256i b)
-        ///   VPADDUSB ymm, ymm, ymm/m256
+        ///   VPADDUSB ymm1,         ymm2, ymm3/m256
+        ///   VPADDUSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> AddSaturate(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_adds_epi16 (__m256i a, __m256i b)
-        ///   VPADDSW ymm, ymm, ymm/m256
+        ///   VPADDSW ymm1,         ymm2, ymm3/m256
+        ///   VPADDSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> AddSaturate(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_adds_epu16 (__m256i a, __m256i b)
-        ///   VPADDUSW ymm, ymm, ymm/m256
+        ///   VPADDUSW ymm1,         ymm2, ymm3/m256
+        ///   VPADDUSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> AddSaturate(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<sbyte> AlignRight(Vector256<sbyte> left, Vector256<sbyte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<byte> AlignRight(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<short> AlignRight(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ushort> AlignRight(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<int> AlignRight(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<uint> AlignRight(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<long> AlignRight(Vector256<long> left, Vector256<long> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ulong> AlignRight(Vector256<ulong> left, Vector256<ulong> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> And(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> And(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> And(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> And(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> And(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> And(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> And(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> And(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> AndNot(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> AndNot(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> AndNot(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> AndNot(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> AndNot(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> AndNot(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> AndNot(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> AndNot(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_avg_epu8 (__m256i a, __m256i b)
-        ///   VPAVGB ymm, ymm, ymm/m256
+        ///   VPAVGB ymm1,         ymm2, ymm3/m256
+        ///   VPAVGB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Average(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_avg_epu16 (__m256i a, __m256i b)
-        ///   VPAVGW ymm, ymm, ymm/m256
+        ///   VPAVGW ymm1,         ymm2, ymm3/m256
+        ///   VPAVGW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Average(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8)
-        ///   VPBLENDD xmm, xmm, xmm/m128, imm8
+        ///   VPBLENDD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<int> Blend(Vector128<int> left, Vector128<int> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8)
-        ///   VPBLENDD xmm, xmm, xmm/m128, imm8
+        ///   VPBLENDD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<uint> Blend(Vector128<uint> left, Vector128<uint> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDW ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDW ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<short> Blend(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDW ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDW ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ushort> Blend(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDD ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDD ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<int> Blend(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDD ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDD ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<uint> Blend(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<sbyte> BlendVariable(Vector256<sbyte> left, Vector256<sbyte> right, Vector256<sbyte> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<byte> BlendVariable(Vector256<byte> left, Vector256<byte> right, Vector256<byte> mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<short> BlendVariable(Vector256<short> left, Vector256<short> right, Vector256<short> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<ushort> BlendVariable(Vector256<ushort> left, Vector256<ushort> right, Vector256<ushort> mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<int> BlendVariable(Vector256<int> left, Vector256<int> right, Vector256<int> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<uint> BlendVariable(Vector256<uint> left, Vector256<uint> right, Vector256<uint> mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<long> BlendVariable(Vector256<long> left, Vector256<long> right, Vector256<long> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<ulong> BlendVariable(Vector256<ulong> left, Vector256<ulong> right, Vector256<ulong> mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, xmm
+        ///   VPBROADCASTB xmm1,         xmm2/m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector128<byte> BroadcastScalarToVector128(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, xmm
+        ///   VPBROADCASTB xmm1,         xmm2/m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector128<sbyte> BroadcastScalarToVector128(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, xmm
+        ///   VPBROADCASTW xmm1,         xmm2/m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<short> BroadcastScalarToVector128(Vector128<short> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, xmm
+        ///   VPBROADCASTW xmm1,         xmm2/m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<ushort> BroadcastScalarToVector128(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, xmm
+        ///   VPBROADCASTD xmm1,         xmm2/m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<int> BroadcastScalarToVector128(Vector128<int> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, xmm
+        ///   VPBROADCASTD xmm1,         xmm2/m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<uint> BroadcastScalarToVector128(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, xmm
+        ///   VPBROADCASTQ xmm1,         xmm2/m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<long> BroadcastScalarToVector128(Vector128<long> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, xmm
+        ///   VPBROADCASTQ xmm1,         xmm2/m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<ulong> BroadcastScalarToVector128(Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_broadcastss_ps (__m128 a)
-        ///   VBROADCASTSS xmm, xmm
+        ///   VBROADCASTSS xmm1,         xmm2/m32
+        ///   VBROADCASTSS xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<float> BroadcastScalarToVector128(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_broadcastsd_pd (__m128d a)
-        ///   VMOVDDUP xmm, xmm
+        ///   VMOVDDUP xmm1, xmm2/m64
          /// </summary>
          public static Vector128<double> BroadcastScalarToVector128(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, m8
+        ///   VPBROADCASTB xmm1,         m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
-        /// We provide this additional overload for the lack of pointers to managed.
          /// </summary>
          public static unsafe Vector128<byte> BroadcastScalarToVector128(byte* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, m8
+        ///   VPBROADCASTB xmm1,         m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<sbyte> BroadcastScalarToVector128(sbyte* source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, m16
+        ///   VPBROADCASTW xmm1,         m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<short> BroadcastScalarToVector128(short* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, m16
+        ///   VPBROADCASTW xmm1,         m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<ushort> BroadcastScalarToVector128(ushort* source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, m32
+        ///   VPBROADCASTD xmm1,         m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<int> BroadcastScalarToVector128(int* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, m32
+        ///   VPBROADCASTD xmm1,         m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<uint> BroadcastScalarToVector128(uint* source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, m64
+        ///   VPBROADCASTQ xmm1,         m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<long> BroadcastScalarToVector128(long* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, m64
+        ///   VPBROADCASTQ xmm1,         m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<ulong> BroadcastScalarToVector128(ulong* source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, xmm
+        ///   VPBROADCASTB ymm1,         xmm2/m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector256<byte> BroadcastScalarToVector256(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, xmm
+        ///   VPBROADCASTB ymm1,         xmm2/m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector256<sbyte> BroadcastScalarToVector256(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, xmm
+        ///   VPBROADCASTW ymm1,         xmm2/m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector256<short> BroadcastScalarToVector256(Vector128<short> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, xmm
+        ///   VPBROADCASTW ymm1,         xmm2/m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector256<ushort> BroadcastScalarToVector256(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, xmm
+        ///   VPBROADCASTD ymm1,         xmm2/m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<int> BroadcastScalarToVector256(Vector128<int> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, xmm
+        ///   VPBROADCASTD ymm1,         xmm2/m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<uint> BroadcastScalarToVector256(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, xmm
+        ///   VPBROADCASTQ ymm1,         xmm2/m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<long> BroadcastScalarToVector256(Vector128<long> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, xmm
+        ///   VPBROADCASTQ ymm1,         xmm2/m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<ulong> BroadcastScalarToVector256(Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256 _mm256_broadcastss_ps (__m128 a)
-        ///   VBROADCASTSS ymm, xmm
+        ///   VBROADCASTSS ymm1,         xmm2/m32
+        ///   VBROADCASTSS ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<float> BroadcastScalarToVector256(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256d _mm256_broadcastsd_pd (__m128d a)
-        ///   VBROADCASTSD ymm, xmm
+        ///   VBROADCASTSD ymm1,         xmm2/m64
+        ///   VBROADCASTSD ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<double> BroadcastScalarToVector256(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, m8
+        ///   VPBROADCASTB ymm1,         m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<byte> BroadcastScalarToVector256(byte* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, m8
+        ///   VPBROADCASTB ymm1,         m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<sbyte> BroadcastScalarToVector256(sbyte* source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, m16
+        ///   VPBROADCASTW ymm1,         m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<short> BroadcastScalarToVector256(short* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, m16
+        ///   VPBROADCASTW ymm1,         m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ushort> BroadcastScalarToVector256(ushort* source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, m32
+        ///   VPBROADCASTD ymm1,         m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<int> BroadcastScalarToVector256(int* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, m32
+        ///   VPBROADCASTD ymm1,         m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<uint> BroadcastScalarToVector256(uint* source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, m64
+        ///   VPBROADCASTQ ymm1,         m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<long> BroadcastScalarToVector256(long* source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, m64
+        ///   VPBROADCASTQ ymm1,         m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ulong> BroadcastScalarToVector256(ulong* source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<sbyte> BroadcastVector128ToVector256(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<byte> BroadcastVector128ToVector256(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<short> BroadcastVector128ToVector256(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ushort> BroadcastVector128ToVector256(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<int> BroadcastVector128ToVector256(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<uint> BroadcastVector128ToVector256(uint* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI64x2 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<long> BroadcastVector128ToVector256(long* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI64x2 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ulong> BroadcastVector128ToVector256(ulong* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b)
-        ///   VPCMPEQB ymm, ymm, ymm/m256
+        ///   VPCMPEQB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> CompareEqual(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b)
-        ///   VPCMPEQB ymm, ymm, ymm/m256
+        ///   VPCMPEQB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> CompareEqual(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b)
-        ///   VPCMPEQW ymm, ymm, ymm/m256
+        ///   VPCMPEQW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> CompareEqual(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b)
-        ///   VPCMPEQW ymm, ymm, ymm/m256
+        ///   VPCMPEQW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> CompareEqual(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b)
-        ///   VPCMPEQD ymm, ymm, ymm/m256
+        ///   VPCMPEQD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> CompareEqual(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b)
-        ///   VPCMPEQD ymm, ymm, ymm/m256
+        ///   VPCMPEQD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<uint> CompareEqual(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b)
-        ///   VPCMPEQQ ymm, ymm, ymm/m256
+        ///   VPCMPEQQ ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<long> CompareEqual(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b)
-        ///   VPCMPEQQ ymm, ymm, ymm/m256
+        ///   VPCMPEQQ ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ulong> CompareEqual(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_cmpgt_epi8 (__m256i a, __m256i b)
-        ///   VPCMPGTB ymm, ymm, ymm/m256
+        ///   VPCMPGTB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> CompareGreaterThan(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpgt_epi16 (__m256i a, __m256i b)
-        ///   VPCMPGTW ymm, ymm, ymm/m256
+        ///   VPCMPGTW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> CompareGreaterThan(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpgt_epi32 (__m256i a, __m256i b)
-        ///   VPCMPGTD ymm, ymm, ymm/m256
+        ///   VPCMPGTD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> CompareGreaterThan(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cmpgt_epi64 (__m256i a, __m256i b)
-        ///   VPCMPGTQ ymm, ymm, ymm/m256
+        ///   VPCMPGTQ ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<long> CompareGreaterThan(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm256_cvtsi256_si32 (__m256i a)
-        ///   MOVD reg/m32, xmm
+        ///   VMOVD r/m32, xmm1
          /// </summary>
          public static int ConvertToInt32(Vector256<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm256_cvtsi256_si32 (__m256i a)
-        ///   MOVD reg/m32, xmm
+        ///   VMOVD r/m32, xmm1
          /// </summary>
          public static uint ConvertToUInt32(Vector256<uint> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_cvtepi8_epi16 (__m128i a)
-        ///   VPMOVSXBW ymm, xmm
+        ///   VPMOVSXBW ymm1,         xmm2/m128
+        ///   VPMOVSXBW ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<short> ConvertToVector256Int16(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepu8_epi16 (__m128i a)
-        ///   VPMOVZXBW ymm, xmm
+        ///   VPMOVZXBW ymm1,         xmm2/m128
+        ///   VPMOVZXBW ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<short> ConvertToVector256Int16(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepi8_epi32 (__m128i a)
-        ///   VPMOVSXBD ymm, xmm
+        ///   VPMOVSXBD ymm1,         xmm2/m64
+        ///   VPMOVSXBD ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepu8_epi32 (__m128i a)
-        ///   VPMOVZXBD ymm, xmm
+        ///   VPMOVZXBD ymm1,         xmm2/m64
+        ///   VPMOVZXBD ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepi16_epi32 (__m128i a)
-        ///   VPMOVSXWD ymm, xmm
+        ///   VPMOVSXWD ymm1,         xmm2/m128
+        ///   VPMOVSXWD ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<short> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepu16_epi32 (__m128i a)
-        ///   VPMOVZXWD ymm, xmm
+        ///   VPMOVZXWD ymm1,         xmm2/m128
+        ///   VPMOVZXWD ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepi8_epi64 (__m128i a)
-        ///   VPMOVSXBQ ymm, xmm
+        ///   VPMOVSXBQ ymm1,         xmm2/m32
+        ///   VPMOVSXBQ ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepu8_epi64 (__m128i a)
-        ///   VPMOVZXBQ ymm, xmm
+        ///   VPMOVZXBQ ymm1,         xmm2/m32
+        ///   VPMOVZXBQ ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepi16_epi64 (__m128i a)
-        ///   VPMOVSXWQ ymm, xmm
+        ///   VPMOVSXWQ ymm1,         xmm2/m64
+        ///   VPMOVSXWQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<short> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepu16_epi64 (__m128i a)
-        ///   VPMOVZXWQ ymm, xmm
+        ///   VPMOVZXWQ ymm1,         xmm2/m64
+        ///   VPMOVZXWQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepi32_epi64 (__m128i a)
-        ///   VPMOVSXDQ ymm, xmm
+        ///   VPMOVSXDQ ymm1,         xmm2/m128
+        ///   VPMOVSXDQ ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_cvtepu32_epi64 (__m128i a)
-        ///   VPMOVZXDQ ymm, xmm
+        ///   VPMOVZXDQ ymm1,         xmm2/m128
+        ///   VPMOVZXDQ ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        ///   VPMOVSXBW ymm, m128
+        ///   VPMOVSXBW ymm1,         m128
+        ///   VPMOVSXBW ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<short> ConvertToVector256Int16(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVZXBW ymm, m128
+        ///   VPMOVZXBW ymm1,         m128
+        ///   VPMOVZXBW ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<short> ConvertToVector256Int16(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVSXBD ymm, m64
+        ///   VPMOVSXBD ymm1,         m64
+        ///   VPMOVSXBD ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVZXBD ymm, m64
+        ///   VPMOVZXBD ymm1,         m64
+        ///   VPMOVZXBD ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVSXWD ymm, m128
+        ///   VPMOVSXWD ymm1,         m128
+        ///   VPMOVSXWD ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVZXWD ymm, m128
+        ///   VPMOVZXWD ymm1,         m128
+        ///   VPMOVZXWD ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVSXBQ ymm, m32
+        ///   VPMOVSXBQ ymm1,         m32
+        ///   VPMOVSXBQ ymm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVZXBQ ymm, m32
+        ///   VPMOVZXBQ ymm1,         m32
+        ///   VPMOVZXBQ ymm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVSXWQ ymm, m64
+        ///   VPMOVSXWQ ymm1,         m64
+        ///   VPMOVSXWQ ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVZXWQ ymm, m64
+        ///   VPMOVZXWQ ymm1,         m64
+        ///   VPMOVZXWQ ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVSXDQ ymm, m128
+        ///   VPMOVSXDQ ymm1,         m128
+        ///   VPMOVSXDQ ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   VPMOVZXDQ ymm, m128
+        ///   VPMOVZXDQ ymm1,         m128
+        ///   VPMOVZXDQ ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(uint* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<sbyte> ExtractVector128(Vector256<sbyte> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<byte> ExtractVector128(Vector256<byte> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<short> ExtractVector128(Vector256<short> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<ushort> ExtractVector128(Vector256<ushort> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<int> ExtractVector128(Vector256<int> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<uint> ExtractVector128(Vector256<uint> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<long> ExtractVector128(Vector256<long> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<ulong> ExtractVector128(Vector256<ulong> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherVector128(long* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherVector128(ulong* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_i32gather_ps (float const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERDPS xmm, vm32x, xmm
+        ///   VGATHERDPS xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERDPD xmm, vm32x, xmm
+        ///   VGATHERDPD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherVector128(double* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherVector128(long* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherVector128(ulong* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_i64gather_ps (float const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERQPS xmm, vm64x, xmm
+        ///   VGATHERQPS xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_i64gather_pd (double const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERQPD xmm, vm64x, xmm
+        ///   VGATHERQPD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherVector128(double* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<int> GatherVector256(int* baseAddress, Vector256<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<uint> GatherVector256(uint* baseAddress, Vector256<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherVector256(long* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherVector256(ulong* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_i32gather_ps (float const* base_addr, __m256i vindex, const int scale)
-        ///   VGATHERDPS ymm, vm32y, ymm
+        ///   VGATHERDPS ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<float> GatherVector256(float* baseAddress, Vector256<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERDPD ymm, vm32y, ymm
+        ///   VGATHERDPD ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherVector256(double* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQQ ymm, vm64y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherVector256(long* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQQ ymm, vm64y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherVector256(ulong* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm256_i64gather_ps (float const* base_addr, __m256i vindex, const int scale)
-        ///   VGATHERQPS xmm, vm64y, xmm
+        ///   VGATHERQPS xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_i64gather_pd (double const* base_addr, __m256i vindex, const int scale)
-        ///   VGATHERQPD ymm, vm64y, ymm
+        ///   VGATHERQPD ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherVector256(double* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector128<int> index, Vector128<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector128<int> index, Vector128<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherMaskVector128(Vector128<long> source, long* baseAddress, Vector128<int> index, Vector128<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherMaskVector128(Vector128<ulong> source, ulong* baseAddress, Vector128<int> index, Vector128<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_mask_i32gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
-        ///   VGATHERDPS xmm, vm32x, xmm
+        ///   VGATHERDPS xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector128<int> index, Vector128<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_mask_i32gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
-        ///   VGATHERDPD xmm, vm32x, xmm
+        ///   VGATHERDPD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherMaskVector128(Vector128<double> source, double* baseAddress, Vector128<int> index, Vector128<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector128<long> index, Vector128<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector128<long> index, Vector128<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherMaskVector128(Vector128<long> source, long* baseAddress, Vector128<long> index, Vector128<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherMaskVector128(Vector128<ulong> source, ulong* baseAddress, Vector128<long> index, Vector128<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_mask_i64gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
-        ///   VGATHERQPS xmm, vm64x, xmm
+        ///   VGATHERQPS xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector128<long> index, Vector128<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_mask_i64gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
-        ///   VGATHERQPD xmm, vm64x, xmm
+        ///   VGATHERQPD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherMaskVector128(Vector128<double> source, double* baseAddress, Vector128<long> index, Vector128<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<int> GatherMaskVector256(Vector256<int> source, int* baseAddress, Vector256<int> index, Vector256<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<uint> GatherMaskVector256(Vector256<uint> source, uint* baseAddress, Vector256<int> index, Vector256<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector128<int> index, Vector256<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector128<int> index, Vector256<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_mask_i32gather_ps (__m256 src, float const* base_addr, __m256i vindex, __m256 mask, const int scale)
-        ///   VPGATHERDPS ymm, vm32y, ymm
+        ///   VGATHERDPS ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<float> GatherMaskVector256(Vector256<float> source, float* baseAddress, Vector256<int> index, Vector256<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_mask_i32gather_pd (__m256d src, double const* base_addr, __m128i vindex, __m256d mask, const int scale)
-        ///   VPGATHERDPD ymm, vm32y, ymm
+        ///   VGATHERDPD ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector128<int> index, Vector256<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm32y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector256<long> index, Vector128<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm32y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector256<long> index, Vector128<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERQQ ymm, vm32y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector256<long> index, Vector256<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERQQ ymm, vm32y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector256<long> index, Vector256<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm256_mask_i64gather_ps (__m128 src, float const* base_addr, __m256i vindex, __m128 mask, const int scale)
-        ///   VGATHERQPS xmm, vm32y, xmm
+        ///   VGATHERQPS xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector256<long> index, Vector128<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_mask_i64gather_pd (__m256d src, double const* base_addr, __m256i vindex, __m256d mask, const int scale)
-        ///   VGATHERQPD ymm, vm32y, ymm
+        ///   VGATHERQPD ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector256<long> index, Vector256<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_hadd_epi16 (__m256i a, __m256i b)
-        ///   VPHADDW ymm, ymm, ymm/m256
+        ///   VPHADDW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalAdd(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_hadd_epi32 (__m256i a, __m256i b)
-        ///   VPHADDD ymm, ymm, ymm/m256
+        ///   VPHADDD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> HorizontalAdd(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_hadds_epi16 (__m256i a, __m256i b)
-        ///   VPHADDSW ymm, ymm, ymm/m256
+        ///   VPHADDSW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalAddSaturate(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_hsub_epi16 (__m256i a, __m256i b)
-        ///   VPHSUBW ymm, ymm, ymm/m256
+        ///   VPHSUBW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalSubtract(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_hsub_epi32 (__m256i a, __m256i b)
-        ///   VPHSUBD ymm, ymm, ymm/m256
+        ///   VPHSUBD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> HorizontalSubtract(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_hsubs_epi16 (__m256i a, __m256i b)
-        ///   VPHSUBSW ymm, ymm, ymm/m256
+        ///   VPHSUBSW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalSubtractSaturate(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<sbyte> InsertVector128(Vector256<sbyte> value, Vector128<sbyte> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<byte> InsertVector128(Vector256<byte> value, Vector128<byte> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<short> InsertVector128(Vector256<short> value, Vector128<short> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<ushort> InsertVector128(Vector256<ushort> value, Vector128<ushort> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<int> InsertVector128(Vector256<int> value, Vector128<int> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<uint> InsertVector128(Vector256<uint> value, Vector128<uint> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<long> InsertVector128(Vector256<long> value, Vector128<long> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<ulong> InsertVector128(Vector256<ulong> value, Vector128<ulong> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<sbyte> LoadAlignedVector256NonTemporal(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<byte> LoadAlignedVector256NonTemporal(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<short> LoadAlignedVector256NonTemporal(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<ushort> LoadAlignedVector256NonTemporal(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<int> LoadAlignedVector256NonTemporal(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<uint> LoadAlignedVector256NonTemporal(uint* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<long> LoadAlignedVector256NonTemporal(long* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<ulong> LoadAlignedVector256NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask)
-        ///   VPMASKMOVD xmm, xmm, m128
+        ///   VPMASKMOVD xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<int> MaskLoad(int* address, Vector128<int> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask)
-        ///   VPMASKMOVD xmm, xmm, m128
+        ///   VPMASKMOVD xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<uint> MaskLoad(uint* address, Vector128<uint> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask)
-        ///   VPMASKMOVQ xmm, xmm, m128
+        ///   VPMASKMOVQ xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<long> MaskLoad(long* address, Vector128<long> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask)
-        ///   VPMASKMOVQ xmm, xmm, m128
+        ///   VPMASKMOVQ xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<ulong> MaskLoad(ulong* address, Vector128<ulong> mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask)
-        ///   VPMASKMOVD ymm, ymm, m256
+        ///   VPMASKMOVD ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<int> MaskLoad(int* address, Vector256<int> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask)
-        ///   VPMASKMOVD ymm, ymm, m256
+        ///   VPMASKMOVD ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<uint> MaskLoad(uint* address, Vector256<uint> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask)
-        ///   VPMASKMOVQ ymm, ymm, m256
+        ///   VPMASKMOVQ ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<long> MaskLoad(long* address, Vector256<long> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask)
-        ///   VPMASKMOVQ ymm, ymm, m256
+        ///   VPMASKMOVQ ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<ulong> MaskLoad(ulong* address, Vector256<ulong> mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVD m128, xmm, xmm
+        ///   VPMASKMOVD m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(int* address, Vector128<int> mask, Vector128<int> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVD m128, xmm, xmm
+        ///   VPMASKMOVD m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(uint* address, Vector128<uint> mask, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVQ m128, xmm, xmm
+        ///   VPMASKMOVQ m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(long* address, Vector128<long> mask, Vector128<long> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVQ m128, xmm, xmm
+        ///   VPMASKMOVQ m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(ulong* address, Vector128<ulong> mask, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVD m256, ymm, ymm
+        ///   VPMASKMOVD m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(int* address, Vector256<int> mask, Vector256<int> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVD m256, ymm, ymm
+        ///   VPMASKMOVD m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(uint* address, Vector256<uint> mask, Vector256<uint> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVQ m256, ymm, ymm
+        ///   VPMASKMOVQ m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(long* address, Vector256<long> mask, Vector256<long> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVQ m256, ymm, ymm
+        ///   VPMASKMOVQ m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(ulong* address, Vector256<ulong> mask, Vector256<ulong> source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_madd_epi16 (__m256i a, __m256i b)
-        ///   VPMADDWD ymm, ymm, ymm/m256
+        ///   VPMADDWD ymm1,         ymm2, ymm3/m256
+        ///   VPMADDWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> MultiplyAddAdjacent(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_maddubs_epi16 (__m256i a, __m256i b)
-        ///   VPMADDUBSW ymm, ymm, ymm/m256
+        ///   VPMADDUBSW ymm1,         ymm2, ymm3/m256
+        ///   VPMADDUBSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyAddAdjacent(Vector256<byte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_max_epi8 (__m256i a, __m256i b)
-        ///   VPMAXSB ymm, ymm, ymm/m256
+        ///   VPMAXSB ymm1,         ymm2, ymm3/m256
+        ///   VPMAXSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Max(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_max_epu8 (__m256i a, __m256i b)
-        ///   VPMAXUB ymm, ymm, ymm/m256
+        ///   VPMAXUB ymm1,         ymm2, ymm3/m256
+        ///   VPMAXUB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Max(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_max_epi16 (__m256i a, __m256i b)
-        ///   VPMAXSW ymm, ymm, ymm/m256
+        ///   VPMAXSW ymm1,         ymm2, ymm3/m256
+        ///   VPMAXSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Max(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_max_epu16 (__m256i a, __m256i b)
-        ///   VPMAXUW ymm, ymm, ymm/m256
+        ///   VPMAXUW ymm1,         ymm2, ymm3/m256
+        ///   VPMAXUW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Max(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_max_epi32 (__m256i a, __m256i b)
-        ///   VPMAXSD ymm, ymm, ymm/m256
+        ///   VPMAXSD ymm1,         ymm2, ymm3/m256
+        ///   VPMAXSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Max(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_max_epu32 (__m256i a, __m256i b)
-        ///   VPMAXUD ymm, ymm, ymm/m256
+        ///   VPMAXUD ymm1,         ymm2, ymm3/m256
+        ///   VPMAXUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Max(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_min_epi8 (__m256i a, __m256i b)
-        ///   VPMINSB ymm, ymm, ymm/m256
+        ///   VPMINSB ymm1,         ymm2, ymm3/m256
+        ///   VPMINSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Min(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_min_epu8 (__m256i a, __m256i b)
-        ///   VPMINUB ymm, ymm, ymm/m256
+        ///   VPMINUB ymm1,         ymm2, ymm3/m256
+        ///   VPMINUB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Min(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_min_epi16 (__m256i a, __m256i b)
-        ///   VPMINSW ymm, ymm, ymm/m256
+        ///   VPMINSW ymm1,         ymm2, ymm3/m256
+        ///   VPMINSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Min(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_min_epu16 (__m256i a, __m256i b)
-        ///   VPMINUW ymm, ymm, ymm/m256
+        ///   VPMINUW ymm1,         ymm2, ymm3/m256
+        ///   VPMINUW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Min(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_min_epi32 (__m256i a, __m256i b)
-        ///   VPMINSD ymm, ymm, ymm/m256
+        ///   VPMINSD ymm1,         ymm2, ymm3/m256
+        ///   VPMINSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Min(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_min_epu32 (__m256i a, __m256i b)
-        ///   VPMINUD ymm, ymm, ymm/m256
+        ///   VPMINUD ymm1,         ymm2, ymm3/m256
+        ///   VPMINUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Min(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm256_movemask_epi8 (__m256i a)
-        ///   VPMOVMSKB reg, ymm
+        ///   VPMOVMSKB r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm256_movemask_epi8 (__m256i a)
-        ///   VPMOVMSKB reg, ymm
+        ///   VPMOVMSKB r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<byte> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_mpsadbw_epu8 (__m256i a, __m256i b, const int imm8)
-        ///   VMPSADBW ymm, ymm, ymm/m256, imm8
+        ///   VMPSADBW ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ushort> MultipleSumAbsoluteDifferences(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_mul_epi32 (__m256i a, __m256i b)
-        ///   VPMULDQ ymm, ymm, ymm/m256
+        ///   VPMULDQ ymm1,         ymm2, ymm3/m256
+        ///   VPMULDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Multiply(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mul_epu32 (__m256i a, __m256i b)
-        ///   VPMULUDQ ymm, ymm, ymm/m256
+        ///   VPMULUDQ ymm1,         ymm2, ymm3/m256
+        ///   VPMULUDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Multiply(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_mulhi_epi16 (__m256i a, __m256i b)
-        ///   VPMULHW ymm, ymm, ymm/m256
+        ///   VPMULHW ymm1,         ymm2, ymm3/m256
+        ///   VPMULHW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyHigh(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mulhi_epu16 (__m256i a, __m256i b)
-        ///   VPMULHUW ymm, ymm, ymm/m256
+        ///   VPMULHUW ymm1,         ymm2, ymm3/m256
+        ///   VPMULHUW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> MultiplyHigh(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_mulhrs_epi16 (__m256i a, __m256i b)
-        ///   VPMULHRSW ymm, ymm, ymm/m256
+        ///   VPMULHRSW ymm1,         ymm2, ymm3/m256
+        ///   VPMULHRSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyHighRoundScale(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b)
-        ///   VPMULLW ymm, ymm, ymm/m256
+        ///   VPMULLW ymm1,         ymm2, ymm3/m256
+        ///   VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyLow(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b)
-        ///   VPMULLW ymm, ymm, ymm/m256
+        ///   VPMULLW ymm1,         ymm2, ymm3/m256
+        ///   VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> MultiplyLow(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
-        ///   VPMULLD ymm, ymm, ymm/m256
+        ///   VPMULLD ymm1,         ymm2, ymm3/m256
+        ///   VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> MultiplyLow(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
-        ///   VPMULLD ymm, ymm, ymm/m256
+        ///   VPMULLD ymm1,         ymm2, ymm3/m256
+        ///   VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> MultiplyLow(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Or(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Or(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Or(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Or(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Or(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Or(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Or(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Or(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_packs_epi16 (__m256i a, __m256i b)
-        ///   VPACKSSWB ymm, ymm, ymm/m256
+        ///   VPACKSSWB ymm1,         ymm2, ymm3/m256
+        ///   VPACKSSWB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> PackSignedSaturate(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_packs_epi32 (__m256i a, __m256i b)
-        ///   VPACKSSDW ymm, ymm, ymm/m256
+        ///   VPACKSSDW ymm1,         ymm2, ymm3/m256
+        ///   VPACKSSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<short> PackSignedSaturate(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m256i _mm256_packus_epi16 (__m256i a, __m256i b)
-        ///   VPACKUSWB ymm, ymm, ymm/m256
+        ///   VPACKUSWB ymm1,         ymm2, ymm3/m256
+        ///   VPACKUSWB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> PackUnsignedSaturate(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_packus_epi32 (__m256i a, __m256i b)
-        ///   VPACKUSDW ymm, ymm, ymm/m256
+        ///   VPACKUSDW ymm1,         ymm2, ymm3/m256
+        ///   VPACKUSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<ushort> PackUnsignedSaturate(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<sbyte> Permute2x128(Vector256<sbyte> left, Vector256<sbyte> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<byte> Permute2x128(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<short> Permute2x128(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<ushort> Permute2x128(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<int> Permute2x128(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<uint> Permute2x128(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<long> Permute2x128(Vector256<long> left, Vector256<long> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<ulong> Permute2x128(Vector256<ulong> left, Vector256<ulong> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8)
-        ///   VPERMQ ymm, ymm/m256, imm8
+        ///   VPERMQ ymm1,         ymm2/m256,         imm8
+        ///   VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<long> Permute4x64(Vector256<long> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8)
-        ///   VPERMQ ymm, ymm/m256, imm8
+        ///   VPERMQ ymm1,         ymm2/m256,         imm8
+        ///   VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<ulong> Permute4x64(Vector256<ulong> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_permute4x64_pd (__m256d a, const int imm8)
-        ///   VPERMPD ymm, ymm/m256, imm8
+        ///   VPERMPD ymm1,         ymm2/m256,         imm8
+        ///   VPERMPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<double> Permute4x64(Vector256<double> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
-        ///   VPERMD ymm, ymm/m256, ymm
+        ///   VPERMD ymm1,         ymm2, ymm3/m256
+        ///   VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> PermuteVar8x32(Vector256<int> left, Vector256<int> control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
-        ///   VPERMD ymm, ymm/m256, ymm
+        ///   VPERMD ymm1,         ymm2, ymm3/m256
+        ///   VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> PermuteVar8x32(Vector256<uint> left, Vector256<uint> control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
-        ///   VPERMPS ymm, ymm/m256, ymm
+        ///   VPERMPS ymm1,         ymm2, ymm3/m256
+        ///   VPERMPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> PermuteVar8x32(Vector256<float> left, Vector256<int> control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count)
-        ///   VPSLLW ymm, ymm, xmm/m128
+        ///   VPSLLW ymm1,         ymm2, xmm3/m128
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<short> ShiftLeftLogical(Vector256<short> value, Vector128<short> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count)
-        ///   VPSLLW ymm, ymm, xmm/m128
+        ///   VPSLLW ymm1,         ymm2, xmm3/m128
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ushort> ShiftLeftLogical(Vector256<ushort> value, Vector128<ushort> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count)
-        ///   VPSLLD ymm, ymm, xmm/m128
+        ///   VPSLLD ymm1,         ymm2, xmm3/m128
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<int> ShiftLeftLogical(Vector256<int> value, Vector128<int> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count)
-        ///   VPSLLD ymm, ymm, xmm/m128
+        ///   VPSLLD ymm1,         ymm2, xmm3/m128
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<uint> ShiftLeftLogical(Vector256<uint> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count)
-        ///   VPSLLQ ymm, ymm, xmm/m128
+        ///   VPSLLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<long> ShiftLeftLogical(Vector256<long> value, Vector128<long> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count)
-        ///   VPSLLQ ymm, ymm, xmm/m128
+        ///   VPSLLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ulong> ShiftLeftLogical(Vector256<ulong> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_slli_epi16 (__m256i a, int imm8)
-        ///   VPSLLW ymm, ymm, imm8
+        ///   VPSLLW ymm1,         ymm2, imm8
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<short> ShiftLeftLogical(Vector256<short> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_slli_epi16 (__m256i a, int imm8)
-        ///   VPSLLW ymm, ymm, imm8
+        ///   VPSLLW ymm1,         ymm2, imm8
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ushort> ShiftLeftLogical(Vector256<ushort> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_slli_epi32 (__m256i a, int imm8)
-        ///   VPSLLD ymm, ymm, imm8
+        ///   VPSLLD ymm1,         ymm2, imm8
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<int> ShiftLeftLogical(Vector256<int> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_slli_epi32 (__m256i a, int imm8)
-        ///   VPSLLD ymm, ymm, imm8
+        ///   VPSLLD ymm1,         ymm2, imm8
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<uint> ShiftLeftLogical(Vector256<uint> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_slli_epi64 (__m256i a, int imm8)
-        ///   VPSLLQ ymm, ymm, imm8
+        ///   VPSLLQ ymm1,         ymm2, imm8
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<long> ShiftLeftLogical(Vector256<long> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_slli_epi64 (__m256i a, int imm8)
-        ///   VPSLLQ ymm, ymm, imm8
+        ///   VPSLLQ ymm1,         ymm2, imm8
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ulong> ShiftLeftLogical(Vector256<ulong> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<sbyte> ShiftLeftLogical128BitLane(Vector256<sbyte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<byte> ShiftLeftLogical128BitLane(Vector256<byte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<short> ShiftLeftLogical128BitLane(Vector256<short> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ushort> ShiftLeftLogical128BitLane(Vector256<ushort> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<int> ShiftLeftLogical128BitLane(Vector256<int> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<uint> ShiftLeftLogical128BitLane(Vector256<uint> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<long> ShiftLeftLogical128BitLane(Vector256<long> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ulong> ShiftLeftLogical128BitLane(Vector256<ulong> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
-        ///   VPSLLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ShiftLeftLogicalVariable(Vector256<int> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
-        ///   VPSLLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<uint> ShiftLeftLogicalVariable(Vector256<uint> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
-        ///   VPSLLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<long> ShiftLeftLogicalVariable(Vector256<long> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
-        ///   VPSLLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<ulong> ShiftLeftLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count)
-        ///   VPSLLVD xmm, ymm, xmm/m128
+        ///   VPSLLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> ShiftLeftLogicalVariable(Vector128<int> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count)
-        ///   VPSLLVD xmm, ymm, xmm/m128
+        ///   VPSLLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> ShiftLeftLogicalVariable(Vector128<uint> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count)
-        ///   VPSLLVQ xmm, ymm, xmm/m128
+        ///   VPSLLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> ShiftLeftLogicalVariable(Vector128<long> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count)
-        ///   VPSLLVQ xmm, ymm, xmm/m128
+        ///   VPSLLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogicalVariable(Vector128<ulong> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
+        ///   VPSLLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<int> ShiftLeftLogicalVariable(Vector256<int> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
+        ///   VPSLLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<uint> ShiftLeftLogicalVariable(Vector256<uint> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
+        ///   VPSLLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<long> ShiftLeftLogicalVariable(Vector256<long> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
+        ///   VPSLLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<ulong> ShiftLeftLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// _mm256_sra_epi16 (__m256i a, __m128i count)
+        /// __m256i _mm256_sra_epi16 (__m256i a, __m128i count)
-        ///   VPSRAW ymm, ymm, xmm/m128
+        ///   VPSRAW ymm1,         ymm2, xmm3/m128
+        ///   VPSRAW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<short> ShiftRightArithmetic(Vector256<short> value, Vector128<short> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// _mm256_sra_epi32 (__m256i a, __m128i count)
+        /// __m256i _mm256_sra_epi32 (__m256i a, __m128i count)
-        ///   VPSRAD ymm, ymm, xmm/m128
+        ///   VPSRAD ymm1,         ymm2, xmm3/m128
+        ///   VPSRAD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<int> ShiftRightArithmetic(Vector256<int> value, Vector128<int> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_srai_epi16 (__m256i a, int imm8)
-        ///   VPSRAW ymm, ymm, imm8
+        ///   VPSRAW ymm1,         ymm2, imm8
+        ///   VPSRAW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<short> ShiftRightArithmetic(Vector256<short> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srai_epi32 (__m256i a, int imm8)
-        ///   VPSRAD ymm, ymm, imm8
+        ///   VPSRAD ymm1,         ymm2, imm8
+        ///   VPSRAD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<int> ShiftRightArithmetic(Vector256<int> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count)
-        ///   VPSRAVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ShiftRightArithmeticVariable(Vector256<int> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_srav_epi32 (__m128i a, __m128i count)
-        ///   VPSRAVD xmm, xmm, xmm/m128
+        ///   VPSRAVD xmm1,         xmm2, xmm3/m128
+        ///   VPSRAVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> ShiftRightArithmeticVariable(Vector128<int> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count)
+        ///   VPSRAVD ymm1,         ymm2, ymm3/m256
+        ///   VPSRAVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<int> ShiftRightArithmeticVariable(Vector256<int> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count)
-        ///   VPSRLW ymm, ymm, xmm/m128
+        ///   VPSRLW ymm1,         ymm2, xmm3/m128
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<short> ShiftRightLogical(Vector256<short> value, Vector128<short> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count)
-        ///   VPSRLW ymm, ymm, xmm/m128
+        ///   VPSRLW ymm1,         ymm2, xmm3/m128
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ushort> ShiftRightLogical(Vector256<ushort> value, Vector128<ushort> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count)
-        ///   VPSRLD ymm, ymm, xmm/m128
+        ///   VPSRLD ymm1,         ymm2, xmm3/m128
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<int> ShiftRightLogical(Vector256<int> value, Vector128<int> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count)
-        ///   VPSRLD ymm, ymm, xmm/m128
+        ///   VPSRLD ymm1,         ymm2, xmm3/m128
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<uint> ShiftRightLogical(Vector256<uint> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count)
-        ///   VPSRLQ ymm, ymm, xmm/m128
+        ///   VPSRLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<long> ShiftRightLogical(Vector256<long> value, Vector128<long> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count)
-        ///   VPSRLQ ymm, ymm, xmm/m128
+        ///   VPSRLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ulong> ShiftRightLogical(Vector256<ulong> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_srli_epi16 (__m256i a, int imm8)
-        ///   VPSRLW ymm, ymm, imm8
+        ///   VPSRLW ymm1,         ymm2, imm8
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<short> ShiftRightLogical(Vector256<short> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srli_epi16 (__m256i a, int imm8)
-        ///   VPSRLW ymm, ymm, imm8
+        ///   VPSRLW ymm1,         ymm2, imm8
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ushort> ShiftRightLogical(Vector256<ushort> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srli_epi32 (__m256i a, int imm8)
-        ///   VPSRLD ymm, ymm, imm8
+        ///   VPSRLD ymm1,         ymm2, imm8
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<int> ShiftRightLogical(Vector256<int> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srli_epi32 (__m256i a, int imm8)
-        ///   VPSRLD ymm, ymm, imm8
+        ///   VPSRLD ymm1,         ymm2, imm8
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<uint> ShiftRightLogical(Vector256<uint> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srli_epi64 (__m256i a, int imm8)
-        ///   VPSRLQ ymm, ymm, imm8
+        ///   VPSRLQ ymm1,         ymm2, imm8
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<long> ShiftRightLogical(Vector256<long> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_srli_epi64 (__m256i a, int imm8)
-        ///   VPSRLQ ymm, ymm, imm8
+        ///   VPSRLQ ymm1,         ymm2, imm8
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ulong> ShiftRightLogical(Vector256<ulong> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<sbyte> ShiftRightLogical128BitLane(Vector256<sbyte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<byte> ShiftRightLogical128BitLane(Vector256<byte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<short> ShiftRightLogical128BitLane(Vector256<short> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ushort> ShiftRightLogical128BitLane(Vector256<ushort> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<int> ShiftRightLogical128BitLane(Vector256<int> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<uint> ShiftRightLogical128BitLane(Vector256<uint> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<long> ShiftRightLogical128BitLane(Vector256<long> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ulong> ShiftRightLogical128BitLane(Vector256<ulong> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
-        ///   VPSRLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ShiftRightLogicalVariable(Vector256<int> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
-        ///   VPSRLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<uint> ShiftRightLogicalVariable(Vector256<uint> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
-        ///   VPSRLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<long> ShiftRightLogicalVariable(Vector256<long> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
-        ///   VPSRLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<ulong> ShiftRightLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count)
-        ///   VPSRLVD xmm, xmm, xmm/m128
+        ///   VPSRLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> ShiftRightLogicalVariable(Vector128<int> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count)
-        ///   VPSRLVD xmm, xmm, xmm/m128
+        ///   VPSRLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> ShiftRightLogicalVariable(Vector128<uint> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count)
-        ///   VPSRLVQ xmm, xmm, xmm/m128
+        ///   VPSRLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> ShiftRightLogicalVariable(Vector128<long> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count)
-        ///   VPSRLVQ xmm, xmm, xmm/m128
+        ///   VPSRLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> ShiftRightLogicalVariable(Vector128<ulong> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
+        ///   VPSRLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<int> ShiftRightLogicalVariable(Vector256<int> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
+        ///   VPSRLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<uint> ShiftRightLogicalVariable(Vector256<uint> value, Vector256<uint> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
+        ///   VPSRLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<long> ShiftRightLogicalVariable(Vector256<long> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
+        ///   VPSRLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<ulong> ShiftRightLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b)
-        ///   VPSHUFB ymm, ymm, ymm/m256
+        ///   VPSHUFB ymm1,         ymm2, ymm3/m256
+        ///   VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Shuffle(Vector256<sbyte> value, Vector256<sbyte> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b)
-        ///   VPSHUFB ymm, ymm, ymm/m256
+        ///   VPSHUFB ymm1,         ymm2, ymm3/m256
+        ///   VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Shuffle(Vector256<byte> value, Vector256<byte> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
-        ///   VPSHUFD ymm, ymm/m256, imm8
+        ///   VPSHUFD ymm1,         ymm2/m256,         imm8
+        ///   VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<int> Shuffle(Vector256<int> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
-        ///   VPSHUFD ymm, ymm/m256, imm8
+        ///   VPSHUFD ymm1,         ymm2/m256,         imm8
+        ///   VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<uint> Shuffle(Vector256<uint> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFHW ymm, ymm/m256, imm8
+        ///   VPSHUFHW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<short> ShuffleHigh(Vector256<short> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFHW ymm, ymm/m256, imm8
+        ///   VPSHUFHW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<ushort> ShuffleHigh(Vector256<ushort> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFLW ymm, ymm/m256, imm8
+        ///   VPSHUFLW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<short> ShuffleLow(Vector256<short> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFLW ymm, ymm/m256, imm8
+        ///   VPSHUFLW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<ushort> ShuffleLow(Vector256<ushort> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_sign_epi8 (__m256i a, __m256i b)
-        ///   VPSIGNB ymm, ymm, ymm/m256
+        ///   VPSIGNB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Sign(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sign_epi16 (__m256i a, __m256i b)
-        ///   VPSIGNW ymm, ymm, ymm/m256
+        ///   VPSIGNW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Sign(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sign_epi32 (__m256i a, __m256i b)
-        ///   VPSIGND ymm, ymm, ymm/m256
+        ///   VPSIGND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> Sign(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b)
-        ///   VPSUBB ymm, ymm, ymm/m256
+        ///   VPSUBB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Subtract(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b)
-        ///   VPSUBB ymm, ymm, ymm/m256
+        ///   VPSUBB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Subtract(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b)
-        ///   VPSUBW ymm, ymm, ymm/m256
+        ///   VPSUBW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Subtract(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b)
-        ///   VPSUBW ymm, ymm, ymm/m256
+        ///   VPSUBW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Subtract(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b)
-        ///   VPSUBD ymm, ymm, ymm/m256
+        ///   VPSUBD ymm1,         ymm2, ymm3/m256
+        ///   VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Subtract(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b)
-        ///   VPSUBD ymm, ymm, ymm/m256
+        ///   VPSUBD ymm1,         ymm2, ymm3/m256
+        ///   VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Subtract(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b)
-        ///   VPSUBQ ymm, ymm, ymm/m256
+        ///   VPSUBQ ymm1,         ymm2, ymm3/m256
+        ///   VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Subtract(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b)
-        ///   VPSUBQ ymm, ymm, ymm/m256
+        ///   VPSUBQ ymm1,         ymm2, ymm3/m256
+        ///   VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Subtract(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_subs_epi8 (__m256i a, __m256i b)
-        ///   VPSUBSB ymm, ymm, ymm/m256
+        ///   VPSUBSB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> SubtractSaturate(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_subs_epi16 (__m256i a, __m256i b)
-        ///   VPSUBSW ymm, ymm, ymm/m256
+        ///   VPSUBSW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> SubtractSaturate(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_subs_epu8 (__m256i a, __m256i b)
-        ///   VPSUBUSB ymm, ymm, ymm/m256
+        ///   VPSUBUSB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBUSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> SubtractSaturate(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_subs_epu16 (__m256i a, __m256i b)
-        ///   VPSUBUSW ymm, ymm, ymm/m256
+        ///   VPSUBUSW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBUSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> SubtractSaturate(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_sad_epu8 (__m256i a, __m256i b)
-        ///   VPSADBW ymm, ymm, ymm/m256
+        ///   VPSADBW ymm1,         ymm2, ymm3/m256
+        ///   VPSADBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> SumAbsoluteDifferences(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKHBW ymm, ymm, ymm/m256
+        ///   VPUNPCKHBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> UnpackHigh(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKHBW ymm, ymm, ymm/m256
+        ///   VPUNPCKHBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> UnpackHigh(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKHWD ymm, ymm, ymm/m256
+        ///   VPUNPCKHWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> UnpackHigh(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKHWD ymm, ymm, ymm/m256
+        ///   VPUNPCKHWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> UnpackHigh(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKHDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> UnpackHigh(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKHDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> UnpackHigh(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKHQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> UnpackHigh(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKHQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> UnpackHigh(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKLBW ymm, ymm, ymm/m256
+        ///   VPUNPCKLBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> UnpackLow(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKLBW ymm, ymm, ymm/m256
+        ///   VPUNPCKLBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> UnpackLow(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKLWD ymm, ymm, ymm/m256
+        ///   VPUNPCKLWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> UnpackLow(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKLWD ymm, ymm, ymm/m256
+        ///   VPUNPCKLWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> UnpackLow(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKLDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> UnpackLow(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKLDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> UnpackLow(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKLQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> UnpackLow(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKLQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> UnpackLow(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Xor(Vector256<sbyte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Xor(Vector256<byte> left, Vector256<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Xor(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Xor(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Xor(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Xor(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Xor(Vector256<long> left, Vector256<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Xor(Vector256<ulong> left, Vector256<ulong> right) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs

index e275baffece76ecb3051c1e1c5142b262bdfb89d..3783778cc857ab829b2b8f131fd464a40f9a24ea 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs
@@ -27,829 +27,896 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m256i _mm256_abs_epi8 (__m256i a)
-        ///   VPABSB ymm, ymm/m256
+        ///   VPABSB ymm1,         ymm2/m256
+        ///   VPABSB ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<byte> Abs(Vector256<sbyte> value) => Abs(value);
          /// <summary>
          /// __m256i _mm256_abs_epi16 (__m256i a)
-        ///   VPABSW ymm, ymm/m256
+        ///   VPABSW ymm1,         ymm2/m256
+        ///   VPABSW ymm1 {k1}{z}, ymm2/m256
          /// </summary>
          public static Vector256<ushort> Abs(Vector256<short> value) => Abs(value);
          /// <summary>
          /// __m256i _mm256_abs_epi32 (__m256i a)
-        ///   VPABSD ymm, ymm/m256
+        ///   VPABSD ymm1,         ymm2/m256
+        ///   VPABSD ymm1 {k1}{z}, ymm2/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Abs(Vector256<int> value) => Abs(value);
  
          /// <summary>
          /// __m256i _mm256_add_epi8 (__m256i a, __m256i b)
-        ///   VPADDB ymm, ymm, ymm/m256
+        ///   VPADDB ymm1,         ymm2, ymm3/m256
+        ///   VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Add(Vector256<sbyte> left, Vector256<sbyte> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi8 (__m256i a, __m256i b)
-        ///   VPADDB ymm, ymm, ymm/m256
+        ///   VPADDB ymm1,         ymm2, ymm3/m256
+        ///   VPADDB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Add(Vector256<byte> left, Vector256<byte> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi16 (__m256i a, __m256i b)
-        ///   VPADDW ymm, ymm, ymm/m256
+        ///   VPADDW ymm1,         ymm2, ymm3/m256
+        ///   VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Add(Vector256<short> left, Vector256<short> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi16 (__m256i a, __m256i b)
-        ///   VPADDW ymm, ymm, ymm/m256
+        ///   VPADDW ymm1,         ymm2, ymm3/m256
+        ///   VPADDW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Add(Vector256<ushort> left, Vector256<ushort> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi32 (__m256i a, __m256i b)
-        ///   VPADDD ymm, ymm, ymm/m256
+        ///   VPADDD ymm1,         ymm2, ymm3/m256
+        ///   VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Add(Vector256<int> left, Vector256<int> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi32 (__m256i a, __m256i b)
-        ///   VPADDD ymm, ymm, ymm/m256
+        ///   VPADDD ymm1,         ymm2, ymm3/m256
+        ///   VPADDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Add(Vector256<uint> left, Vector256<uint> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi64 (__m256i a, __m256i b)
-        ///   VPADDQ ymm, ymm, ymm/m256
+        ///   VPADDQ ymm1,         ymm2, ymm3/m256
+        ///   VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Add(Vector256<long> left, Vector256<long> right) => Add(left, right);
          /// <summary>
          /// __m256i _mm256_add_epi64 (__m256i a, __m256i b)
-        ///   VPADDQ ymm, ymm, ymm/m256
+        ///   VPADDQ ymm1,         ymm2, ymm3/m256
+        ///   VPADDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Add(Vector256<ulong> left, Vector256<ulong> right) => Add(left, right);
  
          /// <summary>
          /// __m256i _mm256_adds_epi8 (__m256i a, __m256i b)
-        ///   VPADDSB ymm, ymm, ymm/m256
+        ///   VPADDSB ymm1,         ymm2, ymm3/m256
+        ///   VPADDSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> AddSaturate(Vector256<sbyte> left, Vector256<sbyte> right) => AddSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_adds_epu8 (__m256i a, __m256i b)
-        ///   VPADDUSB ymm, ymm, ymm/m256
+        ///   VPADDUSB ymm1,         ymm2, ymm3/m256
+        ///   VPADDUSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> AddSaturate(Vector256<byte> left, Vector256<byte> right) => AddSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_adds_epi16 (__m256i a, __m256i b)
-        ///   VPADDSW ymm, ymm, ymm/m256
+        ///   VPADDSW ymm1,         ymm2, ymm3/m256
+        ///   VPADDSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> AddSaturate(Vector256<short> left, Vector256<short> right) => AddSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_adds_epu16 (__m256i a, __m256i b)
-        ///   VPADDUSW ymm, ymm, ymm/m256
+        ///   VPADDUSW ymm1,         ymm2, ymm3/m256
+        ///   VPADDUSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> AddSaturate(Vector256<ushort> left, Vector256<ushort> right) => AddSaturate(left, right);
  
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<sbyte> AlignRight(Vector256<sbyte> left, Vector256<sbyte> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<byte> AlignRight(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<short> AlignRight(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ushort> AlignRight(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<int> AlignRight(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<uint> AlignRight(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<long> AlignRight(Vector256<long> left, Vector256<long> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
-        ///   VPALIGNR ymm, ymm, ymm/m256, imm8
+        ///   VPALIGNR ymm1,         ymm2, ymm3/m256, imm8
+        ///   VPALIGNR ymm1 {k1}{z}, ymm2, ymm3/m256, imm8
          /// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ulong> AlignRight(Vector256<ulong> left, Vector256<ulong> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
  
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> And(Vector256<sbyte> left, Vector256<sbyte> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> And(Vector256<byte> left, Vector256<byte> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> And(Vector256<short> left, Vector256<short> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> And(Vector256<ushort> left, Vector256<ushort> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> And(Vector256<int> left, Vector256<int> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> And(Vector256<uint> left, Vector256<uint> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> And(Vector256<long> left, Vector256<long> right) => And(left, right);
          /// <summary>
          /// __m256i _mm256_and_si256 (__m256i a, __m256i b)
-        ///   VPAND ymm, ymm, ymm/m256
+        ///   VPAND  ymm1,         ymm2, ymm3/m256
+        ///   VPANDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> And(Vector256<ulong> left, Vector256<ulong> right) => And(left, right);
  
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> AndNot(Vector256<sbyte> left, Vector256<sbyte> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> AndNot(Vector256<byte> left, Vector256<byte> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> AndNot(Vector256<short> left, Vector256<short> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> AndNot(Vector256<ushort> left, Vector256<ushort> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> AndNot(Vector256<int> left, Vector256<int> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDND ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> AndNot(Vector256<uint> left, Vector256<uint> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> AndNot(Vector256<long> left, Vector256<long> right) => AndNot(left, right);
          /// <summary>
          /// __m256i _mm256_andnot_si256 (__m256i a, __m256i b)
-        ///   VPANDN ymm, ymm, ymm/m256
+        ///   VPANDN  ymm1,         ymm2, ymm3/m256
+        ///   VPANDNQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> AndNot(Vector256<ulong> left, Vector256<ulong> right) => AndNot(left, right);
  
          /// <summary>
          /// __m256i _mm256_avg_epu8 (__m256i a, __m256i b)
-        ///   VPAVGB ymm, ymm, ymm/m256
+        ///   VPAVGB ymm1,         ymm2, ymm3/m256
+        ///   VPAVGB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Average(Vector256<byte> left, Vector256<byte> right) => Average(left, right);
          /// <summary>
          /// __m256i _mm256_avg_epu16 (__m256i a, __m256i b)
-        ///   VPAVGW ymm, ymm, ymm/m256
+        ///   VPAVGW ymm1,         ymm2, ymm3/m256
+        ///   VPAVGW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Average(Vector256<ushort> left, Vector256<ushort> right) => Average(left, right);
  
          /// <summary>
          /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8)
-        ///   VPBLENDD xmm, xmm, xmm/m128, imm8
+        ///   VPBLENDD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<int> Blend(Vector128<int> left, Vector128<int> right, [ConstantExpected] byte control) => Blend(left, right, control);
          /// <summary>
          /// __m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8)
-        ///   VPBLENDD xmm, xmm, xmm/m128, imm8
+        ///   VPBLENDD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<uint> Blend(Vector128<uint> left, Vector128<uint> right, [ConstantExpected] byte control) => Blend(left, right, control);
          /// <summary>
          /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDW ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDW ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<short> Blend(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte control) => Blend(left, right, control);
          /// <summary>
          /// __m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDW ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDW ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ushort> Blend(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte control) => Blend(left, right, control);
          /// <summary>
          /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDD ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDD ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<int> Blend(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte control) => Blend(left, right, control);
          /// <summary>
          /// __m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8)
-        ///   VPBLENDD ymm, ymm, ymm/m256, imm8
+        ///   VPBLENDD ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<uint> Blend(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte control) => Blend(left, right, control);
  
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<sbyte> BlendVariable(Vector256<sbyte> left, Vector256<sbyte> right, Vector256<sbyte> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// </summary>
          public static Vector256<byte> BlendVariable(Vector256<byte> left, Vector256<byte> right, Vector256<byte> mask) => BlendVariable(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<short> BlendVariable(Vector256<short> left, Vector256<short> right, Vector256<short> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<ushort> BlendVariable(Vector256<ushort> left, Vector256<ushort> right, Vector256<ushort> mask) => BlendVariable(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<int> BlendVariable(Vector256<int> left, Vector256<int> right, Vector256<int> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<uint> BlendVariable(Vector256<uint> left, Vector256<uint> right, Vector256<uint> mask) => BlendVariable(left, right, mask);
-
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<long> BlendVariable(Vector256<long> left, Vector256<long> right, Vector256<long> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
-        ///   VPBLENDVB ymm, ymm, ymm/m256, ymm
+        ///   VPBLENDVB ymm1, ymm2, ymm3/m256, ymm4
          /// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector256<ulong> BlendVariable(Vector256<ulong> left, Vector256<ulong> right, Vector256<ulong> mask) => BlendVariable(left, right, mask);
  
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, xmm
+        ///   VPBROADCASTB xmm1,         xmm2/m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector128<byte> BroadcastScalarToVector128(Vector128<byte> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, xmm
+        ///   VPBROADCASTB xmm1,         xmm2/m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector128<sbyte> BroadcastScalarToVector128(Vector128<sbyte> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, xmm
+        ///   VPBROADCASTW xmm1,         xmm2/m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<short> BroadcastScalarToVector128(Vector128<short> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, xmm
+        ///   VPBROADCASTW xmm1,         xmm2/m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<ushort> BroadcastScalarToVector128(Vector128<ushort> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, xmm
+        ///   VPBROADCASTD xmm1,         xmm2/m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<int> BroadcastScalarToVector128(Vector128<int> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, xmm
+        ///   VPBROADCASTD xmm1,         xmm2/m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<uint> BroadcastScalarToVector128(Vector128<uint> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, xmm
+        ///   VPBROADCASTQ xmm1,         xmm2/m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<long> BroadcastScalarToVector128(Vector128<long> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, xmm
+        ///   VPBROADCASTQ xmm1,         xmm2/m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<ulong> BroadcastScalarToVector128(Vector128<ulong> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128 _mm_broadcastss_ps (__m128 a)
-        ///   VBROADCASTSS xmm, xmm
+        ///   VBROADCASTSS xmm1,         xmm2/m32
+        ///   VBROADCASTSS xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<float> BroadcastScalarToVector128(Vector128<float> value) => BroadcastScalarToVector128(value);
-
          /// <summary>
          /// __m128d _mm_broadcastsd_pd (__m128d a)
-        ///   VMOVDDUP xmm, xmm
+        ///   VMOVDDUP xmm1, xmm2/m64
          /// </summary>
          public static Vector128<double> BroadcastScalarToVector128(Vector128<double> value) => BroadcastScalarToVector128(value);
  
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, m8
+        ///   VPBROADCASTB xmm1,         m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<byte> BroadcastScalarToVector128(byte* source) => BroadcastScalarToVector128(source);
          /// <summary>
          /// __m128i _mm_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB xmm, m8
+        ///   VPBROADCASTB xmm1,         m8
+        ///   VPBROADCASTB xmm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<sbyte> BroadcastScalarToVector128(sbyte* source) => BroadcastScalarToVector128(source);
-
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, m16
+        ///   VPBROADCASTW xmm1,         m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<short> BroadcastScalarToVector128(short* source) => BroadcastScalarToVector128(source);
          /// <summary>
          /// __m128i _mm_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW xmm, m16
+        ///   VPBROADCASTW xmm1,         m16
+        ///   VPBROADCASTW xmm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<ushort> BroadcastScalarToVector128(ushort* source) => BroadcastScalarToVector128(source);
-
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, m32
+        ///   VPBROADCASTD xmm1,         m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<int> BroadcastScalarToVector128(int* source) => BroadcastScalarToVector128(source);
          /// <summary>
          /// __m128i _mm_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD xmm, m32
+        ///   VPBROADCASTD xmm1,         m32
+        ///   VPBROADCASTD xmm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<uint> BroadcastScalarToVector128(uint* source) => BroadcastScalarToVector128(source);
-
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, m64
+        ///   VPBROADCASTQ xmm1,         m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<long> BroadcastScalarToVector128(long* source) => BroadcastScalarToVector128(source);
          /// <summary>
          /// __m128i _mm_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ xmm, m64
+        ///   VPBROADCASTQ xmm1,         m64
+        ///   VPBROADCASTQ xmm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector128<ulong> BroadcastScalarToVector128(ulong* source) => BroadcastScalarToVector128(source);
  
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, xmm
+        ///   VPBROADCASTB ymm1,         xmm2/m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector256<byte> BroadcastScalarToVector256(Vector128<byte> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, xmm
+        ///   VPBROADCASTB ymm1,         xmm2/m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, xmm2/m8
          /// </summary>
          public static Vector256<sbyte> BroadcastScalarToVector256(Vector128<sbyte> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, xmm
+        ///   VPBROADCASTW ymm1,         xmm2/m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector256<short> BroadcastScalarToVector256(Vector128<short> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, xmm
+        ///   VPBROADCASTW ymm1,         xmm2/m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector256<ushort> BroadcastScalarToVector256(Vector128<ushort> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, xmm
+        ///   VPBROADCASTD ymm1,         xmm2/m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<int> BroadcastScalarToVector256(Vector128<int> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, xmm
+        ///   VPBROADCASTD ymm1,         xmm2/m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<uint> BroadcastScalarToVector256(Vector128<uint> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, xmm
+        ///   VPBROADCASTQ ymm1,         xmm2/m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<long> BroadcastScalarToVector256(Vector128<long> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, xmm
+        ///   VPBROADCASTQ ymm1,         xmm2/m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<ulong> BroadcastScalarToVector256(Vector128<ulong> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256 _mm256_broadcastss_ps (__m128 a)
-        ///   VBROADCASTSS ymm, xmm
+        ///   VBROADCASTSS ymm1,         xmm2/m32
+        ///   VBROADCASTSS ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<float> BroadcastScalarToVector256(Vector128<float> value) => BroadcastScalarToVector256(value);
-
          /// <summary>
          /// __m256d _mm256_broadcastsd_pd (__m128d a)
-        ///   VBROADCASTSD ymm, xmm
+        ///   VBROADCASTSD ymm1,         xmm2/m64
+        ///   VBROADCASTSD ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<double> BroadcastScalarToVector256(Vector128<double> value) => BroadcastScalarToVector256(value);
  
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, m8
+        ///   VPBROADCASTB ymm1,         m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<byte> BroadcastScalarToVector256(byte* source) => BroadcastScalarToVector256(source);
          /// <summary>
          /// __m256i _mm256_broadcastb_epi8 (__m128i a)
-        ///   VPBROADCASTB ymm, m8
+        ///   VPBROADCASTB ymm1,         m8
+        ///   VPBROADCASTB ymm1 {k1}{z}, m8
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<sbyte> BroadcastScalarToVector256(sbyte* source) => BroadcastScalarToVector256(source);
-
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, m16
+        ///   VPBROADCASTW ymm1,         m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<short> BroadcastScalarToVector256(short* source) => BroadcastScalarToVector256(source);
          /// <summary>
          /// __m256i _mm256_broadcastw_epi16 (__m128i a)
-        ///   VPBROADCASTW ymm, m16
+        ///   VPBROADCASTW ymm1,         m16
+        ///   VPBROADCASTW ymm1 {k1}{z}, m16
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ushort> BroadcastScalarToVector256(ushort* source) => BroadcastScalarToVector256(source);
-
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, m32
+        ///   VPBROADCASTD ymm1,         m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<int> BroadcastScalarToVector256(int* source) => BroadcastScalarToVector256(source);
          /// <summary>
          /// __m256i _mm256_broadcastd_epi32 (__m128i a)
-        ///   VPBROADCASTD ymm, m32
+        ///   VPBROADCASTD ymm1,         m32
+        ///   VPBROADCASTD ymm1 {k1}{z}, m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<uint> BroadcastScalarToVector256(uint* source) => BroadcastScalarToVector256(source);
-
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, m64
+        ///   VPBROADCASTQ ymm1,         m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<long> BroadcastScalarToVector256(long* source) => BroadcastScalarToVector256(source);
          /// <summary>
          /// __m256i _mm256_broadcastq_epi64 (__m128i a)
-        ///   VPBROADCASTQ ymm, m64
+        ///   VPBROADCASTQ ymm1,         m64
+        ///   VPBROADCASTQ ymm1 {k1}{z}, m64
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ulong> BroadcastScalarToVector256(ulong* source) => BroadcastScalarToVector256(source);
  
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<sbyte> BroadcastVector128ToVector256(sbyte* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<byte> BroadcastVector128ToVector256(byte* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<short> BroadcastVector128ToVector256(short* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ushort> BroadcastVector128ToVector256(ushort* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<int> BroadcastVector128ToVector256(int* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI32x4 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<uint> BroadcastVector128ToVector256(uint* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI64x2 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<long> BroadcastVector128ToVector256(long* address) => BroadcastVector128ToVector256(address);
          /// <summary>
          /// __m256i _mm256_broadcastsi128_si256 (__m128i a)
-        ///   VBROADCASTI128 ymm, m128
+        ///   VBROADCASTI128  ymm1,         m128
+        ///   VBROADCASTI64x2 ymm1 {k1}{z}, m128
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe Vector256<ulong> BroadcastVector128ToVector256(ulong* address) => BroadcastVector128ToVector256(address);
  
          /// <summary>
          /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b)
-        ///   VPCMPEQB ymm, ymm, ymm/m256
+        ///   VPCMPEQB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> CompareEqual(Vector256<sbyte> left, Vector256<sbyte> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi8 (__m256i a, __m256i b)
-        ///   VPCMPEQB ymm, ymm, ymm/m256
+        ///   VPCMPEQB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> CompareEqual(Vector256<byte> left, Vector256<byte> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b)
-        ///   VPCMPEQW ymm, ymm, ymm/m256
+        ///   VPCMPEQW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> CompareEqual(Vector256<short> left, Vector256<short> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi16 (__m256i a, __m256i b)
-        ///   VPCMPEQW ymm, ymm, ymm/m256
+        ///   VPCMPEQW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> CompareEqual(Vector256<ushort> left, Vector256<ushort> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b)
-        ///   VPCMPEQD ymm, ymm, ymm/m256
+        ///   VPCMPEQD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> CompareEqual(Vector256<int> left, Vector256<int> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi32 (__m256i a, __m256i b)
-        ///   VPCMPEQD ymm, ymm, ymm/m256
+        ///   VPCMPEQD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<uint> CompareEqual(Vector256<uint> left, Vector256<uint> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b)
-        ///   VPCMPEQQ ymm, ymm, ymm/m256
+        ///   VPCMPEQQ ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<long> CompareEqual(Vector256<long> left, Vector256<long> right) => CompareEqual(left, right);
          /// <summary>
          /// __m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b)
-        ///   VPCMPEQQ ymm, ymm, ymm/m256
+        ///   VPCMPEQQ ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ulong> CompareEqual(Vector256<ulong> left, Vector256<ulong> right) => CompareEqual(left, right);
  
          /// <summary>
          /// __m256i _mm256_cmpgt_epi8 (__m256i a, __m256i b)
-        ///   VPCMPGTB ymm, ymm, ymm/m256
+        ///   VPCMPGTB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> CompareGreaterThan(Vector256<sbyte> left, Vector256<sbyte> right) => CompareGreaterThan(left, right);
          /// <summary>
          /// __m256i _mm256_cmpgt_epi16 (__m256i a, __m256i b)
-        ///   VPCMPGTW ymm, ymm, ymm/m256
+        ///   VPCMPGTW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> CompareGreaterThan(Vector256<short> left, Vector256<short> right) => CompareGreaterThan(left, right);
          /// <summary>
          /// __m256i _mm256_cmpgt_epi32 (__m256i a, __m256i b)
-        ///   VPCMPGTD ymm, ymm, ymm/m256
+        ///   VPCMPGTD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> CompareGreaterThan(Vector256<int> left, Vector256<int> right) => CompareGreaterThan(left, right);
          /// <summary>
          /// __m256i _mm256_cmpgt_epi64 (__m256i a, __m256i b)
-        ///   VPCMPGTQ ymm, ymm, ymm/m256
+        ///   VPCMPGTQ ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<long> CompareGreaterThan(Vector256<long> left, Vector256<long> right) => CompareGreaterThan(left, right);
  
          /// <summary>
          /// int _mm256_cvtsi256_si32 (__m256i a)
-        ///   MOVD reg/m32, xmm
+        ///   VMOVD r/m32, xmm1
          /// </summary>
          public static int ConvertToInt32(Vector256<int> value) => ConvertToInt32(value);
          /// <summary>
          /// int _mm256_cvtsi256_si32 (__m256i a)
-        ///   MOVD reg/m32, xmm
+        ///   VMOVD r/m32, xmm1
          /// </summary>
          public static uint ConvertToUInt32(Vector256<uint> value) => ConvertToUInt32(value);
  
          /// <summary>
          /// __m256i _mm256_cvtepi8_epi16 (__m128i a)
-        ///   VPMOVSXBW ymm, xmm
+        ///   VPMOVSXBW ymm1,         xmm2/m128
+        ///   VPMOVSXBW ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<short> ConvertToVector256Int16(Vector128<sbyte> value) => ConvertToVector256Int16(value);
          /// <summary>
          /// __m256i _mm256_cvtepu8_epi16 (__m128i a)
-        ///   VPMOVZXBW ymm, xmm
+        ///   VPMOVZXBW ymm1,         xmm2/m128
+        ///   VPMOVZXBW ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<short> ConvertToVector256Int16(Vector128<byte> value) => ConvertToVector256Int16(value);
          /// <summary>
          /// __m256i _mm256_cvtepi8_epi32 (__m128i a)
-        ///   VPMOVSXBD ymm, xmm
+        ///   VPMOVSXBD ymm1,         xmm2/m64
+        ///   VPMOVSXBD ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<sbyte> value) => ConvertToVector256Int32(value);
          /// <summary>
          /// __m256i _mm256_cvtepu8_epi32 (__m128i a)
-        ///   VPMOVZXBD ymm, xmm
+        ///   VPMOVZXBD ymm1,         xmm2/m64
+        ///   VPMOVZXBD ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<byte> value) => ConvertToVector256Int32(value);
          /// <summary>
          /// __m256i _mm256_cvtepi16_epi32 (__m128i a)
-        ///   VPMOVSXWD ymm, xmm
+        ///   VPMOVSXWD ymm1,         xmm2/m128
+        ///   VPMOVSXWD ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<short> value) => ConvertToVector256Int32(value);
          /// <summary>
          /// __m256i _mm256_cvtepu16_epi32 (__m128i a)
-        ///   VPMOVZXWD ymm, xmm
+        ///   VPMOVZXWD ymm1,         xmm2/m128
+        ///   VPMOVZXWD ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<int> ConvertToVector256Int32(Vector128<ushort> value) => ConvertToVector256Int32(value);
          /// <summary>
          /// __m256i _mm256_cvtepi8_epi64 (__m128i a)
-        ///   VPMOVSXBQ ymm, xmm
+        ///   VPMOVSXBQ ymm1,         xmm2/m32
+        ///   VPMOVSXBQ ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<sbyte> value) => ConvertToVector256Int64(value);
          /// <summary>
          /// __m256i _mm256_cvtepu8_epi64 (__m128i a)
-        ///   VPMOVZXBQ ymm, xmm
+        ///   VPMOVZXBQ ymm1,         xmm2/m32
+        ///   VPMOVZXBQ ymm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<byte> value) => ConvertToVector256Int64(value);
          /// <summary>
          /// __m256i _mm256_cvtepi16_epi64 (__m128i a)
-        ///   VPMOVSXWQ ymm, xmm
+        ///   VPMOVSXWQ ymm1,         xmm2/m64
+        ///   VPMOVSXWQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<short> value) => ConvertToVector256Int64(value);
          /// <summary>
          /// __m256i _mm256_cvtepu16_epi64 (__m128i a)
-        ///   VPMOVZXWQ ymm, xmm
+        ///   VPMOVZXWQ ymm1,         xmm2/m64
+        ///   VPMOVZXWQ ymm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<ushort> value) => ConvertToVector256Int64(value);
          /// <summary>
          /// __m256i _mm256_cvtepi32_epi64 (__m128i a)
-        ///   VPMOVSXDQ ymm, xmm
+        ///   VPMOVSXDQ ymm1,         xmm2/m128
+        ///   VPMOVSXDQ ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<int> value) => ConvertToVector256Int64(value);
          /// <summary>
          /// __m256i _mm256_cvtepu32_epi64 (__m128i a)
-        ///   VPMOVZXDQ ymm, xmm
+        ///   VPMOVZXDQ ymm1,         xmm2/m128
+        ///   VPMOVZXDQ ymm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector256<long> ConvertToVector256Int64(Vector128<uint> value) => ConvertToVector256Int64(value);
  
          /// <summary>
-        ///   VPMOVSXBW ymm, m128
+        ///   VPMOVSXBW ymm1,         m128
+        ///   VPMOVSXBW ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<short> ConvertToVector256Int16(sbyte* address) => ConvertToVector256Int16(address);
          /// <summary>
-        ///   VPMOVZXBW ymm, m128
+        ///   VPMOVZXBW ymm1,         m128
+        ///   VPMOVZXBW ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<short> ConvertToVector256Int16(byte* address) => ConvertToVector256Int16(address);
          /// <summary>
-        ///   VPMOVSXBD ymm, m64
+        ///   VPMOVSXBD ymm1,         m64
+        ///   VPMOVSXBD ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(sbyte* address) => ConvertToVector256Int32(address);
          /// <summary>
-        ///   VPMOVZXBD ymm, m64
+        ///   VPMOVZXBD ymm1,         m64
+        ///   VPMOVZXBD ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(byte* address) => ConvertToVector256Int32(address);
          /// <summary>
-        ///   VPMOVSXWD ymm, m128
+        ///   VPMOVSXWD ymm1,         m128
+        ///   VPMOVSXWD ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(short* address) => ConvertToVector256Int32(address);
          /// <summary>
-        ///   VPMOVZXWD ymm, m128
+        ///   VPMOVZXWD ymm1,         m128
+        ///   VPMOVZXWD ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<int> ConvertToVector256Int32(ushort* address) => ConvertToVector256Int32(address);
          /// <summary>
-        ///   VPMOVSXBQ ymm, m32
+        ///   VPMOVSXBQ ymm1,         m32
+        ///   VPMOVSXBQ ymm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(sbyte* address) => ConvertToVector256Int64(address);
          /// <summary>
-        ///   VPMOVZXBQ ymm, m32
+        ///   VPMOVZXBQ ymm1,         m32
+        ///   VPMOVZXBQ ymm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(byte* address) => ConvertToVector256Int64(address);
          /// <summary>
-        ///   VPMOVSXWQ ymm, m64
+        ///   VPMOVSXWQ ymm1,         m64
+        ///   VPMOVSXWQ ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(short* address) => ConvertToVector256Int64(address);
          /// <summary>
-        ///   VPMOVZXWQ ymm, m64
+        ///   VPMOVZXWQ ymm1,         m64
+        ///   VPMOVZXWQ ymm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(ushort* address) => ConvertToVector256Int64(address);
          /// <summary>
-        ///   VPMOVSXDQ ymm, m128
+        ///   VPMOVSXDQ ymm1,         m128
+        ///   VPMOVSXDQ ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(int* address) => ConvertToVector256Int64(address);
          /// <summary>
-        ///   VPMOVZXDQ ymm, m128
+        ///   VPMOVZXDQ ymm1,         m128
+        ///   VPMOVZXDQ ymm1 {k1}{z}, m128
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector256<long> ConvertToVector256Int64(uint* address) => ConvertToVector256Int64(address);
  
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<sbyte> ExtractVector128(Vector256<sbyte> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<byte> ExtractVector128(Vector256<byte> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<short> ExtractVector128(Vector256<short> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<ushort> ExtractVector128(Vector256<ushort> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<int> ExtractVector128(Vector256<int> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI32x4 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<uint> ExtractVector128(Vector256<uint> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<long> ExtractVector128(Vector256<long> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
-
          /// <summary>
          /// __m128i _mm256_extracti128_si256 (__m256i a, const int imm8)
-        ///   VEXTRACTI128 xmm, ymm, imm8
+        ///   VEXTRACTI128  xmm1/m128,         ymm2, imm8
+        ///   VEXTRACTI64x2 xmm1/m128 {k1}{z}, ymm2, imm8
          /// </summary>
          public static new Vector128<ulong> ExtractVector128(Vector256<ulong> value, [ConstantExpected] byte index) => ExtractVector128(value, index);
  
          /// <summary>
          /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -865,7 +932,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i32gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -881,7 +948,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherVector128(long* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -897,7 +964,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherVector128(ulong* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -913,7 +980,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128 _mm_i32gather_ps (float const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERDPS xmm, vm32x, xmm
+        ///   VGATHERDPS xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -929,7 +996,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128d _mm_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERDPD xmm, vm32x, xmm
+        ///   VGATHERDPD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherVector128(double* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -945,7 +1012,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -961,7 +1028,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i64gather_epi32 (int const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -977,7 +1044,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherVector128(long* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -993,7 +1060,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_i64gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherVector128(ulong* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1009,7 +1076,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128 _mm_i64gather_ps (float const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERQPS xmm, vm64x, xmm
+        ///   VGATHERQPS xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1025,7 +1092,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128d _mm_i64gather_pd (double const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERQPD xmm, vm64x, xmm
+        ///   VGATHERQPD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherVector128(double* baseAddress, Vector128<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1041,7 +1108,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<int> GatherVector256(int* baseAddress, Vector256<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1057,7 +1124,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<uint> GatherVector256(uint* baseAddress, Vector256<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1073,7 +1140,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherVector256(long* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1089,7 +1156,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_i32gather_epi64 (__int64 const* base_addr, __m128i vindex, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherVector256(ulong* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1105,7 +1172,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256 _mm256_i32gather_ps (float const* base_addr, __m256i vindex, const int scale)
-        ///   VGATHERDPS ymm, vm32y, ymm
+        ///   VGATHERDPS ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<float> GatherVector256(float* baseAddress, Vector256<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1121,7 +1188,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256d _mm256_i32gather_pd (double const* base_addr, __m128i vindex, const int scale)
-        ///   VGATHERDPD ymm, vm32y, ymm
+        ///   VGATHERDPD ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherVector256(double* baseAddress, Vector128<int> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1137,7 +1204,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherVector128(int* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1153,7 +1220,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm256_i64gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQD xmm, vm64y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherVector128(uint* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1169,7 +1236,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQQ ymm, vm64y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherVector256(long* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1185,7 +1252,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_i64gather_epi64 (__int64 const* base_addr, __m256i vindex, const int scale)
-        ///   VPGATHERQQ ymm, vm64y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherVector256(ulong* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1201,7 +1268,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128 _mm256_i64gather_ps (float const* base_addr, __m256i vindex, const int scale)
-        ///   VGATHERQPS xmm, vm64y, xmm
+        ///   VGATHERQPS xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherVector128(float* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1217,7 +1284,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256d _mm256_i64gather_pd (double const* base_addr, __m256i vindex, const int scale)
-        ///   VGATHERQPD ymm, vm64y, ymm
+        ///   VGATHERQPD ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherVector256(double* baseAddress, Vector256<long> index, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1234,7 +1301,7 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector128<int> index, Vector128<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1250,7 +1317,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDD xmm, vm32x, xmm
+        ///   VPGATHERDD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector128<int> index, Vector128<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1266,7 +1333,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherMaskVector128(Vector128<long> source, long* baseAddress, Vector128<int> index, Vector128<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1282,7 +1349,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i32gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERDQ xmm, vm32x, xmm
+        ///   VPGATHERDQ xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherMaskVector128(Vector128<ulong> source, ulong* baseAddress, Vector128<int> index, Vector128<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1298,7 +1365,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128 _mm_mask_i32gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
-        ///   VGATHERDPS xmm, vm32x, xmm
+        ///   VGATHERDPS xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector128<int> index, Vector128<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1314,7 +1381,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128d _mm_mask_i32gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
-        ///   VGATHERDPD xmm, vm32x, xmm
+        ///   VGATHERDPD xmm1, vm32x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherMaskVector128(Vector128<double> source, double* baseAddress, Vector128<int> index, Vector128<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1330,7 +1397,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector128<long> index, Vector128<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1346,7 +1413,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm64x, xmm
+        ///   VPGATHERQD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector128<long> index, Vector128<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1362,7 +1429,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<long> GatherMaskVector128(Vector128<long> source, long* baseAddress, Vector128<long> index, Vector128<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1378,7 +1445,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm_mask_i64gather_epi64 (__m128i src, __int64 const* base_addr, __m128i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQQ xmm, vm64x, xmm
+        ///   VPGATHERQQ xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<ulong> GatherMaskVector128(Vector128<ulong> source, ulong* baseAddress, Vector128<long> index, Vector128<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1394,7 +1461,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128 _mm_mask_i64gather_ps (__m128 src, float const* base_addr, __m128i vindex, __m128 mask, const int scale)
-        ///   VGATHERQPS xmm, vm64x, xmm
+        ///   VGATHERQPS xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector128<long> index, Vector128<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1410,7 +1477,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128d _mm_mask_i64gather_pd (__m128d src, double const* base_addr, __m128i vindex, __m128d mask, const int scale)
-        ///   VGATHERQPD xmm, vm64x, xmm
+        ///   VGATHERQPD xmm1, vm64x, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<double> GatherMaskVector128(Vector128<double> source, double* baseAddress, Vector128<long> index, Vector128<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1426,7 +1493,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<int> GatherMaskVector256(Vector256<int> source, int* baseAddress, Vector256<int> index, Vector256<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1442,7 +1509,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDD ymm, vm32y, ymm
+        ///   VPGATHERDD ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<uint> GatherMaskVector256(Vector256<uint> source, uint* baseAddress, Vector256<int> index, Vector256<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1458,7 +1525,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector128<int> index, Vector256<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1474,7 +1541,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_mask_i32gather_epi64 (__m256i src, __int64 const* base_addr, __m128i vindex, __m256i mask, const int scale)
-        ///   VPGATHERDQ ymm, vm32y, ymm
+        ///   VPGATHERDQ ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector128<int> index, Vector256<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1490,7 +1557,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256 _mm256_mask_i32gather_ps (__m256 src, float const* base_addr, __m256i vindex, __m256 mask, const int scale)
-        ///   VPGATHERDPS ymm, vm32y, ymm
+        ///   VGATHERDPS ymm1, vm32y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<float> GatherMaskVector256(Vector256<float> source, float* baseAddress, Vector256<int> index, Vector256<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1506,7 +1573,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256d _mm256_mask_i32gather_pd (__m256d src, double const* base_addr, __m128i vindex, __m256d mask, const int scale)
-        ///   VPGATHERDPD ymm, vm32y, ymm
+        ///   VGATHERDPD ymm1, vm32x, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector128<int> index, Vector256<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1522,7 +1589,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm32y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<int> GatherMaskVector128(Vector128<int> source, int* baseAddress, Vector256<long> index, Vector128<int> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1538,7 +1605,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128i _mm256_mask_i64gather_epi32 (__m128i src, int const* base_addr, __m256i vindex, __m128i mask, const int scale)
-        ///   VPGATHERQD xmm, vm32y, xmm
+        ///   VPGATHERQD xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<uint> GatherMaskVector128(Vector128<uint> source, uint* baseAddress, Vector256<long> index, Vector128<uint> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1554,7 +1621,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERQQ ymm, vm32y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<long> GatherMaskVector256(Vector256<long> source, long* baseAddress, Vector256<long> index, Vector256<long> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1570,7 +1637,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256i _mm256_mask_i64gather_epi64 (__m256i src, __int64 const* base_addr, __m256i vindex, __m256i mask, const int scale)
-        ///   VPGATHERQQ ymm, vm32y, ymm
+        ///   VPGATHERQQ ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<ulong> GatherMaskVector256(Vector256<ulong> source, ulong* baseAddress, Vector256<long> index, Vector256<ulong> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1586,7 +1653,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m128 _mm256_mask_i64gather_ps (__m128 src, float const* base_addr, __m256i vindex, __m128 mask, const int scale)
-        ///   VGATHERQPS xmm, vm32y, xmm
+        ///   VGATHERQPS xmm1, vm64y, xmm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector128<float> GatherMaskVector128(Vector128<float> source, float* baseAddress, Vector256<long> index, Vector128<float> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1602,7 +1669,7 @@ namespace System.Runtime.Intrinsics.X86
          }
          /// <summary>
          /// __m256d _mm256_mask_i64gather_pd (__m256d src, double const* base_addr, __m256i vindex, __m256d mask, const int scale)
-        ///   VGATHERQPD ymm, vm32y, ymm
+        ///   VGATHERQPD ymm1, vm64y, ymm2
          /// The scale parameter should be 1, 2, 4 or 8, otherwise, ArgumentOutOfRangeException will be thrown.
          /// </summary>
          public static unsafe Vector256<double> GatherMaskVector256(Vector256<double> source, double* baseAddress, Vector256<long> index, Vector256<double> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
@@ -1619,1058 +1686,1189 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m256i _mm256_hadd_epi16 (__m256i a, __m256i b)
-        ///   VPHADDW ymm, ymm, ymm/m256
+        ///   VPHADDW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalAdd(Vector256<short> left, Vector256<short> right) => HorizontalAdd(left, right);
          /// <summary>
          /// __m256i _mm256_hadd_epi32 (__m256i a, __m256i b)
-        ///   VPHADDD ymm, ymm, ymm/m256
+        ///   VPHADDD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> HorizontalAdd(Vector256<int> left, Vector256<int> right) => HorizontalAdd(left, right);
  
          /// <summary>
          /// __m256i _mm256_hadds_epi16 (__m256i a, __m256i b)
-        ///   VPHADDSW ymm, ymm, ymm/m256
+        ///   VPHADDSW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalAddSaturate(Vector256<short> left, Vector256<short> right) => HorizontalAddSaturate(left, right);
  
          /// <summary>
          /// __m256i _mm256_hsub_epi16 (__m256i a, __m256i b)
-        ///   VPHSUBW ymm, ymm, ymm/m256
+        ///   VPHSUBW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalSubtract(Vector256<short> left, Vector256<short> right) => HorizontalSubtract(left, right);
          /// <summary>
          /// __m256i _mm256_hsub_epi32 (__m256i a, __m256i b)
-        ///   VPHSUBD ymm, ymm, ymm/m256
+        ///   VPHSUBD ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> HorizontalSubtract(Vector256<int> left, Vector256<int> right) => HorizontalSubtract(left, right);
  
          /// <summary>
          /// __m256i _mm256_hsubs_epi16 (__m256i a, __m256i b)
-        ///   VPHSUBSW ymm, ymm, ymm/m256
+        ///   VPHSUBSW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> HorizontalSubtractSaturate(Vector256<short> left, Vector256<short> right) => HorizontalSubtractSaturate(left, right);
  
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<sbyte> InsertVector128(Vector256<sbyte> value, Vector128<sbyte> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<byte> InsertVector128(Vector256<byte> value, Vector128<byte> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<short> InsertVector128(Vector256<short> value, Vector128<short> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<ushort> InsertVector128(Vector256<ushort> value, Vector128<ushort> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<int> InsertVector128(Vector256<int> value, Vector128<int> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI32x4 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<uint> InsertVector128(Vector256<uint> value, Vector128<uint> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<long> InsertVector128(Vector256<long> value, Vector128<long> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
-
          /// <summary>
          /// __m256i _mm256_inserti128_si256 (__m256i a, __m128i b, const int imm8)
-        ///   VINSERTI128 ymm, ymm, xmm, imm8
+        ///   VINSERTI128  ymm1,         ymm2, xmm3/m128, imm8
+        ///   VINSERTI64x2 ymm1 {k1}{z}, ymm2, xmm3/m128, imm8
          /// </summary>
          public static new Vector256<ulong> InsertVector128(Vector256<ulong> value, Vector128<ulong> data, [ConstantExpected] byte index) => InsertVector128(value, data, index);
  
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<sbyte> LoadAlignedVector256NonTemporal(sbyte* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<byte> LoadAlignedVector256NonTemporal(byte* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<short> LoadAlignedVector256NonTemporal(short* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<ushort> LoadAlignedVector256NonTemporal(ushort* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<int> LoadAlignedVector256NonTemporal(int* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<uint> LoadAlignedVector256NonTemporal(uint* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<long> LoadAlignedVector256NonTemporal(long* address) => LoadAlignedVector256NonTemporal(address);
          /// <summary>
          /// __m256i _mm256_stream_load_si256 (__m256i const* mem_addr)
-        ///   VMOVNTDQA ymm, m256
+        ///   VMOVNTDQA ymm1, m256
          /// </summary>
          public static unsafe Vector256<ulong> LoadAlignedVector256NonTemporal(ulong* address) => LoadAlignedVector256NonTemporal(address);
  
          /// <summary>
          /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask)
-        ///   VPMASKMOVD xmm, xmm, m128
+        ///   VPMASKMOVD xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<int> MaskLoad(int* address, Vector128<int> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m128i _mm_maskload_epi32 (int const* mem_addr, __m128i mask)
-        ///   VPMASKMOVD xmm, xmm, m128
+        ///   VPMASKMOVD xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<uint> MaskLoad(uint* address, Vector128<uint> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask)
-        ///   VPMASKMOVQ xmm, xmm, m128
+        ///   VPMASKMOVQ xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<long> MaskLoad(long* address, Vector128<long> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m128i _mm_maskload_epi64 (__int64 const* mem_addr, __m128i mask)
-        ///   VPMASKMOVQ xmm, xmm, m128
+        ///   VPMASKMOVQ xmm1, xmm2, m128
          /// </summary>
          public static unsafe Vector128<ulong> MaskLoad(ulong* address, Vector128<ulong> mask) => MaskLoad(address, mask);
-
          /// <summary>
          /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask)
-        ///   VPMASKMOVD ymm, ymm, m256
+        ///   VPMASKMOVD ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<int> MaskLoad(int* address, Vector256<int> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m256i _mm256_maskload_epi32 (int const* mem_addr, __m256i mask)
-        ///   VPMASKMOVD ymm, ymm, m256
+        ///   VPMASKMOVD ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<uint> MaskLoad(uint* address, Vector256<uint> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask)
-        ///   VPMASKMOVQ ymm, ymm, m256
+        ///   VPMASKMOVQ ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<long> MaskLoad(long* address, Vector256<long> mask) => MaskLoad(address, mask);
          /// <summary>
          /// __m256i _mm256_maskload_epi64 (__int64 const* mem_addr, __m256i mask)
-        ///   VPMASKMOVQ ymm, ymm, m256
+        ///   VPMASKMOVQ ymm1, ymm2, m256
          /// </summary>
          public static unsafe Vector256<ulong> MaskLoad(ulong* address, Vector256<ulong> mask) => MaskLoad(address, mask);
  
          /// <summary>
          /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVD m128, xmm, xmm
+        ///   VPMASKMOVD m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(int* address, Vector128<int> mask, Vector128<int> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm_maskstore_epi32 (int* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVD m128, xmm, xmm
+        ///   VPMASKMOVD m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(uint* address, Vector128<uint> mask, Vector128<uint> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVQ m128, xmm, xmm
+        ///   VPMASKMOVQ m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(long* address, Vector128<long> mask, Vector128<long> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm_maskstore_epi64 (__int64* mem_addr, __m128i mask, __m128i a)
-        ///   VPMASKMOVQ m128, xmm, xmm
+        ///   VPMASKMOVQ m128, xmm1, xmm2
          /// </summary>
          public static unsafe void MaskStore(ulong* address, Vector128<ulong> mask, Vector128<ulong> source) => MaskStore(address, mask, source);
-
          /// <summary>
          /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVD m256, ymm, ymm
+        ///   VPMASKMOVD m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(int* address, Vector256<int> mask, Vector256<int> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVD m256, ymm, ymm
+        ///   VPMASKMOVD m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(uint* address, Vector256<uint> mask, Vector256<uint> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVQ m256, ymm, ymm
+        ///   VPMASKMOVQ m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(long* address, Vector256<long> mask, Vector256<long> source) => MaskStore(address, mask, source);
          /// <summary>
          /// void _mm256_maskstore_epi64 (__int64* mem_addr, __m256i mask, __m256i a)
-        ///   VPMASKMOVQ m256, ymm, ymm
+        ///   VPMASKMOVQ m256, ymm1, ymm2
          /// </summary>
          public static unsafe void MaskStore(ulong* address, Vector256<ulong> mask, Vector256<ulong> source) => MaskStore(address, mask, source);
  
          /// <summary>
          /// __m256i _mm256_madd_epi16 (__m256i a, __m256i b)
-        ///   VPMADDWD ymm, ymm, ymm/m256
+        ///   VPMADDWD ymm1,         ymm2, ymm3/m256
+        ///   VPMADDWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> MultiplyAddAdjacent(Vector256<short> left, Vector256<short> right) => MultiplyAddAdjacent(left, right);
-
          /// <summary>
          /// __m256i _mm256_maddubs_epi16 (__m256i a, __m256i b)
-        ///   VPMADDUBSW ymm, ymm, ymm/m256
+        ///   VPMADDUBSW ymm1,         ymm2, ymm3/m256
+        ///   VPMADDUBSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyAddAdjacent(Vector256<byte> left, Vector256<sbyte> right) => MultiplyAddAdjacent(left, right);
  
          /// <summary>
          /// __m256i _mm256_max_epi8 (__m256i a, __m256i b)
-        ///   VPMAXSB ymm, ymm, ymm/m256
+        ///   VPMAXSB ymm1,         ymm2, ymm3/m256
+        ///   VPMAXSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Max(Vector256<sbyte> left, Vector256<sbyte> right) => Max(left, right);
          /// <summary>
          /// __m256i _mm256_max_epu8 (__m256i a, __m256i b)
-        ///   VPMAXUB ymm, ymm, ymm/m256
+        ///   VPMAXUB ymm1,         ymm2, ymm3/m256
+        ///   VPMAXUB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Max(Vector256<byte> left, Vector256<byte> right) => Max(left, right);
          /// <summary>
          /// __m256i _mm256_max_epi16 (__m256i a, __m256i b)
-        ///   VPMAXSW ymm, ymm, ymm/m256
+        ///   VPMAXSW ymm1,         ymm2, ymm3/m256
+        ///   VPMAXSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Max(Vector256<short> left, Vector256<short> right) => Max(left, right);
          /// <summary>
          /// __m256i _mm256_max_epu16 (__m256i a, __m256i b)
-        ///   VPMAXUW ymm, ymm, ymm/m256
+        ///   VPMAXUW ymm1,         ymm2, ymm3/m256
+        ///   VPMAXUW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Max(Vector256<ushort> left, Vector256<ushort> right) => Max(left, right);
          /// <summary>
          /// __m256i _mm256_max_epi32 (__m256i a, __m256i b)
-        ///   VPMAXSD ymm, ymm, ymm/m256
+        ///   VPMAXSD ymm1,         ymm2, ymm3/m256
+        ///   VPMAXSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Max(Vector256<int> left, Vector256<int> right) => Max(left, right);
          /// <summary>
          /// __m256i _mm256_max_epu32 (__m256i a, __m256i b)
-        ///   VPMAXUD ymm, ymm, ymm/m256
+        ///   VPMAXUD ymm1,         ymm2, ymm3/m256
+        ///   VPMAXUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Max(Vector256<uint> left, Vector256<uint> right) => Max(left, right);
  
          /// <summary>
          /// __m256i _mm256_min_epi8 (__m256i a, __m256i b)
-        ///   VPMINSB ymm, ymm, ymm/m256
+        ///   VPMINSB ymm1,         ymm2, ymm3/m256
+        ///   VPMINSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Min(Vector256<sbyte> left, Vector256<sbyte> right) => Min(left, right);
          /// <summary>
          /// __m256i _mm256_min_epu8 (__m256i a, __m256i b)
-        ///   VPMINUB ymm, ymm, ymm/m256
+        ///   VPMINUB ymm1,         ymm2, ymm3/m256
+        ///   VPMINUB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Min(Vector256<byte> left, Vector256<byte> right) => Min(left, right);
          /// <summary>
          /// __m256i _mm256_min_epi16 (__m256i a, __m256i b)
-        ///   VPMINSW ymm, ymm, ymm/m256
+        ///   VPMINSW ymm1,         ymm2, ymm3/m256
+        ///   VPMINSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Min(Vector256<short> left, Vector256<short> right) => Min(left, right);
          /// <summary>
          /// __m256i _mm256_min_epu16 (__m256i a, __m256i b)
-        ///   VPMINUW ymm, ymm, ymm/m256
+        ///   VPMINUW ymm1,         ymm2, ymm3/m256
+        ///   VPMINUW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Min(Vector256<ushort> left, Vector256<ushort> right) => Min(left, right);
          /// <summary>
          /// __m256i _mm256_min_epi32 (__m256i a, __m256i b)
-        ///   VPMINSD ymm, ymm, ymm/m256
+        ///   VPMINSD ymm1,         ymm2, ymm3/m256
+        ///   VPMINSD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Min(Vector256<int> left, Vector256<int> right) => Min(left, right);
          /// <summary>
          /// __m256i _mm256_min_epu32 (__m256i a, __m256i b)
-        ///   VPMINUD ymm, ymm, ymm/m256
+        ///   VPMINUD ymm1,         ymm2, ymm3/m256
+        ///   VPMINUD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Min(Vector256<uint> left, Vector256<uint> right) => Min(left, right);
  
          /// <summary>
          /// int _mm256_movemask_epi8 (__m256i a)
-        ///   VPMOVMSKB reg, ymm
+        ///   VPMOVMSKB r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<sbyte> value) => MoveMask(value);
          /// <summary>
          /// int _mm256_movemask_epi8 (__m256i a)
-        ///   VPMOVMSKB reg, ymm
+        ///   VPMOVMSKB r32, ymm1
          /// </summary>
          public static int MoveMask(Vector256<byte> value) => MoveMask(value);
  
          /// <summary>
          /// __m256i _mm256_mpsadbw_epu8 (__m256i a, __m256i b, const int imm8)
-        ///   VMPSADBW ymm, ymm, ymm/m256, imm8
+        ///   VMPSADBW ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static Vector256<ushort> MultipleSumAbsoluteDifferences(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask);
  
          /// <summary>
          /// __m256i _mm256_mul_epi32 (__m256i a, __m256i b)
-        ///   VPMULDQ ymm, ymm, ymm/m256
+        ///   VPMULDQ ymm1,         ymm2, ymm3/m256
+        ///   VPMULDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Multiply(Vector256<int> left, Vector256<int> right) => Multiply(left, right);
          /// <summary>
          /// __m256i _mm256_mul_epu32 (__m256i a, __m256i b)
-        ///   VPMULUDQ ymm, ymm, ymm/m256
+        ///   VPMULUDQ ymm1,         ymm2, ymm3/m256
+        ///   VPMULUDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Multiply(Vector256<uint> left, Vector256<uint> right) => Multiply(left, right);
  
          /// <summary>
          /// __m256i _mm256_mulhi_epi16 (__m256i a, __m256i b)
-        ///   VPMULHW ymm, ymm, ymm/m256
+        ///   VPMULHW ymm1,         ymm2, ymm3/m256
+        ///   VPMULHW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyHigh(Vector256<short> left, Vector256<short> right) => MultiplyHigh(left, right);
          /// <summary>
          /// __m256i _mm256_mulhi_epu16 (__m256i a, __m256i b)
-        ///   VPMULHUW ymm, ymm, ymm/m256
+        ///   VPMULHUW ymm1,         ymm2, ymm3/m256
+        ///   VPMULHUW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> MultiplyHigh(Vector256<ushort> left, Vector256<ushort> right) => MultiplyHigh(left, right);
  
          /// <summary>
          /// __m256i _mm256_mulhrs_epi16 (__m256i a, __m256i b)
-        ///   VPMULHRSW ymm, ymm, ymm/m256
+        ///   VPMULHRSW ymm1,         ymm2, ymm3/m256
+        ///   VPMULHRSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyHighRoundScale(Vector256<short> left, Vector256<short> right) => MultiplyHighRoundScale(left, right);
  
          /// <summary>
          /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b)
-        ///   VPMULLW ymm, ymm, ymm/m256
+        ///   VPMULLW ymm1,         ymm2, ymm3/m256
+        ///   VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> MultiplyLow(Vector256<short> left, Vector256<short> right) => MultiplyLow(left, right);
          /// <summary>
          /// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b)
-        ///   VPMULLW ymm, ymm, ymm/m256
+        ///   VPMULLW ymm1,         ymm2, ymm3/m256
+        ///   VPMULLW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> MultiplyLow(Vector256<ushort> left, Vector256<ushort> right) => MultiplyLow(left, right);
-
          /// <summary>
          /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
-        ///   VPMULLD ymm, ymm, ymm/m256
+        ///   VPMULLD ymm1,         ymm2, ymm3/m256
+        ///   VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> MultiplyLow(Vector256<int> left, Vector256<int> right) => MultiplyLow(left, right);
          /// <summary>
          /// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
-        ///   VPMULLD ymm, ymm, ymm/m256
+        ///   VPMULLD ymm1,         ymm2, ymm3/m256
+        ///   VPMULLD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> MultiplyLow(Vector256<uint> left, Vector256<uint> right) => MultiplyLow(left, right);
  
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Or(Vector256<sbyte> left, Vector256<sbyte> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Or(Vector256<byte> left, Vector256<byte> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Or(Vector256<short> left, Vector256<short> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Or(Vector256<ushort> left, Vector256<ushort> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Or(Vector256<int> left, Vector256<int> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Or(Vector256<uint> left, Vector256<uint> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Or(Vector256<long> left, Vector256<long> right) => Or(left, right);
          /// <summary>
          /// __m256i _mm256_or_si256 (__m256i a, __m256i b)
-        ///   VPOR ymm, ymm, ymm/m256
+        ///   VPOR  ymm1,         ymm2, ymm3/m256
+        ///   VPORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Or(Vector256<ulong> left, Vector256<ulong> right) => Or(left, right);
  
          /// <summary>
          /// __m256i _mm256_packs_epi16 (__m256i a, __m256i b)
-        ///   VPACKSSWB ymm, ymm, ymm/m256
+        ///   VPACKSSWB ymm1,         ymm2, ymm3/m256
+        ///   VPACKSSWB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> PackSignedSaturate(Vector256<short> left, Vector256<short> right) => PackSignedSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_packs_epi32 (__m256i a, __m256i b)
-        ///   VPACKSSDW ymm, ymm, ymm/m256
+        ///   VPACKSSDW ymm1,         ymm2, ymm3/m256
+        ///   VPACKSSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<short> PackSignedSaturate(Vector256<int> left, Vector256<int> right) => PackSignedSaturate(left, right);
+
          /// <summary>
          /// __m256i _mm256_packus_epi16 (__m256i a, __m256i b)
-        ///   VPACKUSWB ymm, ymm, ymm/m256
+        ///   VPACKUSWB ymm1,         ymm2, ymm3/m256
+        ///   VPACKUSWB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> PackUnsignedSaturate(Vector256<short> left, Vector256<short> right) => PackUnsignedSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_packus_epi32 (__m256i a, __m256i b)
-        ///   VPACKUSDW ymm, ymm, ymm/m256
+        ///   VPACKUSDW ymm1,         ymm2, ymm3/m256
+        ///   VPACKUSDW ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<ushort> PackUnsignedSaturate(Vector256<int> left, Vector256<int> right) => PackUnsignedSaturate(left, right);
  
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<sbyte> Permute2x128(Vector256<sbyte> left, Vector256<sbyte> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<byte> Permute2x128(Vector256<byte> left, Vector256<byte> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<short> Permute2x128(Vector256<short> left, Vector256<short> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<ushort> Permute2x128(Vector256<ushort> left, Vector256<ushort> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<int> Permute2x128(Vector256<int> left, Vector256<int> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<uint> Permute2x128(Vector256<uint> left, Vector256<uint> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<long> Permute2x128(Vector256<long> left, Vector256<long> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
          /// <summary>
          /// __m256i _mm256_permute2x128_si256 (__m256i a, __m256i b, const int imm8)
-        ///   VPERM2I128 ymm, ymm, ymm/m256, imm8
+        ///   VPERM2I128 ymm1, ymm2, ymm3/m256, imm8
          /// </summary>
          public static new Vector256<ulong> Permute2x128(Vector256<ulong> left, Vector256<ulong> right, [ConstantExpected] byte control) => Permute2x128(left, right, control);
  
          /// <summary>
          /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8)
-        ///   VPERMQ ymm, ymm/m256, imm8
+        ///   VPERMQ ymm1,         ymm2/m256,         imm8
+        ///   VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<long> Permute4x64(Vector256<long> value, [ConstantExpected] byte control) => Permute4x64(value, control);
          /// <summary>
          /// __m256i _mm256_permute4x64_epi64 (__m256i a, const int imm8)
-        ///   VPERMQ ymm, ymm/m256, imm8
+        ///   VPERMQ ymm1,         ymm2/m256,         imm8
+        ///   VPERMQ ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<ulong> Permute4x64(Vector256<ulong> value, [ConstantExpected] byte control) => Permute4x64(value, control);
          /// <summary>
          /// __m256d _mm256_permute4x64_pd (__m256d a, const int imm8)
-        ///   VPERMPD ymm, ymm/m256, imm8
+        ///   VPERMPD ymm1,         ymm2/m256,         imm8
+        ///   VPERMPD ymm1 {k1}{z}, ymm2/m256/m64bcst, imm8
          /// </summary>
          public static Vector256<double> Permute4x64(Vector256<double> value, [ConstantExpected] byte control) => Permute4x64(value, control);
  
          /// <summary>
          /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
-        ///   VPERMD ymm, ymm/m256, ymm
+        ///   VPERMD ymm1,         ymm2, ymm3/m256
+        ///   VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> PermuteVar8x32(Vector256<int> left, Vector256<int> control) => PermuteVar8x32(left, control);
          /// <summary>
          /// __m256i _mm256_permutevar8x32_epi32 (__m256i a, __m256i idx)
-        ///   VPERMD ymm, ymm/m256, ymm
+        ///   VPERMD ymm1,         ymm2, ymm3/m256
+        ///   VPERMD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> PermuteVar8x32(Vector256<uint> left, Vector256<uint> control) => PermuteVar8x32(left, control);
          /// <summary>
          /// __m256 _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
-        ///   VPERMPS ymm, ymm/m256, ymm
+        ///   VPERMPS ymm1,         ymm2, ymm3/m256
+        ///   VPERMPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> PermuteVar8x32(Vector256<float> left, Vector256<int> control) => PermuteVar8x32(left, control);
  
          /// <summary>
          /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count)
-        ///   VPSLLW ymm, ymm, xmm/m128
+        ///   VPSLLW ymm1,         ymm2, xmm3/m128
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<short> ShiftLeftLogical(Vector256<short> value, Vector128<short> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_sll_epi16 (__m256i a, __m128i count)
-        ///   VPSLLW ymm, ymm, xmm/m128
+        ///   VPSLLW ymm1,         ymm2, xmm3/m128
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ushort> ShiftLeftLogical(Vector256<ushort> value, Vector128<ushort> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count)
-        ///   VPSLLD ymm, ymm, xmm/m128
+        ///   VPSLLD ymm1,         ymm2, xmm3/m128
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<int> ShiftLeftLogical(Vector256<int> value, Vector128<int> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_sll_epi32 (__m256i a, __m128i count)
-        ///   VPSLLD ymm, ymm, xmm/m128
+        ///   VPSLLD ymm1,         ymm2, xmm3/m128
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<uint> ShiftLeftLogical(Vector256<uint> value, Vector128<uint> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count)
-        ///   VPSLLQ ymm, ymm, xmm/m128
+        ///   VPSLLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<long> ShiftLeftLogical(Vector256<long> value, Vector128<long> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_sll_epi64 (__m256i a, __m128i count)
-        ///   VPSLLQ ymm, ymm, xmm/m128
+        ///   VPSLLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ulong> ShiftLeftLogical(Vector256<ulong> value, Vector128<ulong> count) => ShiftLeftLogical(value, count);
  
          /// <summary>
          /// __m256i _mm256_slli_epi16 (__m256i a, int imm8)
-        ///   VPSLLW ymm, ymm, imm8
+        ///   VPSLLW ymm1,         ymm2, imm8
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<short> ShiftLeftLogical(Vector256<short> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_slli_epi16 (__m256i a, int imm8)
-        ///   VPSLLW ymm, ymm, imm8
+        ///   VPSLLW ymm1,         ymm2, imm8
+        ///   VPSLLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ushort> ShiftLeftLogical(Vector256<ushort> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_slli_epi32 (__m256i a, int imm8)
-        ///   VPSLLD ymm, ymm, imm8
+        ///   VPSLLD ymm1,         ymm2, imm8
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<int> ShiftLeftLogical(Vector256<int> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_slli_epi32 (__m256i a, int imm8)
-        ///   VPSLLD ymm, ymm, imm8
+        ///   VPSLLD ymm1,         ymm2, imm8
+        ///   VPSLLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<uint> ShiftLeftLogical(Vector256<uint> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_slli_epi64 (__m256i a, int imm8)
-        ///   VPSLLQ ymm, ymm, imm8
+        ///   VPSLLQ ymm1,         ymm2, imm8
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<long> ShiftLeftLogical(Vector256<long> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m256i _mm256_slli_epi64 (__m256i a, int imm8)
-        ///   VPSLLQ ymm, ymm, imm8
+        ///   VPSLLQ ymm1,         ymm2, imm8
+        ///   VPSLLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ulong> ShiftLeftLogical(Vector256<ulong> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
  
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<sbyte> ShiftLeftLogical128BitLane(Vector256<sbyte> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<byte> ShiftLeftLogical128BitLane(Vector256<byte> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<short> ShiftLeftLogical128BitLane(Vector256<short> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ushort> ShiftLeftLogical128BitLane(Vector256<ushort> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<int> ShiftLeftLogical128BitLane(Vector256<int> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<uint> ShiftLeftLogical128BitLane(Vector256<uint> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<long> ShiftLeftLogical128BitLane(Vector256<long> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bslli_epi128 (__m256i a, const int imm8)
-        ///   VPSLLDQ ymm, ymm, imm8
+        ///   VPSLLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ulong> ShiftLeftLogical128BitLane(Vector256<ulong> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
  
-        /// <summary>
-        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
-        ///   VPSLLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ShiftLeftLogicalVariable(Vector256<int> value, Vector256<uint> count) => ShiftLeftLogicalVariable(value, count);
-        /// <summary>
-        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
-        ///   VPSLLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<uint> ShiftLeftLogicalVariable(Vector256<uint> value, Vector256<uint> count) => ShiftLeftLogicalVariable(value, count);
-        /// <summary>
-        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
-        ///   VPSLLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<long> ShiftLeftLogicalVariable(Vector256<long> value, Vector256<ulong> count) => ShiftLeftLogicalVariable(value, count);
-        /// <summary>
-        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
-        ///   VPSLLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<ulong> ShiftLeftLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) => ShiftLeftLogicalVariable(value, count);
-
          /// <summary>
          /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count)
-        ///   VPSLLVD xmm, xmm, xmm/m128
+        ///   VPSLLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> ShiftLeftLogicalVariable(Vector128<int> value, Vector128<uint> count) => ShiftLeftLogicalVariable(value, count);
          /// <summary>
          /// __m128i _mm_sllv_epi32 (__m128i a, __m128i count)
-        ///   VPSLLVD xmm, xmm, xmm/m128
+        ///   VPSLLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> ShiftLeftLogicalVariable(Vector128<uint> value, Vector128<uint> count) => ShiftLeftLogicalVariable(value, count);
          /// <summary>
          /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count)
-        ///   VPSLLVQ xmm, xmm, xmm/m128
+        ///   VPSLLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> ShiftLeftLogicalVariable(Vector128<long> value, Vector128<ulong> count) => ShiftLeftLogicalVariable(value, count);
          /// <summary>
          /// __m128i _mm_sllv_epi64 (__m128i a, __m128i count)
-        ///   VPSLLVQ xmm, xmm, xmm/m128
+        ///   VPSLLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogicalVariable(Vector128<ulong> value, Vector128<ulong> count) => ShiftLeftLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
+        ///   VPSLLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<int> ShiftLeftLogicalVariable(Vector256<int> value, Vector256<uint> count) => ShiftLeftLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
+        ///   VPSLLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<uint> ShiftLeftLogicalVariable(Vector256<uint> value, Vector256<uint> count) => ShiftLeftLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
+        ///   VPSLLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<long> ShiftLeftLogicalVariable(Vector256<long> value, Vector256<ulong> count) => ShiftLeftLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_sllv_epi64 (__m256i a, __m256i count)
+        ///   VPSLLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSLLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<ulong> ShiftLeftLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) => ShiftLeftLogicalVariable(value, count);
  
          /// <summary>
          /// _mm256_sra_epi16 (__m256i a, __m128i count)
-        ///   VPSRAW ymm, ymm, xmm/m128
+        ///   VPSRAW ymm1,         ymm2, xmm3/m128
+        ///   VPSRAW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<short> ShiftRightArithmetic(Vector256<short> value, Vector128<short> count) => ShiftRightArithmetic(value, count);
          /// <summary>
          /// _mm256_sra_epi32 (__m256i a, __m128i count)
-        ///   VPSRAD ymm, ymm, xmm/m128
+        ///   VPSRAD ymm1,         ymm2, xmm3/m128
+        ///   VPSRAD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<int> ShiftRightArithmetic(Vector256<int> value, Vector128<int> count) => ShiftRightArithmetic(value, count);
  
          /// <summary>
          /// __m256i _mm256_srai_epi16 (__m256i a, int imm8)
-        ///   VPSRAW ymm, ymm, imm8
+        ///   VPSRAW ymm1,         ymm2, imm8
+        ///   VPSRAW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<short> ShiftRightArithmetic(Vector256<short> value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count);
          /// <summary>
          /// __m256i _mm256_srai_epi32 (__m256i a, int imm8)
-        ///   VPSRAD ymm, ymm, imm8
+        ///   VPSRAD ymm1,         ymm2, imm8
+        ///   VPSRAD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<int> ShiftRightArithmetic(Vector256<int> value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count);
  
-        /// <summary>
-        /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count)
-        ///   VPSRAVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ShiftRightArithmeticVariable(Vector256<int> value, Vector256<uint> count) => ShiftRightArithmeticVariable(value, count);
-
          /// <summary>
          /// __m128i _mm_srav_epi32 (__m128i a, __m128i count)
-        ///   VPSRAVD xmm, xmm, xmm/m128
+        ///   VPSRAVD xmm1,         xmm2, xmm3/m128
+        ///   VPSRAVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> ShiftRightArithmeticVariable(Vector128<int> value, Vector128<uint> count) => ShiftRightArithmeticVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_srav_epi32 (__m256i a, __m256i count)
+        ///   VPSRAVD ymm1,         ymm2, ymm3/m256
+        ///   VPSRAVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<int> ShiftRightArithmeticVariable(Vector256<int> value, Vector256<uint> count) => ShiftRightArithmeticVariable(value, count);
  
          /// <summary>
          /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count)
-        ///   VPSRLW ymm, ymm, xmm/m128
+        ///   VPSRLW ymm1,         ymm2, xmm3/m128
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<short> ShiftRightLogical(Vector256<short> value, Vector128<short> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srl_epi16 (__m256i a, __m128i count)
-        ///   VPSRLW ymm, ymm, xmm/m128
+        ///   VPSRLW ymm1,         ymm2, xmm3/m128
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ushort> ShiftRightLogical(Vector256<ushort> value, Vector128<ushort> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count)
-        ///   VPSRLD ymm, ymm, xmm/m128
+        ///   VPSRLD ymm1,         ymm2, xmm3/m128
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<int> ShiftRightLogical(Vector256<int> value, Vector128<int> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srl_epi32 (__m256i a, __m128i count)
-        ///   VPSRLD ymm, ymm, xmm/m128
+        ///   VPSRLD ymm1,         ymm2, xmm3/m128
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<uint> ShiftRightLogical(Vector256<uint> value, Vector128<uint> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count)
-        ///   VPSRLQ ymm, ymm, xmm/m128
+        ///   VPSRLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<long> ShiftRightLogical(Vector256<long> value, Vector128<long> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srl_epi64 (__m256i a, __m128i count)
-        ///   VPSRLQ ymm, ymm, xmm/m128
+        ///   VPSRLQ ymm1,         ymm2, xmm3/m128
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, xmm3/m128
          /// </summary>
          public static Vector256<ulong> ShiftRightLogical(Vector256<ulong> value, Vector128<ulong> count) => ShiftRightLogical(value, count);
  
          /// <summary>
          /// __m256i _mm256_srli_epi16 (__m256i a, int imm8)
-        ///   VPSRLW ymm, ymm, imm8
+        ///   VPSRLW ymm1,         ymm2, imm8
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<short> ShiftRightLogical(Vector256<short> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srli_epi16 (__m256i a, int imm8)
-        ///   VPSRLW ymm, ymm, imm8
+        ///   VPSRLW ymm1,         ymm2, imm8
+        ///   VPSRLW ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ushort> ShiftRightLogical(Vector256<ushort> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srli_epi32 (__m256i a, int imm8)
-        ///   VPSRLD ymm, ymm, imm8
+        ///   VPSRLD ymm1,         ymm2, imm8
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<int> ShiftRightLogical(Vector256<int> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srli_epi32 (__m256i a, int imm8)
-        ///   VPSRLD ymm, ymm, imm8
+        ///   VPSRLD ymm1,         ymm2, imm8
+        ///   VPSRLD ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<uint> ShiftRightLogical(Vector256<uint> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srli_epi64 (__m256i a, int imm8)
-        ///   VPSRLQ ymm, ymm, imm8
+        ///   VPSRLQ ymm1,         ymm2, imm8
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<long> ShiftRightLogical(Vector256<long> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m256i _mm256_srli_epi64 (__m256i a, int imm8)
-        ///   VPSRLQ ymm, ymm, imm8
+        ///   VPSRLQ ymm1,         ymm2, imm8
+        ///   VPSRLQ ymm1 {k1}{z}, ymm2, imm8
          /// </summary>
          public static Vector256<ulong> ShiftRightLogical(Vector256<ulong> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
  
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<sbyte> ShiftRightLogical128BitLane(Vector256<sbyte> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
          /// </summary>
          public static Vector256<byte> ShiftRightLogical128BitLane(Vector256<byte> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<short> ShiftRightLogical128BitLane(Vector256<short> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ushort> ShiftRightLogical128BitLane(Vector256<ushort> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<int> ShiftRightLogical128BitLane(Vector256<int> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<uint> ShiftRightLogical128BitLane(Vector256<uint> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<long> ShiftRightLogical128BitLane(Vector256<long> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m256i _mm256_bsrli_epi128 (__m256i a, const int imm8)
-        ///   VPSRLDQ ymm, ymm, imm8
+        ///   VPSRLDQ ymm1, ymm2/m256, imm8
+        /// This intrinsic generates VPSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector256<ulong> ShiftRightLogical128BitLane(Vector256<ulong> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
  
-        /// <summary>
-        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
-        ///   VPSRLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<int> ShiftRightLogicalVariable(Vector256<int> value, Vector256<uint> count) => ShiftRightLogicalVariable(value, count);
-        /// <summary>
-        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
-        ///   VPSRLVD ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<uint> ShiftRightLogicalVariable(Vector256<uint> value, Vector256<uint> count) => ShiftRightLogicalVariable(value, count);
-        /// <summary>
-        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
-        ///   VPSRLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<long> ShiftRightLogicalVariable(Vector256<long> value, Vector256<ulong> count) => ShiftRightLogicalVariable(value, count);
-        /// <summary>
-        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
-        ///   VPSRLVQ ymm, ymm, ymm/m256
-        /// </summary>
-        public static Vector256<ulong> ShiftRightLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) => ShiftRightLogicalVariable(value, count);
-
          /// <summary>
          /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count)
-        ///   VPSRLVD xmm, xmm, xmm/m128
+        ///   VPSRLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> ShiftRightLogicalVariable(Vector128<int> value, Vector128<uint> count) => ShiftRightLogicalVariable(value, count);
          /// <summary>
          /// __m128i _mm_srlv_epi32 (__m128i a, __m128i count)
-        ///   VPSRLVD xmm, xmm, xmm/m128
+        ///   VPSRLVD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> ShiftRightLogicalVariable(Vector128<uint> value, Vector128<uint> count) => ShiftRightLogicalVariable(value, count);
          /// <summary>
          /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count)
-        ///   VPSRLVQ xmm, xmm, xmm/m128
+        ///   VPSRLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> ShiftRightLogicalVariable(Vector128<long> value, Vector128<ulong> count) => ShiftRightLogicalVariable(value, count);
          /// <summary>
          /// __m128i _mm_srlv_epi64 (__m128i a, __m128i count)
-        ///   VPSRLVQ xmm, xmm, xmm/m128
+        ///   VPSRLVQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLVQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> ShiftRightLogicalVariable(Vector128<ulong> value, Vector128<ulong> count) => ShiftRightLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
+        ///   VPSRLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<int> ShiftRightLogicalVariable(Vector256<int> value, Vector256<uint> count) => ShiftRightLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
+        ///   VPSRLVD ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
+        /// </summary>
+        public static Vector256<uint> ShiftRightLogicalVariable(Vector256<uint> value, Vector256<uint> count) => ShiftRightLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
+        ///   VPSRLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<long> ShiftRightLogicalVariable(Vector256<long> value, Vector256<ulong> count) => ShiftRightLogicalVariable(value, count);
+        /// <summary>
+        /// __m256i _mm256_srlv_epi64 (__m256i a, __m256i count)
+        ///   VPSRLVQ ymm1,         ymm2, ymm3/m256
+        ///   VPSRLVQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
+        /// </summary>
+        public static Vector256<ulong> ShiftRightLogicalVariable(Vector256<ulong> value, Vector256<ulong> count) => ShiftRightLogicalVariable(value, count);
  
          /// <summary>
          /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b)
-        ///   VPSHUFB ymm, ymm, ymm/m256
+        ///   VPSHUFB ymm1,         ymm2, ymm3/m256
+        ///   VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Shuffle(Vector256<sbyte> value, Vector256<sbyte> mask) => Shuffle(value, mask);
          /// <summary>
          /// __m256i _mm256_shuffle_epi8 (__m256i a, __m256i b)
-        ///   VPSHUFB ymm, ymm, ymm/m256
+        ///   VPSHUFB ymm1,         ymm2, ymm3/m256
+        ///   VPSHUFB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Shuffle(Vector256<byte> value, Vector256<byte> mask) => Shuffle(value, mask);
          /// <summary>
          /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
-        ///   VPSHUFD ymm, ymm/m256, imm8
+        ///   VPSHUFD ymm1,         ymm2/m256,         imm8
+        ///   VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<int> Shuffle(Vector256<int> value, [ConstantExpected] byte control) => Shuffle(value, control);
          /// <summary>
          /// __m256i _mm256_shuffle_epi32 (__m256i a, const int imm8)
-        ///   VPSHUFD ymm, ymm/m256, imm8
+        ///   VPSHUFD ymm1,         ymm2/m256,         imm8
+        ///   VPSHUFD ymm1 {k1}{z}, ymm2/m256/m32bcst, imm8
          /// </summary>
          public static Vector256<uint> Shuffle(Vector256<uint> value, [ConstantExpected] byte control) => Shuffle(value, control);
  
          /// <summary>
          /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFHW ymm, ymm/m256, imm8
+        ///   VPSHUFHW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<short> ShuffleHigh(Vector256<short> value, [ConstantExpected] byte control) => ShuffleHigh(value, control);
          /// <summary>
          /// __m256i _mm256_shufflehi_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFHW ymm, ymm/m256, imm8
+        ///   VPSHUFHW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFHW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<ushort> ShuffleHigh(Vector256<ushort> value, [ConstantExpected] byte control) => ShuffleHigh(value, control);
  
          /// <summary>
          /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFLW ymm, ymm/m256, imm8
+        ///   VPSHUFLW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<short> ShuffleLow(Vector256<short> value, [ConstantExpected] byte control) => ShuffleLow(value, control);
          /// <summary>
          /// __m256i _mm256_shufflelo_epi16 (__m256i a, const int imm8)
-        ///   VPSHUFLW ymm, ymm/m256, imm8
+        ///   VPSHUFLW ymm1,         ymm2/m256, imm8
+        ///   VPSHUFLW ymm1 {k1}{z}, ymm2/m256, imm8
          /// </summary>
          public static Vector256<ushort> ShuffleLow(Vector256<ushort> value, [ConstantExpected] byte control) => ShuffleLow(value, control);
  
          /// <summary>
          /// __m256i _mm256_sign_epi8 (__m256i a, __m256i b)
-        ///   VPSIGNB ymm, ymm, ymm/m256
+        ///   VPSIGNB ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Sign(Vector256<sbyte> left, Vector256<sbyte> right) => Sign(left, right);
          /// <summary>
          /// __m256i _mm256_sign_epi16 (__m256i a, __m256i b)
-        ///   VPSIGNW ymm, ymm, ymm/m256
+        ///   VPSIGNW ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Sign(Vector256<short> left, Vector256<short> right) => Sign(left, right);
          /// <summary>
          /// __m256i _mm256_sign_epi32 (__m256i a, __m256i b)
-        ///   VPSIGND ymm, ymm, ymm/m256
+        ///   VPSIGND ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<int> Sign(Vector256<int> left, Vector256<int> right) => Sign(left, right);
  
          /// <summary>
          /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b)
-        ///   VPSUBB ymm, ymm, ymm/m256
+        ///   VPSUBB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Subtract(Vector256<sbyte> left, Vector256<sbyte> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi8 (__m256i a, __m256i b)
-        ///   VPSUBB ymm, ymm, ymm/m256
+        ///   VPSUBB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Subtract(Vector256<byte> left, Vector256<byte> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b)
-        ///   VPSUBW ymm, ymm, ymm/m256
+        ///   VPSUBW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Subtract(Vector256<short> left, Vector256<short> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi16 (__m256i a, __m256i b)
-        ///   VPSUBW ymm, ymm, ymm/m256
+        ///   VPSUBW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Subtract(Vector256<ushort> left, Vector256<ushort> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b)
-        ///   VPSUBD ymm, ymm, ymm/m256
+        ///   VPSUBD ymm1,         ymm2, ymm3/m256
+        ///   VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Subtract(Vector256<int> left, Vector256<int> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi32 (__m256i a, __m256i b)
-        ///   VPSUBD ymm, ymm, ymm/m256
+        ///   VPSUBD ymm1,         ymm2, ymm3/m256
+        ///   VPSUBD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Subtract(Vector256<uint> left, Vector256<uint> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b)
-        ///   VPSUBQ ymm, ymm, ymm/m256
+        ///   VPSUBQ ymm1,         ymm2, ymm3/m256
+        ///   VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Subtract(Vector256<long> left, Vector256<long> right) => Subtract(left, right);
          /// <summary>
          /// __m256i _mm256_sub_epi64 (__m256i a, __m256i b)
-        ///   VPSUBQ ymm, ymm, ymm/m256
+        ///   VPSUBQ ymm1,         ymm2, ymm3/m256
+        ///   VPSUBQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Subtract(Vector256<ulong> left, Vector256<ulong> right) => Subtract(left, right);
  
          /// <summary>
          /// __m256i _mm256_subs_epi8 (__m256i a, __m256i b)
-        ///   VPSUBSB ymm, ymm, ymm/m256
+        ///   VPSUBSB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> SubtractSaturate(Vector256<sbyte> left, Vector256<sbyte> right) => SubtractSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_subs_epi16 (__m256i a, __m256i b)
-        ///   VPSUBSW ymm, ymm, ymm/m256
+        ///   VPSUBSW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> SubtractSaturate(Vector256<short> left, Vector256<short> right) => SubtractSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_subs_epu8 (__m256i a, __m256i b)
-        ///   VPSUBUSB ymm, ymm, ymm/m256
+        ///   VPSUBUSB ymm1,         ymm2, ymm3/m256
+        ///   VPSUBUSB ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> SubtractSaturate(Vector256<byte> left, Vector256<byte> right) => SubtractSaturate(left, right);
          /// <summary>
          /// __m256i _mm256_subs_epu16 (__m256i a, __m256i b)
-        ///   VPSUBUSW ymm, ymm, ymm/m256
+        ///   VPSUBUSW ymm1,         ymm2, ymm3/m256
+        ///   VPSUBUSW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> SubtractSaturate(Vector256<ushort> left, Vector256<ushort> right) => SubtractSaturate(left, right);
  
          /// <summary>
          /// __m256i _mm256_sad_epu8 (__m256i a, __m256i b)
-        ///   VPSADBW ymm, ymm, ymm/m256
+        ///   VPSADBW ymm1,         ymm2, ymm3/m256
+        ///   VPSADBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> SumAbsoluteDifferences(Vector256<byte> left, Vector256<byte> right) => SumAbsoluteDifferences(left, right);
  
          /// <summary>
          /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKHBW ymm, ymm, ymm/m256
+        ///   VPUNPCKHBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> UnpackHigh(Vector256<sbyte> left, Vector256<sbyte> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKHBW ymm, ymm, ymm/m256
+        ///   VPUNPCKHBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> UnpackHigh(Vector256<byte> left, Vector256<byte> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKHWD ymm, ymm, ymm/m256
+        ///   VPUNPCKHWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> UnpackHigh(Vector256<short> left, Vector256<short> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKHWD ymm, ymm, ymm/m256
+        ///   VPUNPCKHWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> UnpackHigh(Vector256<ushort> left, Vector256<ushort> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKHDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> UnpackHigh(Vector256<int> left, Vector256<int> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKHDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> UnpackHigh(Vector256<uint> left, Vector256<uint> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKHQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> UnpackHigh(Vector256<long> left, Vector256<long> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m256i _mm256_unpackhi_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKHQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKHQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKHQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> UnpackHigh(Vector256<ulong> left, Vector256<ulong> right) => UnpackHigh(left, right);
  
          /// <summary>
          /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKLBW ymm, ymm, ymm/m256
+        ///   VPUNPCKLBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> UnpackLow(Vector256<sbyte> left, Vector256<sbyte> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b)
-        ///   VPUNPCKLBW ymm, ymm, ymm/m256
+        ///   VPUNPCKLBW ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLBW ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> UnpackLow(Vector256<byte> left, Vector256<byte> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKLWD ymm, ymm, ymm/m256
+        ///   VPUNPCKLWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> UnpackLow(Vector256<short> left, Vector256<short> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi16 (__m256i a, __m256i b)
-        ///   VPUNPCKLWD ymm, ymm, ymm/m256
+        ///   VPUNPCKLWD ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLWD ymm1 {k1}{z}, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> UnpackLow(Vector256<ushort> left, Vector256<ushort> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKLDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> UnpackLow(Vector256<int> left, Vector256<int> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi32 (__m256i a, __m256i b)
-        ///   VPUNPCKLDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> UnpackLow(Vector256<uint> left, Vector256<uint> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKLQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> UnpackLow(Vector256<long> left, Vector256<long> right) => UnpackLow(left, right);
          /// <summary>
          /// __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b)
-        ///   VPUNPCKLQDQ ymm, ymm, ymm/m256
+        ///   VPUNPCKLQDQ ymm1,         ymm2, ymm3/m256
+        ///   VPUNPCKLQDQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> UnpackLow(Vector256<ulong> left, Vector256<ulong> right) => UnpackLow(left, right);
  
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<sbyte> Xor(Vector256<sbyte> left, Vector256<sbyte> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<byte> Xor(Vector256<byte> left, Vector256<byte> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<short> Xor(Vector256<short> left, Vector256<short> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR ymm1, ymm2, ymm3/m256
          /// </summary>
          public static Vector256<ushort> Xor(Vector256<ushort> left, Vector256<ushort> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<int> Xor(Vector256<int> left, Vector256<int> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<uint> Xor(Vector256<uint> left, Vector256<uint> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<long> Xor(Vector256<long> left, Vector256<long> right) => Xor(left, right);
          /// <summary>
          /// __m256i _mm256_xor_si256 (__m256i a, __m256i b)
-        ///   VPXOR ymm, ymm, ymm/m256
+        ///   VPXOR  ymm1,         ymm2, ymm3/m256
+        ///   VPXORQ ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<ulong> Xor(Vector256<ulong> left, Vector256<ulong> right) => Xor(left, right);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs

index a9dc857b623d69f068f385016acee99ea3de85f0..46fb386a395bf5bd3984965596750b18f63bebaa 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs
@@ -25,49 +25,49 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _andn_u64 (unsigned __int64 a, unsigned __int64 b)
-            ///   ANDN r64a, r64b, reg/m64
+            ///   ANDN r64a, r64b, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong AndNot(ulong left, ulong right) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _bextr_u64 (unsigned __int64 a, unsigned int start, unsigned int len)
-            ///   BEXTR r64a, reg/m64, r64b
+            ///   BEXTR r64a, r/m64, r64b
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong BitFieldExtract(ulong value, byte start, byte length) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _bextr2_u64 (unsigned __int64 a, unsigned __int64 control)
-            ///   BEXTR r64a, reg/m64, r64b
+            ///   BEXTR r64a, r/m64, r64b
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong BitFieldExtract(ulong value, ushort control) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _blsi_u64 (unsigned __int64 a)
-            ///   BLSI reg, reg/m64
+            ///   BLSI r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ExtractLowestSetBit(ulong value) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _blsmsk_u64 (unsigned __int64 a)
-            ///   BLSMSK reg, reg/m64
+            ///   BLSMSK r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong GetMaskUpToLowestSetBit(ulong value) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _blsr_u64 (unsigned __int64 a)
-            ///   BLSR reg, reg/m64
+            ///   BLSR r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ResetLowestSetBit(ulong value) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// __int64 _mm_tzcnt_64 (unsigned __int64 a)
-            ///   TZCNT reg, reg/m64
+            ///   TZCNT r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong TrailingZeroCount(ulong value) { throw new PlatformNotSupportedException(); }
@@ -75,43 +75,43 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// unsigned int _andn_u32 (unsigned int a, unsigned int b)
-        ///   ANDN r32a, r32b, reg/m32
+        ///   ANDN r32a, r32b, r/m32
          /// </summary>
          public static uint AndNot(uint left, uint right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _bextr_u32 (unsigned int a, unsigned int start, unsigned int len)
-        ///   BEXTR r32a, reg/m32, r32b
+        ///   BEXTR r32a, r/m32, r32b
          /// </summary>
          public static uint BitFieldExtract(uint value, byte start, byte length) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _bextr2_u32 (unsigned int a, unsigned int control)
-        ///   BEXTR r32a, reg/m32, r32b
+        ///   BEXTR r32a, r/m32, r32b
          /// </summary>
          public static uint BitFieldExtract(uint value, ushort control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _blsi_u32 (unsigned int a)
-        ///   BLSI reg, reg/m32
+        ///   BLSI r32, r/m32
          /// </summary>
          public static uint ExtractLowestSetBit(uint value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _blsmsk_u32 (unsigned int a)
-        ///   BLSMSK reg, reg/m32
+        ///   BLSMSK r32, r/m32
          /// </summary>
          public static uint GetMaskUpToLowestSetBit(uint value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _blsr_u32 (unsigned int a)
-        ///   BLSR reg, reg/m32
+        ///   BLSR r32, r/m32
          /// </summary>
          public static uint ResetLowestSetBit(uint value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_tzcnt_32 (unsigned int a)
-        ///   TZCNT reg, reg/m32
+        ///   TZCNT r32, r/m32
          /// </summary>
          public static uint TrailingZeroCount(uint value) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs

index 044ec940e75e08c583e155b32d022b18803e3364..eec6534a14232de28b60b668e6d4a4a595777e40 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs
@@ -25,49 +25,49 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _andn_u64 (unsigned __int64 a, unsigned __int64 b)
-            ///   ANDN r64a, r64b, reg/m64
+            ///   ANDN r64a, r64b, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong AndNot(ulong left, ulong right) => AndNot(left, right);
  
              /// <summary>
              /// unsigned __int64 _bextr_u64 (unsigned __int64 a, unsigned int start, unsigned int len)
-            ///   BEXTR r64a, reg/m64, r64b
+            ///   BEXTR r64a, r/m64, r64b
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong BitFieldExtract(ulong value, byte start, byte length) => BitFieldExtract(value, (ushort)(start | (length << 8)));
  
              /// <summary>
              /// unsigned __int64 _bextr2_u64 (unsigned __int64 a, unsigned __int64 control)
-            ///   BEXTR r64a, reg/m64, r64b
+            ///   BEXTR r64a, r/m64, r64b
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong BitFieldExtract(ulong value, ushort control) => BitFieldExtract(value, control);
  
              /// <summary>
              /// unsigned __int64 _blsi_u64 (unsigned __int64 a)
-            ///   BLSI reg, reg/m64
+            ///   BLSI r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ExtractLowestSetBit(ulong value) => ExtractLowestSetBit(value);
  
              /// <summary>
              /// unsigned __int64 _blsmsk_u64 (unsigned __int64 a)
-            ///   BLSMSK reg, reg/m64
+            ///   BLSMSK r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong GetMaskUpToLowestSetBit(ulong value) => GetMaskUpToLowestSetBit(value);
  
              /// <summary>
              /// unsigned __int64 _blsr_u64 (unsigned __int64 a)
-            ///   BLSR reg, reg/m64
+            ///   BLSR r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ResetLowestSetBit(ulong value) => ResetLowestSetBit(value);
  
              /// <summary>
              /// __int64 _mm_tzcnt_64 (unsigned __int64 a)
-            ///   TZCNT reg, reg/m64
+            ///   TZCNT r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong TrailingZeroCount(ulong value) => TrailingZeroCount(value);
@@ -75,43 +75,43 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// unsigned int _andn_u32 (unsigned int a, unsigned int b)
-        ///   ANDN r32a, r32b, reg/m32
+        ///   ANDN r32a, r32b, r/m32
          /// </summary>
          public static uint AndNot(uint left, uint right) => AndNot(left, right);
  
          /// <summary>
          /// unsigned int _bextr_u32 (unsigned int a, unsigned int start, unsigned int len)
-        ///   BEXTR r32a, reg/m32, r32b
+        ///   BEXTR r32a, r/m32, r32b
          /// </summary>
          public static uint BitFieldExtract(uint value, byte start, byte length) => BitFieldExtract(value, (ushort)(start | (length << 8)));
  
          /// <summary>
          /// unsigned int _bextr2_u32 (unsigned int a, unsigned int control)
-        ///   BEXTR r32a, reg/m32, r32b
+        ///   BEXTR r32a, r/m32, r32b
          /// </summary>
          public static uint BitFieldExtract(uint value, ushort control) => BitFieldExtract(value, control);
  
          /// <summary>
          /// unsigned int _blsi_u32 (unsigned int a)
-        ///   BLSI reg, reg/m32
+        ///   BLSI r32, r/m32
          /// </summary>
          public static uint ExtractLowestSetBit(uint value) => ExtractLowestSetBit(value);
  
          /// <summary>
          /// unsigned int _blsmsk_u32 (unsigned int a)
-        ///   BLSMSK reg, reg/m32
+        ///   BLSMSK r32, r/m32
          /// </summary>
          public static uint GetMaskUpToLowestSetBit(uint value) => GetMaskUpToLowestSetBit(value);
  
          /// <summary>
          /// unsigned int _blsr_u32 (unsigned int a)
-        ///   BLSR reg, reg/m32
+        ///   BLSR r32, r/m32
          /// </summary>
          public static uint ResetLowestSetBit(uint value) => ResetLowestSetBit(value);
  
          /// <summary>
          /// int _mm_tzcnt_32 (unsigned int a)
-        ///   TZCNT reg, reg/m32
+        ///   TZCNT r32, r/m32
          /// </summary>
          public static uint TrailingZeroCount(uint value) => TrailingZeroCount(value);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs
index 8d8fdcf2df2ddbf4581f80d8a9be32852e124e67..a412768afe76a80b1939061f8aeb52171c7a57f4 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs
@@ -25,14 +25,14 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _bzhi_u64 (unsigned __int64 a, unsigned int index)
-            ///   BZHI r64a, reg/m32, r64b
+            ///   BZHI r64a, r/m64, r64b
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ZeroHighBits(ulong value, ulong index) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi)
-            ///   MULX r64a, r64b, reg/m64
+            ///   MULX r64a, r64b, r/m64
              /// The above native signature does not directly correspond to the managed signature.
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
@@ -40,7 +40,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi)
-            ///   MULX r64a, r64b, reg/m64
+            ///   MULX r64a, r64b, r/m64
              /// The above native signature does not directly correspond to the managed signature.
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
@@ -48,14 +48,14 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _pdep_u64 (unsigned __int64 a, unsigned __int64 mask)
-            ///   PDEP r64a, r64b, reg/m64
+            ///   PDEP r64a, r64b, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ParallelBitDeposit(ulong value, ulong mask) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// unsigned __int64 _pext_u64 (unsigned __int64 a, unsigned __int64 mask)
-            ///   PEXT r64a, r64b, reg/m64
+            ///   PEXT r64a, r64b, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ParallelBitExtract(ulong value, ulong mask) { throw new PlatformNotSupportedException(); }
@@ -63,33 +63,33 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// unsigned int _bzhi_u32 (unsigned int a, unsigned int index)
-        ///   BZHI r32a, reg/m32, r32b
+        ///   BZHI r32a, r/m32, r32b
          /// </summary>
          public static uint ZeroHighBits(uint value, uint index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi)
-        ///   MULX r32a, r32b, reg/m32
+        ///   MULX r32a, r32b, r/m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static uint MultiplyNoFlags(uint left, uint right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi)
-        ///   MULX r32a, r32b, reg/m32
+        ///   MULX r32a, r32b, r/m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe uint MultiplyNoFlags(uint left, uint right, uint* low) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _pdep_u32 (unsigned int a, unsigned int mask)
-        ///   PDEP r32a, r32b, reg/m32
+        ///   PDEP r32a, r32b, r/m32
          /// </summary>
          public static uint ParallelBitDeposit(uint value, uint mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _pext_u32 (unsigned int a, unsigned int mask)
-        ///   PEXT r32a, r32b, reg/m32
+        ///   PEXT r32a, r32b, r/m32
          /// </summary>
          public static uint ParallelBitExtract(uint value, uint mask) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs
index 2ed73efad2d861044b56939004c011893d937265..31ac3531e3bded385c394f0380635abfd8f5a094 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs
@@ -25,14 +25,14 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _bzhi_u64 (unsigned __int64 a, unsigned int index)
-            ///   BZHI r64a, reg/m32, r64b
+            ///   BZHI r64a, r/m64, r64b
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ZeroHighBits(ulong value, ulong index) => ZeroHighBits(value, index);
  
              /// <summary>
              /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi)
-            ///   MULX r64a, r64b, reg/m64
+            ///   MULX r64a, r64b, r/m64
              /// The above native signature does not directly correspond to the managed signature.
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
@@ -40,7 +40,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _mulx_u64 (unsigned __int64 a, unsigned __int64 b, unsigned __int64* hi)
-            ///   MULX r64a, r64b, reg/m64
+            ///   MULX r64a, r64b, r/m64
              /// The above native signature does not directly correspond to the managed signature.
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
@@ -48,14 +48,14 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _pdep_u64 (unsigned __int64 a, unsigned __int64 mask)
-            ///   PDEP r64a, r64b, reg/m64
+            ///   PDEP r64a, r64b, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ParallelBitDeposit(ulong value, ulong mask) => ParallelBitDeposit(value, mask);
  
              /// <summary>
              /// unsigned __int64 _pext_u64 (unsigned __int64 a, unsigned __int64 mask)
-            ///   PEXT r64a, r64b, reg/m64
+            ///   PEXT r64a, r64b, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong ParallelBitExtract(ulong value, ulong mask) => ParallelBitExtract(value, mask);
@@ -63,33 +63,33 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// unsigned int _bzhi_u32 (unsigned int a, unsigned int index)
-        ///   BZHI r32a, reg/m32, r32b
+        ///   BZHI r32a, r/m32, r32b
          /// </summary>
          public static uint ZeroHighBits(uint value, uint index) => ZeroHighBits(value, index);
  
          /// <summary>
          /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi)
-        ///   MULX r32a, r32b, reg/m32
+        ///   MULX r32a, r32b, r/m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static uint MultiplyNoFlags(uint left, uint right) => MultiplyNoFlags(left, right);
  
          /// <summary>
          /// unsigned int _mulx_u32 (unsigned int a, unsigned int b, unsigned int* hi)
-        ///   MULX r32a, r32b, reg/m32
+        ///   MULX r32a, r32b, r/m32
          /// The above native signature does not directly correspond to the managed signature.
          /// </summary>
          public static unsafe uint MultiplyNoFlags(uint left, uint right, uint* low) => MultiplyNoFlags(left, right, low);
  
          /// <summary>
          /// unsigned int _pdep_u32 (unsigned int a, unsigned int mask)
-        ///   PDEP r32a, r32b, reg/m32
+        ///   PDEP r32a, r32b, r/m32
          /// </summary>
          public static uint ParallelBitDeposit(uint value, uint mask) => ParallelBitDeposit(value, mask);
  
          /// <summary>
          /// unsigned int _pext_u32 (unsigned int a, unsigned int mask)
-        ///   PEXT r32a, r32b, reg/m32
+        ///   PEXT r32a, r32b, r/m32
          /// </summary>
          public static uint ParallelBitExtract(uint value, uint mask) => ParallelBitExtract(value, mask);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs
index 64b3024a56f8b021825be37ce6b9139cc6f5804e..398f55f3c6b068e7175c53f716b3673bce6b978c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs
@@ -26,171 +26,203 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMADDPS xmm, xmm, xmm/m128
+        ///   VFMADDPS xmm1,         xmm2, xmm3/m128
+        ///   VFMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplyAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMADDPD xmm, xmm, xmm/m128
+        ///   VFMADDPD xmm1,         xmm2, xmm3/m128
+        ///   VFMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplyAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMADDPS ymm, ymm, ymm/m256
+        ///   VFMADDPS ymm1,         ymm2, ymm3/m256
+        ///   VFMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplyAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMADDPS ymm, ymm, ymm/m256
+        ///   VFMADDPD ymm1,         ymm2, ymm3/m256
+        ///   VFMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplyAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFMADDSS xmm, xmm, xmm/m32
+        ///   VFMADDSS xmm1,         xmm2, xmm3/m32
+        ///   VFMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplyAddScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFMADDSS xmm, xmm, xmm/m64
+        ///   VFMADDSD xmm1,         xmm2, xmm3/m64
+        ///   VFMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplyAddScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMADDSUBPS xmm, xmm, xmm/m128
+        ///   VFMADDSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VFMADDSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplyAddSubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMADDSUBPD xmm, xmm, xmm/m128
+        ///   VFMADDSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VFMADDSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplyAddSubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMADDSUBPS ymm, ymm, ymm/m256
+        ///   VFMADDSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VFMADDSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplyAddSubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMADDSUBPD ymm, ymm, ymm/m256
+        ///   VFMADDSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VFMADDSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplyAddSubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMSUBPS xmm, xmm, xmm/m128
+        ///   VFMSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplySubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMSUBPS xmm, xmm, xmm/m128
+        ///   VFMSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplySubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMSUBPS ymm, ymm, ymm/m256
+        ///   VFMSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplySubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMSUBPD ymm, ymm, ymm/m256
+        ///   VFMSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplySubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFMSUBSS xmm, xmm, xmm/m32
+        ///   VFMSUBSS xmm1,         xmm2, xmm3/m32
+        ///   VFMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFMSUBSD xmm, xmm, xmm/m64
+        ///   VFMSUBSD xmm1,         xmm2, xmm3/m64
+        ///   VFMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplySubtractScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMSUBADDPS xmm, xmm, xmm/m128
+        ///   VFMSUBADDPS xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplySubtractAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMSUBADDPD xmm, xmm, xmm/m128
+        ///   VFMSUBADDPD xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplySubtractAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMSUBADDPS ymm, ymm, ymm/m256
+        ///   VFMSUBADDPS ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplySubtractAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMSUBADDPD ymm, ymm, ymm/m256
+        ///   VFMSUBADDPD ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplySubtractAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFNMADDPS xmm, xmm, xmm/m128
+        ///   VFNMADDPS xmm1,         xmm2, xmm3/m128
+        ///   VFNMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplyAddNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMADDPD xmm, xmm, xmm/m128
+        ///   VFNMADDPD xmm1,         xmm2, xmm3/m128
+        ///   VFNMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplyAddNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFNMADDPS ymm, ymm, ymm/m256
+        ///   VFNMADDPS ymm1,         ymm2, ymm3/m256
+        ///   VFNMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplyAddNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFNMADDPD ymm, ymm, ymm/m256
+        ///   VFNMADDPD ymm1,         ymm2, ymm3/m256
+        ///   VFNMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplyAddNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFNMADDSS xmm, xmm, xmm/m32
+        ///   VFNMADDSS xmm1,         xmm2, xmm3/m32
+        ///   VFNMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplyAddNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMADDSD xmm, xmm, xmm/m64
+        ///   VFNMADDSD xmm1,         xmm2, xmm3/m64
+        ///   VFNMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplyAddNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFNMSUBPS xmm, xmm, xmm/m128
+        ///   VFNMSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VFNMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplySubtractNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMSUBPD xmm, xmm, xmm/m128
+        ///   VFNMSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VFNMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplySubtractNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256 _mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFNMSUBPS ymm, ymm, ymm/m256
+        ///   VFNMSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VFNMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplySubtractNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m256d _mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFNMSUBPD ymm, ymm, ymm/m256
+        ///   VFNMSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VFNMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplySubtractNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFNMSUBSS xmm, xmm, xmm/m32
+        ///   VFNMSUBSS xmm1,         xmm2, xmm3/m32
+        ///   VFNMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplySubtractNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMSUBSD xmm, xmm, xmm/m64
+        ///   VFNMSUBSD xmm1,         xmm2, xmm3/m64
+        ///   VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplySubtractNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs
index d147e8b4a712b63ad3c2f5fadc48e36cd75b84a7..f1549932e8a36a8b520a9789cc53e86b37e071f6 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs
@@ -26,171 +26,203 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMADDPS xmm, xmm, xmm/m128
+        ///   VFMADDPS xmm1,         xmm2, xmm3/m128
+        ///   VFMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplyAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAdd(a, b, c);
          /// <summary>
          /// __m128d _mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMADDPD xmm, xmm, xmm/m128
+        ///   VFMADDPD xmm1,         xmm2, xmm3/m128
+        ///   VFMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplyAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAdd(a, b, c);
          /// <summary>
          /// __m256 _mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMADDPS ymm, ymm, ymm/m256
+        ///   VFMADDPS ymm1,         ymm2, ymm3/m256
+        ///   VFMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplyAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplyAdd(a, b, c);
          /// <summary>
          /// __m256d _mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMADDPS ymm, ymm, ymm/m256
+        ///   VFMADDPD ymm1,         ymm2, ymm3/m256
+        ///   VFMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplyAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplyAdd(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFMADDSS xmm, xmm, xmm/m32
+        ///   VFMADDSS xmm1,         xmm2, xmm3/m32
+        ///   VFMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplyAddScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddScalar(a, b, c);
          /// <summary>
          /// __m128d _mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFMADDSS xmm, xmm, xmm/m64
+        ///   VFMADDSD xmm1,         xmm2, xmm3/m64
+        ///   VFMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplyAddScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddScalar(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMADDSUBPS xmm, xmm, xmm/m128
+        ///   VFMADDSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VFMADDSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplyAddSubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddSubtract(a, b, c);
          /// <summary>
          /// __m128d _mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMADDSUBPD xmm, xmm, xmm/m128
+        ///   VFMADDSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VFMADDSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplyAddSubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddSubtract(a, b, c);
          /// <summary>
          /// __m256 _mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMADDSUBPS ymm, ymm, ymm/m256
+        ///   VFMADDSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VFMADDSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplyAddSubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplyAddSubtract(a, b, c);
          /// <summary>
          /// __m256d _mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMADDSUBPD ymm, ymm, ymm/m256
+        ///   VFMADDSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VFMADDSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplyAddSubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplyAddSubtract(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMSUBPS xmm, xmm, xmm/m128
+        ///   VFMSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplySubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtract(a, b, c);
          /// <summary>
          /// __m128d _mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMSUBPS xmm, xmm, xmm/m128
+        ///   VFMSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplySubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtract(a, b, c);
          /// <summary>
          /// __m256 _mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMSUBPS ymm, ymm, ymm/m256
+        ///   VFMSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplySubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplySubtract(a, b, c);
          /// <summary>
          /// __m256d _mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMSUBPD ymm, ymm, ymm/m256
+        ///   VFMSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplySubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplySubtract(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFMSUBSS xmm, xmm, xmm/m32
+        ///   VFMSUBSS xmm1,         xmm2, xmm3/m32
+        ///   VFMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractScalar(a, b, c);
          /// <summary>
          /// __m128d _mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFMSUBSD xmm, xmm, xmm/m64
+        ///   VFMSUBSD xmm1,         xmm2, xmm3/m64
+        ///   VFMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplySubtractScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractScalar(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFMSUBADDPS xmm, xmm, xmm/m128
+        ///   VFMSUBADDPS xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplySubtractAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractAdd(a, b, c);
          /// <summary>
          /// __m128d _mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFMSUBADDPD xmm, xmm, xmm/m128
+        ///   VFMSUBADDPD xmm1,         xmm2, xmm3/m128
+        ///   VFMSUBADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplySubtractAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractAdd(a, b, c);
          /// <summary>
          /// __m256 _mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFMSUBADDPS ymm, ymm, ymm/m256
+        ///   VFMSUBADDPS ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplySubtractAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplySubtractAdd(a, b, c);
          /// <summary>
          /// __m256d _mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFMSUBADDPD ymm, ymm, ymm/m256
+        ///   VFMSUBADDPD ymm1,         ymm2, ymm3/m256
+        ///   VFMSUBADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplySubtractAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplySubtractAdd(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFNMADDPS xmm, xmm, xmm/m128
+        ///   VFNMADDPS xmm1,         xmm2, xmm3/m128
+        ///   VFNMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplyAddNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddNegated(a, b, c);
          /// <summary>
          /// __m128d _mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMADDPD xmm, xmm, xmm/m128
+        ///   VFNMADDPD xmm1,         xmm2, xmm3/m128
+        ///   VFNMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplyAddNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddNegated(a, b, c);
          /// <summary>
          /// __m256 _mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFNMADDPS ymm, ymm, ymm/m256
+        ///   VFNMADDPS ymm1,         ymm2, ymm3/m256
+        ///   VFNMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplyAddNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplyAddNegated(a, b, c);
          /// <summary>
          /// __m256d _mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFNMADDPD ymm, ymm, ymm/m256
+        ///   VFNMADDPD ymm1,         ymm2, ymm3/m256
+        ///   VFNMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplyAddNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplyAddNegated(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFNMADDSS xmm, xmm, xmm/m32
+        ///   VFNMADDSS xmm1,         xmm2, xmm3/m32
+        ///   VFNMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplyAddNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddNegatedScalar(a, b, c);
          /// <summary>
          /// __m128d _mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMADDSD xmm, xmm, xmm/m64
+        ///   VFNMADDSD xmm1,         xmm2, xmm3/m64
+        ///   VFNMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplyAddNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddNegatedScalar(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
-        ///   VFNMSUBPS xmm, xmm, xmm/m128
+        ///   VFNMSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VFNMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> MultiplySubtractNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractNegated(a, b, c);
          /// <summary>
          /// __m128d _mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMSUBPD xmm, xmm, xmm/m128
+        ///   VFNMSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VFNMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> MultiplySubtractNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractNegated(a, b, c);
          /// <summary>
          /// __m256 _mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
-        ///   VFNMSUBPS ymm, ymm, ymm/m256
+        ///   VFNMSUBPS ymm1,         ymm2, ymm3/m256
+        ///   VFNMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
          /// </summary>
          public static Vector256<float> MultiplySubtractNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplySubtractNegated(a, b, c);
          /// <summary>
          /// __m256d _mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
-        ///   VFNMSUBPD ymm, ymm, ymm/m256
+        ///   VFNMSUBPD ymm1,         ymm2, ymm3/m256
+        ///   VFNMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
          /// </summary>
          public static Vector256<double> MultiplySubtractNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplySubtractNegated(a, b, c);
  
          /// <summary>
          /// __m128 _mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
-        ///   VFNMSUBSS xmm, xmm, xmm/m32
+        ///   VFNMSUBSS xmm1,         xmm2, xmm3/m32
+        ///   VFNMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> MultiplySubtractNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractNegatedScalar(a, b, c);
          /// <summary>
          /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
-        ///   VFNMSUBSD xmm, xmm, xmm/m64
+        ///   VFNMSUBSD xmm1,         xmm2, xmm3/m64
+        ///   VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> MultiplySubtractNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractNegatedScalar(a, b, c);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs

index 849a68476610e1faab1b314d4c3a20ecd8185b90..f15deb98b688f4acdb12e4f5c1f92ed29af6da46 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs
@@ -24,7 +24,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _lzcnt_u64 (unsigned __int64 a)
-            ///   LZCNT reg, reg/m64
+            ///   LZCNT r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong LeadingZeroCount(ulong value) { throw new PlatformNotSupportedException(); }
@@ -32,7 +32,7 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// unsigned int _lzcnt_u32 (unsigned int a)
-        ///   LZCNT reg, reg/m32
+        ///   LZCNT r32, r/m32
          /// </summary>
          public static uint LeadingZeroCount(uint value) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs

index 2493ed2aba1b1eea59ee24ca3fd41587f6ffa547..4fdfabd818966f02405573b9a42a0166180b8d9a 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs
@@ -25,7 +25,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _lzcnt_u64 (unsigned __int64 a)
-            ///   LZCNT reg, reg/m64
+            ///   LZCNT r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong LeadingZeroCount(ulong value) => LeadingZeroCount(value);
@@ -33,7 +33,7 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// unsigned int _lzcnt_u32 (unsigned int a)
-        ///   LZCNT reg, reg/m32
+        ///   LZCNT r32, r/m32
          /// </summary>
          public static uint LeadingZeroCount(uint value) => LeadingZeroCount(value);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs

index 6cb481911325cc289e8a11b2b521a177ed97e568..a815701bc83015d4bb9276fe24e3f658a3d4ffc4 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs
@@ -27,12 +27,14 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8)
-        ///   PCLMULQDQ xmm, xmm/m128, imm8
+        ///    PCLMULQDQ xmm1,       xmm2/m128, imm8
+        ///   VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<long> CarrylessMultiply(Vector128<long> left, Vector128<long> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8)
-        ///   PCLMULQDQ xmm, xmm/m128, imm8
+        ///    PCLMULQDQ xmm1,       xmm2/m128, imm8
+        ///   VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ulong> CarrylessMultiply(Vector128<ulong> left, Vector128<ulong> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs

index 0423cd49d726f92019a9e52294024ad723cfb96d..aa1c3d21b8c0b554bc933bd294049374c01fcf5b 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs
@@ -27,12 +27,14 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8)
-        ///   PCLMULQDQ xmm, xmm/m128, imm8
+        ///    PCLMULQDQ xmm1,       xmm2/m128, imm8
+        ///   VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<long> CarrylessMultiply(Vector128<long> left, Vector128<long> right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control);
          /// <summary>
          /// __m128i _mm_clmulepi64_si128 (__m128i a, __m128i b, const int imm8)
-        ///   PCLMULQDQ xmm, xmm/m128, imm8
+        ///    PCLMULQDQ xmm1,       xmm2/m128, imm8
+        ///   VPCLMULQDQ xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ulong> CarrylessMultiply(Vector128<ulong> left, Vector128<ulong> right, [ConstantExpected] byte control) => CarrylessMultiply(left, right, control);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs

index 1913605685c337cc969b6d8c6260ee64181ea1f9..bd979943225397e023282ce8633d7c1c532148c8 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs
@@ -24,7 +24,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// __int64 _mm_popcnt_u64 (unsigned __int64 a)
-            ///   POPCNT reg64, reg/m64
+            ///   POPCNT r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong PopCount(ulong value) { throw new PlatformNotSupportedException(); }
@@ -32,7 +32,7 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// int _mm_popcnt_u32 (unsigned int a)
-        ///   POPCNT reg, reg/m32
+        ///   POPCNT r32, r/m32
          /// </summary>
          public static uint PopCount(uint value) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs

index 185f7fc00e619255497d5e00a6695d446340a629..c04de74cefc46b7f5292d73d78317315e643c0f5 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs
@@ -25,7 +25,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// __int64 _mm_popcnt_u64 (unsigned __int64 a)
-            ///   POPCNT reg64, reg/m64
+            ///   POPCNT r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong PopCount(ulong value) => PopCount(value);
@@ -33,7 +33,7 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// int _mm_popcnt_u32 (unsigned int a)
-        ///   POPCNT reg, reg/m32
+        ///   POPCNT r32, r/m32
          /// </summary>
          public static uint PopCount(uint value) => PopCount(value);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs

index 5f7655fdb367c41b9a4eb8980c4e8897c3a0fd72..c512bc4246f1a0c7ed15f7c30bb99db3cf91c1a7 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs
@@ -24,550 +24,620 @@ namespace System.Runtime.Intrinsics.X86
  
              public static new bool IsSupported { [Intrinsic] get { return false; } }
  
-            /// <summary>
-            /// __int64 _mm_cvtss_si64 (__m128 a)
-            ///   CVTSS2SI r64, xmm/m32
-            /// This intrinsic is only available on 64-bit processes
-            /// </summary>
-            public static long ConvertToInt64(Vector128<float> value) { throw new PlatformNotSupportedException(); }
              /// <summary>
              /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
-            ///   CVTSI2SS xmm, reg/m64
+            ///    CVTSI2SS xmm1,       r/m64
+            ///   VCVTSI2SS xmm1, xmm2, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, long value) { throw new PlatformNotSupportedException(); }
  
+            /// <summary>
+            /// __int64 _mm_cvtss_si64 (__m128 a)
+            ///    CVTSS2SI r64, xmm1/m32
+            ///   VCVTSS2SI r64, xmm1/m32
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static long ConvertToInt64(Vector128<float> value) { throw new PlatformNotSupportedException(); }
              /// <summary>
              /// __int64 _mm_cvttss_si64 (__m128 a)
-            ///   CVTTSS2SI r64, xmm/m32
+            ///    CVTTSS2SI r64, xmm1/m32
+            ///   VCVTTSS2SI r64, xmm1/m32
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static long ConvertToInt64WithTruncation(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          }
  
          /// <summary>
          /// __m128 _mm_add_ps (__m128 a,  __m128 b)
-        ///   ADDPS xmm, xmm/m128
+        ///    ADDPS xmm1,               xmm2/m128
+        ///   VADDPS xmm1,         xmm2, xmm3/m128
+        ///   VADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Add(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_add_ss (__m128 a,  __m128 b)
-        ///   ADDSS xmm, xmm/m32
+        ///    ADDSS xmm1,               xmm2/m32
+        ///   VADDSS xmm1,         xmm2, xmm3/m32
+        ///   VADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> AddScalar(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_and_ps (__m128 a, __m128 b)
-        ///   ANDPS xmm, xmm/m128
+        ///    ANDPS xmm1,               xmm2/m128
+        ///   VANDPS xmm1,         xmm2, xmm3/m128
+        ///   VANDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> And(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_andnot_ps (__m128 a, __m128 b)
-        ///   ANDNPS xmm, xmm/m128
+        ///    ANDNPS xmm1,               xmm2/m128
+        ///   VANDNPS xmm1,         xmm2, xmm3/m128
+        ///   VANDNPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> AndNot(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_cmpeq_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(0)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(0)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(0)
          /// </summary>
          public static Vector128<float> CompareEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// int _mm_comieq_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
-        /// </summary>
-        public static bool CompareScalarOrderedEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
-        /// int _mm_ucomieq_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpgt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(1)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(1)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpeq_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(0)
+        /// __m128 _mm_cmpge_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(2)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(2)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareScalarEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpgt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(1) with swapped operands
+        /// __m128 _mm_cmplt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(1)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(1)
          /// </summary>
-        public static Vector128<float> CompareGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comigt_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmple_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(2)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(2)
          /// </summary>
-        public static bool CompareScalarOrderedGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomigt_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpneq_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(4)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(4)
          /// </summary>
-        public static bool CompareScalarUnorderedGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpgt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(1) with swapped operands
+        /// __m128 _mm_cmpngt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(5)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(5)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareScalarGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareNotGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpge_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(2) with swapped operands
+        /// __m128 _mm_cmpnge_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(6)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(6)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comige_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpnlt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(5)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(5)
          /// </summary>
-        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareNotLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomige_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpnle_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(6)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(6)
          /// </summary>
-        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpge_ss (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m32, imm8(2) with swapped operands
+        /// __m128 _mm_cmpord_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(7)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(7)
          /// </summary>
-        public static Vector128<float> CompareScalarGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> CompareOrdered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128 _mm_cmplt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(1)
+        /// __m128 _mm_cmpeq_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(0)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(0)
          /// </summary>
-        public static Vector128<float> CompareLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comilt_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpgt_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(1)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(1)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarOrderedLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomilt_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpge_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(2)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(2)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_cmplt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(1)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(1)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(1)
          /// </summary>
          public static Vector128<float> CompareScalarLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
-        /// __m128 _mm_cmple_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(2)
+        /// __m128 _mm_cmple_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(2)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(2)
          /// </summary>
-        public static Vector128<float> CompareLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comile_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpneq_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(4)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(4)
          /// </summary>
-        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomile_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpngt_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(5)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(5)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarNotGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmple_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(2)
+        /// __m128 _mm_cmpnge_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(6)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(6)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareScalarLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpneq_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_cmpnlt_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(5)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(5)
          /// </summary>
-        public static Vector128<float> CompareNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarNotLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comineq_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpnle_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(6)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(6)
          /// </summary>
-        public static bool CompareScalarOrderedNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> CompareScalarNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// int _mm_ucomineq_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpord_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(7)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(7)
          /// </summary>
-        public static bool CompareScalarUnorderedNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarOrdered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpneq_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(4)
+        /// int _mm_comieq_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISS xmm1, xmm2/m32        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<float> CompareScalarNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpngt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(5) with swapped operands
+        /// int _mm_comigt_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISS xmm1, xmm2/m32        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static Vector128<float> CompareNotGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpngt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(5) with swapped operands
+        /// int _mm_comige_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; CF=0
+        ///   VCOMISS xmm1, xmm2/m32        ; CF=0
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; CF=0
          /// </summary>
-        public static Vector128<float> CompareScalarNotGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpnge_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(6) with swapped operands
+        /// int _mm_comilt_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static Vector128<float> CompareNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpnge_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(6) with swapped operands
+        /// int _mm_comile_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; PF=0 &amp;&amp; (ZF=1 || CF=1)
          /// </summary>
-        public static Vector128<float> CompareScalarNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpnlt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(5)
+        /// int _mm_comineq_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; ZF=0 || PF=1
+        ///   VCOMISS xmm1, xmm2/m32        ; ZF=0 || PF=1
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; ZF=0 || PF=1
          /// </summary>
-        public static Vector128<float> CompareNotLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarOrderedNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128 _mm_cmpnlt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(5)
+        /// __m128 _mm_cmpunord_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(3)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(3)
          /// </summary>
-        public static Vector128<float> CompareScalarNotLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> CompareScalarUnordered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpnle_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(6)
+        /// int _mm_ucomieq_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISS xmm1, xmm2/m32       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<float> CompareNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarUnorderedEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpnle_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(6)
+        /// int _mm_ucomigt_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static Vector128<float> CompareScalarNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarUnorderedGreaterThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpord_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(7)
+        /// int _mm_ucomige_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32       ; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; CF=0
          /// </summary>
-        public static Vector128<float> CompareOrdered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpord_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(7)
+        /// int _mm_ucomilt_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static Vector128<float> CompareScalarOrdered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarUnorderedLessThan(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpunord_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(3)
+        /// int _mm_ucomile_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; PF=0 &amp;&amp; (ZF=1 || CF=1)
          /// </summary>
-        public static Vector128<float> CompareUnordered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cmpunord_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(3)
+        /// int _mm_ucomineq_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; ZF=0 || PF=1
+        ///   VUCOMISS xmm1, xmm2/m32       ; ZF=0 || PF=1
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; ZF=0 || PF=1
          /// </summary>
-        public static Vector128<float> CompareScalarUnordered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarUnorderedNotEqual(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// int _mm_cvtss_si32 (__m128 a)
-        ///   CVTSS2SI r32, xmm/m32
+        /// __m128 _mm_cmpunord_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(3)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(3)
          /// </summary>
-        public static int ConvertToInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> CompareUnordered(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_cvtsi32_ss (__m128 a, int b)
-        ///   CVTSI2SS xmm, reg/m32
+        ///    CVTSI2SS xmm1,       r/m32
+        ///   VCVTSI2SS xmm1, xmm2, r/m32
          /// </summary>
          public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, int value) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// int _mm_cvtss_si32 (__m128 a)
+        ///    CVTSS2SI r32, xmm1/m32
+        ///   VCVTSS2SI r32, xmm1/m32
+        /// </summary>
+        public static int ConvertToInt32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_cvttss_si32 (__m128 a)
-        ///   CVTTSS2SI r32, xmm/m32
+        ///    CVTTSS2SI r32, xmm1/m32
+        ///   VCVTTSS2SI r32, xmm1/m32
          /// </summary>
          public static int ConvertToInt32WithTruncation(Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_div_ps (__m128 a,  __m128 b)
-        ///   DIVPS xmm, xmm/m128
+        ///    DIVPS xmm1,               xmm2/m128
+        ///   VDIVPS xmm1,         xmm2, xmm3/m128
+        ///   VDIVPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_div_ss (__m128 a,  __m128 b)
-        ///   DIVSS xmm, xmm/m32
+        ///    DIVSS xmm1,       xmm2/m32
+        ///   VDIVSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> DivideScalar(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128 _mm_loadu_ps (float const* mem_address)
-        ///   MOVUPS xmm, m128
-        /// </summary>
-        public static unsafe Vector128<float> LoadVector128(float* address) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_load_ss (float const* mem_address)
-        ///   MOVSS xmm, m32
-        /// </summary>
-        public static unsafe Vector128<float> LoadScalarVector128(float* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_load_ps (float const* mem_address)
-        ///   MOVAPS xmm, m128
+        ///    MOVAPS xmm1,         m128
+        ///   VMOVAPS xmm1,         m128
+        ///   VMOVAPS xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<float> LoadAlignedVector128(float* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr)
-        ///   MOVHPS xmm, m64
+        ///    MOVHPS xmm1,       m64
+        ///   VMOVHPS xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<float> LoadHigh(Vector128<float> lower, float* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr)
-        ///   MOVLPS xmm, m64
+        ///    MOVLPS xmm1,       m64
+        ///   VMOVLPS xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<float> LoadLow(Vector128<float> upper, float* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_load_ss (float const* mem_address)
+        ///    MOVSS xmm1,         m32
+        ///   VMOVSS xmm1,         m32
+        ///   VMOVSS xmm1 {k1}{z}, m32
+        /// </summary>
+        public static unsafe Vector128<float> LoadScalarVector128(float* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_loadu_ps (float const* mem_address)
+        ///    MOVUPS xmm1,         m128
+        ///   VMOVUPS xmm1,         m128
+        ///   VMOVUPS xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<float> LoadVector128(float* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_max_ps (__m128 a,  __m128 b)
-        ///   MAXPS xmm, xmm/m128
+        ///    MAXPS xmm1,               xmm2/m128
+        ///   VMAXPS xmm1,         xmm2, xmm3/m128
+        ///   VMAXPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Max(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_max_ss (__m128 a,  __m128 b)
-        ///   MAXSS xmm, xmm/m32
+        ///    MAXSS xmm1,       xmm2/m32
+        ///   VMAXSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> MaxScalar(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_min_ps (__m128 a,  __m128 b)
-        ///   MINPS xmm, xmm/m128
+        ///    MINPS xmm1,               xmm2/m128
+        ///   VMINPS xmm1,         xmm2, xmm3/m128
+        ///   VMINPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Min(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_min_ss (__m128 a,  __m128 b)
-        ///   MINSS xmm, xmm/m32
+        ///    MINSS xmm1,       xmm2/m32
+        ///   VMINSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> MinScalar(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128 _mm_move_ss (__m128 a, __m128 b)
-        ///   MOVSS xmm, xmm
-        /// </summary>
-        public static Vector128<float> MoveScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_movehl_ps (__m128 a,  __m128 b)
-        ///   MOVHLPS xmm, xmm
+        ///    MOVHLPS xmm1,       xmm2
+        ///   VMOVHLPS xmm1, xmm2, xmm3
          /// </summary>
          public static Vector128<float> MoveHighToLow(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_movelh_ps (__m128 a,  __m128 b)
-        ///   MOVLHPS xmm, xmm
+        ///    MOVLHPS xmm1,       xmm2
+        ///   VMOVLHPS xmm1, xmm2, xmm3
          /// </summary>
          public static Vector128<float> MoveLowToHigh(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm_movemask_ps (__m128 a)
-        ///   MOVMSKPS reg, xmm
+        ///    MOVMSKPS r32, xmm1
+        ///   VMOVMSKPS r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128 _mm_move_ss (__m128 a, __m128 b)
+        ///    MOVSS xmm1,         xmm2
+        ///   VMOVSS xmm1,         xmm2, xmm3
+        ///   VMOVSS xmm1 {k1}{z}, xmm2, xmm3
+        /// </summary>
+        public static Vector128<float> MoveScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_mul_ps (__m128 a, __m128 b)
-        ///   MULPS xmm, xmm/m128
+        ///    MULPS xmm1,               xmm2/m128
+        ///   VMULPS xmm1,         xmm2, xmm3/m128
+        ///   VMULPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_mul_ss (__m128 a, __m128 b)
-        ///   MULPS xmm, xmm/m32
+        ///    MULSS xmm1,       xmm2/m32
+        ///   VMULSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> MultiplyScalar(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m128 _mm_or_ps (__m128 a,  __m128 b)
+        ///    ORPS xmm1,               xmm2/m128
+        ///   VORPS xmm1,         xmm2, xmm3/m128
+        ///   VORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
+        /// </summary>
+        public static Vector128<float> Or(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHT0 m8
          /// </summary>
          public static unsafe void Prefetch0(void* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHT1 m8
          /// </summary>
          public static unsafe void Prefetch1(void* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHT2 m8
          /// </summary>
          public static unsafe void Prefetch2(void* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHNTA m8
          /// </summary>
          public static unsafe void PrefetchNonTemporal(void* address) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128 _mm_or_ps (__m128 a,  __m128 b)
-        ///   ORPS xmm, xmm/m128
-        /// </summary>
-        public static Vector128<float> Or(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_rcp_ps (__m128 a)
-        ///   RCPPS xmm, xmm/m128
+        ///    RCPPS xmm1, xmm2/m128
+        ///   VRCPPS xmm1, xmm2/m128
          /// </summary>
          public static Vector128<float> Reciprocal(Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_rcp_ss (__m128 a)
-        ///   RCPSS xmm, xmm/m32
+        ///    RCPSS xmm1,       xmm2/m32
+        ///   VRCPSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> ReciprocalScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_rcp_ss (__m128 a, __m128 b)
-        ///   RCPSS xmm, xmm/m32
+        ///    RCPSS xmm1,       xmm2/m32
+        ///   VRCPSS xmm1, xmm2, xmm3/m32
          /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
          /// </summary>
          public static Vector128<float> ReciprocalScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_rsqrt_ps (__m128 a)
-        ///   RSQRTPS xmm, xmm/m128
+        ///    RSQRTPS xmm1, xmm2/m128
+        ///   VRSQRTPS xmm1, xmm2/m128
          /// </summary>
          public static Vector128<float> ReciprocalSqrt(Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_rsqrt_ss (__m128 a)
-        ///   RSQRTSS xmm, xmm/m32
+        ///    RSQRTSS xmm1,       xmm2/m32
+        ///   VRSQRTSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_rsqrt_ss (__m128 a, __m128 b)
-        ///   RSQRTSS xmm, xmm/m32
+        ///    RSQRTSS xmm1,       xmm2/m32
+        ///   VRSQRTSS xmm1, xmm2, xmm3/m32
          /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
          /// </summary>
          public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_shuffle_ps (__m128 a,  __m128 b, unsigned int control)
-        ///   SHUFPS xmm, xmm/m128, imm8
+        ///    SHUFPS xmm1,               xmm2/m128,         imm8
+        ///   VSHUFPS xmm1,         xmm2, xmm3/m128,         imm8
+        ///   VSHUFPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst, imm8
          /// </summary>
          public static Vector128<float> Shuffle(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_sqrt_ps (__m128 a)
-        ///   SQRTPS xmm, xmm/m128
+        ///    SQRTPS xmm1,         xmm2/m128
+        ///   VSQRTPS xmm1,         xmm2/m128
+        ///   VSQRTPS xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
          public static Vector128<float> Sqrt(Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_sqrt_ss (__m128 a)
-        ///   SQRTSS xmm, xmm/m32
+        ///    SQRTSS xmm1,               xmm2/m32
+        ///   VSQRTSS xmm1,         xmm2, xmm3/m32
+        ///   VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> SqrtScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_sqrt_ss (__m128 a, __m128 b)
-        ///   SQRTSS xmm, xmm/m32
+        ///    SQRTSS xmm1,               xmm2/m32
+        ///   VSQRTSS xmm1,         xmm2, xmm3/m32
+        ///   VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
          /// </summary>
          public static Vector128<float> SqrtScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// void _mm_storeu_ps (float* mem_addr, __m128 a)
+        ///    MOVUPS m128,         xmm1
+        ///   VMOVUPS m128,         xmm1
+        ///   VMOVUPS m128 {k1}{z}, xmm1
+        /// </summary>
+        public static unsafe void Store(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_ps (float* mem_addr, __m128 a)
-        ///   MOVAPS m128, xmm
+        ///    MOVAPS m128,         xmm1
+        ///   VMOVAPS m128,         xmm1
+        ///   VMOVAPS m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_stream_ps (float* mem_addr, __m128 a)
-        ///   MOVNTPS m128, xmm
+        ///    MOVNTPS m128, xmm1
+        ///   VMOVNTPS m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// void _mm_storeu_ps (float* mem_addr, __m128 a)
-        ///   MOVUPS m128, xmm
-        /// </summary>
-        public static unsafe void Store(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_sfence(void)
          ///   SFENCE
          /// </summary>
          public static void StoreFence() { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// void _mm_store_ss (float* mem_addr, __m128 a)
-        ///   MOVSS m32, xmm
-        /// </summary>
-        public static unsafe void StoreScalar(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_storeh_pi (__m64* mem_addr, __m128 a)
-        ///   MOVHPS m64, xmm
+        ///    MOVHPS m64, xmm1
+        ///   VMOVHPS m64, xmm1
          /// </summary>
          public static unsafe void StoreHigh(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_storel_pi (__m64* mem_addr, __m128 a)
-        ///   MOVLPS m64, xmm
+        ///    MOVLPS m64, xmm1
+        ///   VMOVLPS m64, xmm1
          /// </summary>
          public static unsafe void StoreLow(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_store_ss (float* mem_addr, __m128 a)
+        ///    MOVSS m32,      xmm1
+        ///   VMOVSS m32,      xmm1
+        ///   VMOVSS m32 {k1}, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(float* address, Vector128<float> source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_sub_ps (__m128d a, __m128d b)
-        ///   SUBPS xmm, xmm/m128
+        ///    SUBPS xmm1,               xmm2/m128
+        ///   VSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Subtract(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_sub_ss (__m128 a, __m128 b)
-        ///   SUBSS xmm, xmm/m32
+        ///    SUBSS xmm1,               xmm2/m32
+        ///   VSUBSS xmm1,         xmm2, xmm3/m32
+        ///   VSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> SubtractScalar(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_unpackhi_ps (__m128 a,  __m128 b)
-        ///   UNPCKHPS xmm, xmm/m128
+        ///    UNPCKHPS xmm1,               xmm2/m128
+        ///   VUNPCKHPS xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKHPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> UnpackHigh(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_unpacklo_ps (__m128 a,  __m128 b)
-        ///   UNPCKLPS xmm, xmm/m128
+        ///    UNPCKLPS xmm1,               xmm2/m128
+        ///   VUNPCKLPS xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKLPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> UnpackLow(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_xor_ps (__m128 a,  __m128 b)
-        ///   XORPS xmm, xmm/m128
+        ///    XORPS xmm1,               xmm2/m128
+        ///   VXORPS xmm1,         xmm2, xmm3/m128
+        ///   VXORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Xor(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs

index 3ca759b14725511e65afeb01a98b1c7e4e66f739..67bd57161ab2f9478aa36264959388a45b73fd29 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs
@@ -24,22 +24,25 @@ namespace System.Runtime.Intrinsics.X86
  
              public static new bool IsSupported { get => IsSupported; }
  
-            /// <summary>
-            /// __int64 _mm_cvtss_si64 (__m128 a)
-            ///   CVTSS2SI r64, xmm/m32
-            /// This intrinsic is only available on 64-bit processes
-            /// </summary>
-            public static long ConvertToInt64(Vector128<float> value) => ConvertToInt64(value);
              /// <summary>
              /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
-            ///   CVTSI2SS xmm, reg/m64
+            ///    CVTSI2SS xmm1,       r/m64
+            ///   VCVTSI2SS xmm1, xmm2, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, long value) => ConvertScalarToVector128Single(upper, value);
  
+            /// <summary>
+            /// __int64 _mm_cvtss_si64 (__m128 a)
+            ///    CVTSS2SI r64, xmm1/m32
+            ///   VCVTSS2SI r64, xmm1/m32
+            /// This intrinsic is only available on 64-bit processes
+            /// </summary>
+            public static long ConvertToInt64(Vector128<float> value) => ConvertToInt64(value);
              /// <summary>
              /// __int64 _mm_cvttss_si64 (__m128 a)
-            ///   CVTTSS2SI r64, xmm/m32
+            ///    CVTTSS2SI r64, xmm1/m32
+            ///   VCVTTSS2SI r64, xmm1/m32
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static long ConvertToInt64WithTruncation(Vector128<float> value) => ConvertToInt64WithTruncation(value);
@@ -47,367 +50,414 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128 _mm_add_ps (__m128 a,  __m128 b)
-        ///   ADDPS xmm, xmm/m128
+        ///    ADDPS xmm1,               xmm2/m128
+        ///   VADDPS xmm1,         xmm2, xmm3/m128
+        ///   VADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Add(Vector128<float> left, Vector128<float> right) => Add(left, right);
  
          /// <summary>
          /// __m128 _mm_add_ss (__m128 a,  __m128 b)
-        ///   ADDSS xmm, xmm/m32
+        ///    ADDSS xmm1,               xmm2/m32
+        ///   VADDSS xmm1,         xmm2, xmm3/m32
+        ///   VADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> AddScalar(Vector128<float> left, Vector128<float> right) => AddScalar(left, right);
  
          /// <summary>
          /// __m128 _mm_and_ps (__m128 a, __m128 b)
-        ///   ANDPS xmm, xmm/m128
+        ///    ANDPS xmm1,               xmm2/m128
+        ///   VANDPS xmm1,         xmm2, xmm3/m128
+        ///   VANDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> And(Vector128<float> left, Vector128<float> right) => And(left, right);
  
          /// <summary>
          /// __m128 _mm_andnot_ps (__m128 a, __m128 b)
-        ///   ANDNPS xmm, xmm/m128
+        ///    ANDNPS xmm1,               xmm2/m128
+        ///   VANDNPS xmm1,         xmm2, xmm3/m128
+        ///   VANDNPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> AndNot(Vector128<float> left, Vector128<float> right) => AndNot(left, right);
  
          /// <summary>
          /// __m128 _mm_cmpeq_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(0)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(0)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(0)
          /// </summary>
          public static Vector128<float> CompareEqual(Vector128<float> left, Vector128<float> right) => CompareEqual(left, right);
-
-        /// <summary>
-        /// int _mm_comieq_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
-        /// </summary>
-        public static bool CompareScalarOrderedEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedEqual(left, right);
-
          /// <summary>
-        /// int _mm_ucomieq_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpgt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(1)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(1)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedEqual(left, right);
-
+        public static Vector128<float> CompareGreaterThan(Vector128<float> left, Vector128<float> right) => CompareGreaterThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmpeq_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(0)
+        /// __m128 _mm_cmpge_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(2)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(2)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareScalarEqual(Vector128<float> left, Vector128<float> right) => CompareScalarEqual(left, right);
-
+        public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpgt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(1) with swapped operands
+        /// __m128 _mm_cmplt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(1)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(1)
          /// </summary>
-        public static Vector128<float> CompareGreaterThan(Vector128<float> left, Vector128<float> right) => CompareGreaterThan(left, right);
-
+        public static Vector128<float> CompareLessThan(Vector128<float> left, Vector128<float> right) => CompareLessThan(left, right);
          /// <summary>
-        /// int _mm_comigt_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmple_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(2)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(2)
          /// </summary>
-        public static bool CompareScalarOrderedGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedGreaterThan(left, right);
-
+        public static Vector128<float> CompareLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqual(left, right);
          /// <summary>
-        /// int _mm_ucomigt_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpneq_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(4)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(4)
          /// </summary>
-        public static bool CompareScalarUnorderedGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedGreaterThan(left, right);
-
+        public static Vector128<float> CompareNotEqual(Vector128<float> left, Vector128<float> right) => CompareNotEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpgt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(1) with swapped operands
+        /// __m128 _mm_cmpngt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(5)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(5)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareScalarGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarGreaterThan(left, right);
-
+        public static Vector128<float> CompareNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmpge_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(2) with swapped operands
+        /// __m128 _mm_cmpnge_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(6)   ; with swapped operands
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(6)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqual(left, right);
-
+        public static Vector128<float> CompareNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThanOrEqual(left, right);
          /// <summary>
-        /// int _mm_comige_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpnlt_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(5)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(5)
          /// </summary>
-        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedGreaterThanOrEqual(left, right);
-
+        public static Vector128<float> CompareNotLessThan(Vector128<float> left, Vector128<float> right) => CompareNotLessThan(left, right);
          /// <summary>
-        /// int _mm_ucomige_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpnle_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(6)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(6)
          /// </summary>
-        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedGreaterThanOrEqual(left, right);
-
+        public static Vector128<float> CompareNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotLessThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpge_ss (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m32, imm8(2) with swapped operands
+        /// __m128 _mm_cmpord_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(7)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(7)
          /// </summary>
-        public static Vector128<float> CompareScalarGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarGreaterThanOrEqual(left, right);
+        public static Vector128<float> CompareOrdered(Vector128<float> left, Vector128<float> right) => CompareOrdered(left, right);
  
          /// <summary>
-        /// __m128 _mm_cmplt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(1)
+        /// __m128 _mm_cmpeq_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(0)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(0)
          /// </summary>
-        public static Vector128<float> CompareLessThan(Vector128<float> left, Vector128<float> right) => CompareLessThan(left, right);
-
+        public static Vector128<float> CompareScalarEqual(Vector128<float> left, Vector128<float> right) => CompareScalarEqual(left, right);
          /// <summary>
-        /// int _mm_comilt_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpgt_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(1)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(1)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarOrderedLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedLessThan(left, right);
-
+        public static Vector128<float> CompareScalarGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarGreaterThan(left, right);
          /// <summary>
-        /// int _mm_ucomilt_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpge_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(2)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(2)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedLessThan(left, right);
-
+        public static Vector128<float> CompareScalarGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarGreaterThanOrEqual(left, right);
          /// <summary>
          /// __m128 _mm_cmplt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(1)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(1)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(1)
          /// </summary>
          public static Vector128<float> CompareScalarLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarLessThan(left, right);
-
          /// <summary>
-        /// __m128 _mm_cmple_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(2)
+        /// __m128 _mm_cmple_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(2)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(2)
          /// </summary>
-        public static Vector128<float> CompareLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqual(left, right);
-
+        public static Vector128<float> CompareScalarLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarLessThanOrEqual(left, right);
          /// <summary>
-        /// int _mm_comile_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpneq_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(4)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(4)
          /// </summary>
-        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedLessThanOrEqual(left, right);
-
+        public static Vector128<float> CompareScalarNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotEqual(left, right);
          /// <summary>
-        /// int _mm_ucomile_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpngt_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(5)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(5)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedLessThanOrEqual(left, right);
-
+        public static Vector128<float> CompareScalarNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarNotGreaterThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmple_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(2)
+        /// __m128 _mm_cmpnge_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(6)   ; with swapped operands
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(6)   ; with swapped operands
          /// </summary>
-        public static Vector128<float> CompareScalarLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarLessThanOrEqual(left, right);
-
+        public static Vector128<float> CompareScalarNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotGreaterThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpneq_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_cmpnlt_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(5)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(5)
          /// </summary>
-        public static Vector128<float> CompareNotEqual(Vector128<float> left, Vector128<float> right) => CompareNotEqual(left, right);
-
+        public static Vector128<float> CompareScalarNotLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarNotLessThan(left, right);
          /// <summary>
-        /// int _mm_comineq_ss (__m128 a, __m128 b)
-        ///   COMISS xmm, xmm/m32
+        /// __m128 _mm_cmpnle_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(6)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(6)
          /// </summary>
-        public static bool CompareScalarOrderedNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedNotEqual(left, right);
+        public static Vector128<float> CompareScalarNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotLessThanOrEqual(left, right);
  
          /// <summary>
-        /// int _mm_ucomineq_ss (__m128 a, __m128 b)
-        ///   UCOMISS xmm, xmm/m32
+        /// __m128 _mm_cmpord_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(7)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(7)
          /// </summary>
-        public static bool CompareScalarUnorderedNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedNotEqual(left, right);
-
+        public static Vector128<float> CompareScalarOrdered(Vector128<float> left, Vector128<float> right) => CompareScalarOrdered(left, right);
          /// <summary>
-        /// __m128 _mm_cmpneq_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(4)
+        /// int _mm_comieq_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISS xmm1, xmm2/m32        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<float> CompareScalarNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotEqual(left, right);
-
+        public static bool CompareScalarOrderedEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpngt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(5) with swapped operands
+        /// int _mm_comigt_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISS xmm1, xmm2/m32        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static Vector128<float> CompareNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThan(left, right);
-
+        public static bool CompareScalarOrderedGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedGreaterThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmpngt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(5) with swapped operands
+        /// int _mm_comige_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; CF=0
+        ///   VCOMISS xmm1, xmm2/m32        ; CF=0
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; CF=0
          /// </summary>
-        public static Vector128<float> CompareScalarNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarNotGreaterThan(left, right);
-
+        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedGreaterThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpnge_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(6) with swapped operands
+        /// int _mm_comilt_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static Vector128<float> CompareNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThanOrEqual(left, right);
-
+        public static bool CompareScalarOrderedLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedLessThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmpnge_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(6) with swapped operands
+        /// int _mm_comile_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISS xmm1, xmm2/m32        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; PF=0 &amp;&amp; (ZF=1 || CF=1)
          /// </summary>
-        public static Vector128<float> CompareScalarNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotGreaterThanOrEqual(left, right);
-
+        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedLessThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpnlt_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(5)
+        /// int _mm_comineq_ss (__m128 a, __m128 b)
+        ///    COMISS xmm1, xmm2/m32        ; ZF=0 || PF=1
+        ///   VCOMISS xmm1, xmm2/m32        ; ZF=0 || PF=1
+        ///   VCOMISS xmm1, xmm2/m32{sae}   ; ZF=0 || PF=1
          /// </summary>
-        public static Vector128<float> CompareNotLessThan(Vector128<float> left, Vector128<float> right) => CompareNotLessThan(left, right);
+        public static bool CompareScalarOrderedNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedNotEqual(left, right);
  
          /// <summary>
-        /// __m128 _mm_cmpnlt_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(5)
+        /// __m128 _mm_cmpunord_ss (__m128 a,  __m128 b)
+        ///    CMPSS xmm1,       xmm2/m32, imm8(3)
+        ///   VCMPSS xmm1, xmm2, xmm3/m32, imm8(3)
          /// </summary>
-        public static Vector128<float> CompareScalarNotLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarNotLessThan(left, right);
-
+        public static Vector128<float> CompareScalarUnordered(Vector128<float> left, Vector128<float> right) => CompareScalarUnordered(left, right);
          /// <summary>
-        /// __m128 _mm_cmpnle_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(6)
+        /// int _mm_ucomieq_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISS xmm1, xmm2/m32       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<float> CompareNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotLessThanOrEqual(left, right);
-
+        public static bool CompareScalarUnorderedEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpnle_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(6)
+        /// int _mm_ucomigt_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static Vector128<float> CompareScalarNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotLessThanOrEqual(left, right);
-
+        public static bool CompareScalarUnorderedGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedGreaterThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmpord_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(7)
+        /// int _mm_ucomige_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32       ; CF=0
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; CF=0
          /// </summary>
-        public static Vector128<float> CompareOrdered(Vector128<float> left, Vector128<float> right) => CompareOrdered(left, right);
-
+        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedGreaterThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpord_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(7)
+        /// int _mm_ucomilt_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static Vector128<float> CompareScalarOrdered(Vector128<float> left, Vector128<float> right) => CompareScalarOrdered(left, right);
-
+        public static bool CompareScalarUnorderedLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedLessThan(left, right);
          /// <summary>
-        /// __m128 _mm_cmpunord_ps (__m128 a,  __m128 b)
-        ///   CMPPS xmm, xmm/m128, imm8(3)
+        /// int _mm_ucomile_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISS xmm1, xmm2/m32       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; PF=0 &amp;&amp; (ZF=1 || CF=1)
          /// </summary>
-        public static Vector128<float> CompareUnordered(Vector128<float> left, Vector128<float> right) => CompareUnordered(left, right);
-
+        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedLessThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cmpunord_ss (__m128 a,  __m128 b)
-        ///   CMPSS xmm, xmm/m32, imm8(3)
+        /// int _mm_ucomineq_ss (__m128 a, __m128 b)
+        ///    UCOMISS xmm1, xmm2/m32       ; ZF=0 || PF=1
+        ///   VUCOMISS xmm1, xmm2/m32       ; ZF=0 || PF=1
+        ///   VUCOMISS xmm1, xmm2/m32{sae}  ; ZF=0 || PF=1
          /// </summary>
-        public static Vector128<float> CompareScalarUnordered(Vector128<float> left, Vector128<float> right) => CompareScalarUnordered(left, right);
+        public static bool CompareScalarUnorderedNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedNotEqual(left, right);
  
          /// <summary>
-        /// int _mm_cvtss_si32 (__m128 a)
-        ///   CVTSS2SI r32, xmm/m32
+        /// __m128 _mm_cmpunord_ps (__m128 a,  __m128 b)
+        ///    CMPPS xmm1,       xmm2/m128, imm8(3)
+        ///   VCMPPS xmm1, xmm2, xmm3/m128, imm8(3)
          /// </summary>
-        public static int ConvertToInt32(Vector128<float> value) => ConvertToInt32(value);
+        public static Vector128<float> CompareUnordered(Vector128<float> left, Vector128<float> right) => CompareUnordered(left, right);
  
          /// <summary>
          /// __m128 _mm_cvtsi32_ss (__m128 a, int b)
-        ///   CVTSI2SS xmm, reg/m32
+        ///    CVTSI2SS xmm1,       r/m32
+        ///   VCVTSI2SS xmm1, xmm2, r/m32
          /// </summary>
          public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, int value) => ConvertScalarToVector128Single(upper, value);
  
+        /// <summary>
+        /// int _mm_cvtss_si32 (__m128 a)
+        ///    CVTSS2SI r32, xmm1/m32
+        ///   VCVTSS2SI r32, xmm1/m32
+        /// </summary>
+        public static int ConvertToInt32(Vector128<float> value) => ConvertToInt32(value);
          /// <summary>
          /// int _mm_cvttss_si32 (__m128 a)
-        ///   CVTTSS2SI r32, xmm/m32
+        ///    CVTTSS2SI r32, xmm1/m32
+        ///   VCVTTSS2SI r32, xmm1/m32
          /// </summary>
          public static int ConvertToInt32WithTruncation(Vector128<float> value) => ConvertToInt32WithTruncation(value);
  
          /// <summary>
          /// __m128 _mm_div_ps (__m128 a,  __m128 b)
-        ///   DIVPS xmm, xmm/m128
+        ///    DIVPS xmm1,               xmm2/m128
+        ///   VDIVPS xmm1,         xmm2, xmm3/m128
+        ///   VDIVPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) => Divide(left, right);
  
          /// <summary>
          /// __m128 _mm_div_ss (__m128 a,  __m128 b)
-        ///   DIVSS xmm, xmm/m32
+        ///    DIVSS xmm1,       xmm2/m32
+        ///   VDIVSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> DivideScalar(Vector128<float> left, Vector128<float> right) => DivideScalar(left, right);
  
-        /// <summary>
-        /// __m128 _mm_loadu_ps (float const* mem_address)
-        ///   MOVUPS xmm, m128
-        /// </summary>
-        public static unsafe Vector128<float> LoadVector128(float* address) => LoadVector128(address);
-
-        /// <summary>
-        /// __m128 _mm_load_ss (float const* mem_address)
-        ///   MOVSS xmm, m32
-        /// </summary>
-        public static unsafe Vector128<float> LoadScalarVector128(float* address) => LoadScalarVector128(address);
-
          /// <summary>
          /// __m128 _mm_load_ps (float const* mem_address)
-        ///   MOVAPS xmm, m128
+        ///    MOVAPS xmm1,         m128
+        ///   VMOVAPS xmm1,         m128
+        ///   VMOVAPS xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<float> LoadAlignedVector128(float* address) => LoadAlignedVector128(address);
-
          /// <summary>
          /// __m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr)
-        ///   MOVHPS xmm, m64
+        ///    MOVHPS xmm1,       m64
+        ///   VMOVHPS xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<float> LoadHigh(Vector128<float> lower, float* address) => LoadHigh(lower, address);
-
          /// <summary>
          /// __m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr)
-        ///   MOVLPS xmm, m64
+        ///    MOVLPS xmm1,       m64
+        ///   VMOVLPS xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<float> LoadLow(Vector128<float> upper, float* address) => LoadLow(upper, address);
+        /// <summary>
+        /// __m128 _mm_load_ss (float const* mem_address)
+        ///    MOVSS xmm1,      m32
+        ///   VMOVSS xmm1,      m32
+        ///   VMOVSS xmm1 {k1}, m32
+        /// </summary>
+        public static unsafe Vector128<float> LoadScalarVector128(float* address) => LoadScalarVector128(address);
+        /// <summary>
+        /// __m128 _mm_loadu_ps (float const* mem_address)
+        ///    MOVUPS xmm1,         m128
+        ///   VMOVUPS xmm1,         m128
+        ///   VMOVUPS xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<float> LoadVector128(float* address) => LoadVector128(address);
  
          /// <summary>
          /// __m128 _mm_max_ps (__m128 a,  __m128 b)
-        ///   MAXPS xmm, xmm/m128
+        ///    MAXPS xmm1,               xmm2/m128
+        ///   VMAXPS xmm1,         xmm2, xmm3/m128
+        ///   VMAXPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Max(Vector128<float> left, Vector128<float> right) => Max(left, right);
  
          /// <summary>
          /// __m128 _mm_max_ss (__m128 a,  __m128 b)
-        ///   MAXSS xmm, xmm/m32
+        ///    MAXSS xmm1,       xmm2/m32
+        ///   VMAXSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> MaxScalar(Vector128<float> left, Vector128<float> right) => MaxScalar(left, right);
  
          /// <summary>
          /// __m128 _mm_min_ps (__m128 a,  __m128 b)
-        ///   MINPS xmm, xmm/m128
+        ///    MINPS xmm1,               xmm2/m128
+        ///   VMINPS xmm1,         xmm2, xmm3/m128
+        ///   VMINPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Min(Vector128<float> left, Vector128<float> right) => Min(left, right);
  
          /// <summary>
          /// __m128 _mm_min_ss (__m128 a,  __m128 b)
-        ///   MINSS xmm, xmm/m32
+        ///    MINSS xmm1,       xmm2/m32
+        ///   VMINSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> MinScalar(Vector128<float> left, Vector128<float> right) => MinScalar(left, right);
  
-        /// <summary>
-        /// __m128 _mm_move_ss (__m128 a, __m128 b)
-        ///   MOVSS xmm, xmm
-        /// </summary>
-        public static Vector128<float> MoveScalar(Vector128<float> upper, Vector128<float> value) => MoveScalar(upper, value);
-
          /// <summary>
          /// __m128 _mm_movehl_ps (__m128 a,  __m128 b)
-        ///   MOVHLPS xmm, xmm
+        ///    MOVHLPS xmm1,       xmm2
+        ///   VMOVHLPS xmm1, xmm2, xmm3
          /// </summary>
          public static Vector128<float> MoveHighToLow(Vector128<float> left, Vector128<float> right) => MoveHighToLow(left, right);
-
          /// <summary>
          /// __m128 _mm_movelh_ps (__m128 a,  __m128 b)
-        ///   MOVLHPS xmm, xmm
+        ///    MOVLHPS xmm1,       xmm2
+        ///   VMOVLHPS xmm1, xmm2, xmm3
          /// </summary>
          public static Vector128<float> MoveLowToHigh(Vector128<float> left, Vector128<float> right) => MoveLowToHigh(left, right);
-
          /// <summary>
          /// int _mm_movemask_ps (__m128 a)
-        ///   MOVMSKPS reg, xmm
+        ///    MOVMSKPS r32, xmm1
+        ///   VMOVMSKPS r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<float> value) => MoveMask(value);
+        /// <summary>
+        /// __m128 _mm_move_ss (__m128 a, __m128 b)
+        ///    MOVSS xmm1,         xmm2
+        ///   VMOVSS xmm1,         xmm2, xmm3
+        ///   VMOVSS xmm1 {k1}{z}, xmm2, xmm3
+        /// </summary>
+        public static Vector128<float> MoveScalar(Vector128<float> upper, Vector128<float> value) => MoveScalar(upper, value);
  
          /// <summary>
          /// __m128 _mm_mul_ps (__m128 a, __m128 b)
-        ///   MULPS xmm, xmm/m128
+        ///    MULPS xmm1,               xmm2/m128
+        ///   VMULPS xmm1,         xmm2, xmm3/m128
+        ///   VMULPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) => Multiply(left, right);
  
          /// <summary>
          /// __m128 _mm_mul_ss (__m128 a, __m128 b)
-        ///   MULPS xmm, xmm/m32
+        ///    MULSS xmm1,       xmm2/m32
+        ///   VMULSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> MultiplyScalar(Vector128<float> left, Vector128<float> right) => MultiplyScalar(left, right);
  
          /// <summary>
          /// __m128 _mm_or_ps (__m128 a,  __m128 b)
-        ///   ORPS xmm, xmm/m128
+        ///    ORPS xmm1,               xmm2/m128
+        ///   VORPS xmm1,         xmm2, xmm3/m128
+        ///   VORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Or(Vector128<float> left, Vector128<float> right) => Or(left, right);
  
@@ -416,19 +466,16 @@ namespace System.Runtime.Intrinsics.X86
          ///   PREFETCHT0 m8
          /// </summary>
          public static unsafe void Prefetch0(void* address) => Prefetch0(address);
-
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHT1 m8
          /// </summary>
          public static unsafe void Prefetch1(void* address) => Prefetch1(address);
-
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHT2 m8
          /// </summary>
          public static unsafe void Prefetch2(void* address) => Prefetch2(address);
-
          /// <summary>
          /// void _mm_prefetch(char* p, int i)
          ///   PREFETCHNTA m8
@@ -437,136 +484,160 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128 _mm_rcp_ps (__m128 a)
-        ///   RCPPS xmm, xmm/m128
+        ///    RCPPS xmm1, xmm2/m128
+        ///   VRCPPS xmm1, xmm2/m128
          /// </summary>
          public static Vector128<float> Reciprocal(Vector128<float> value) => Reciprocal(value);
  
          /// <summary>
          /// __m128 _mm_rcp_ss (__m128 a)
-        ///   RCPSS xmm, xmm/m32
+        ///    RCPSS xmm1,       xmm2/m32
+        ///   VRCPSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> ReciprocalScalar(Vector128<float> value) => ReciprocalScalar(value);
-
          /// <summary>
          /// __m128 _mm_rcp_ss (__m128 a, __m128 b)
-        ///   RCPSS xmm, xmm/m32
+        ///    RCPSS xmm1,       xmm2/m32
+        ///   VRCPSS xmm1, xmm2, xmm3/m32
          /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
          /// </summary>
          public static Vector128<float> ReciprocalScalar(Vector128<float> upper, Vector128<float> value) => ReciprocalScalar(upper, value);
  
          /// <summary>
          /// __m128 _mm_rsqrt_ps (__m128 a)
-        ///   RSQRTPS xmm, xmm/m128
+        ///    RSQRTPS xmm1, xmm2/m128
+        ///   VRSQRTPS xmm1, xmm2/m128
          /// </summary>
          public static Vector128<float> ReciprocalSqrt(Vector128<float> value) => ReciprocalSqrt(value);
  
          /// <summary>
          /// __m128 _mm_rsqrt_ss (__m128 a)
-        ///   RSQRTSS xmm, xmm/m32
+        ///    RSQRTSS xmm1,       xmm2/m32
+        ///   VRSQRTSS xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> value) => ReciprocalSqrtScalar(value);
-
          /// <summary>
          /// __m128 _mm_rsqrt_ss (__m128 a, __m128 b)
-        ///   RSQRTSS xmm, xmm/m32
+        ///    RSQRTSS xmm1,       xmm2/m32
+        ///   VRSQRTSS xmm1, xmm2, xmm3/m32
          /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
          /// </summary>
          public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> upper, Vector128<float> value) => ReciprocalSqrtScalar(upper, value);
  
          /// <summary>
          /// __m128 _mm_shuffle_ps (__m128 a,  __m128 b, unsigned int control)
-        ///   SHUFPS xmm, xmm/m128, imm8
+        ///    SHUFPS xmm1,               xmm2/m128,         imm8
+        ///   VSHUFPS xmm1,         xmm2, xmm3/m128,         imm8
+        ///   VSHUFPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst, imm8
          /// </summary>
          public static Vector128<float> Shuffle(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => Shuffle(left, right, control);
  
          /// <summary>
          /// __m128 _mm_sqrt_ps (__m128 a)
-        ///   SQRTPS xmm, xmm/m128
+        ///    SQRTPS xmm1,         xmm2/m128
+        ///   VSQRTPS xmm1,         xmm2/m128
+        ///   VSQRTPS xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
          public static Vector128<float> Sqrt(Vector128<float> value) => Sqrt(value);
  
          /// <summary>
          /// __m128 _mm_sqrt_ss (__m128 a)
-        ///   SQRTSS xmm, xmm/m32
+        ///    SQRTSS xmm1,               xmm2/m32
+        ///   VSQRTSS xmm1,         xmm2, xmm3/m32
+        ///   VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> SqrtScalar(Vector128<float> value) => SqrtScalar(value);
-
          /// <summary>
          /// __m128 _mm_sqrt_ss (__m128 a, __m128 b)
-        ///   SQRTSS xmm, xmm/m32
+        ///    SQRTSS xmm1,               xmm2/m32
+        ///   VSQRTSS xmm1,         xmm2, xmm3/m32
+        ///   VSQRTSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
          /// </summary>
          public static Vector128<float> SqrtScalar(Vector128<float> upper, Vector128<float> value) => SqrtScalar(upper, value);
  
+        /// <summary>
+        /// void _mm_storeu_ps (float* mem_addr, __m128 a)
+        ///    MOVUPS m128,         xmm1
+        ///   VMOVUPS m128,         xmm1
+        ///   VMOVUPS m128 {k1}{z}, xmm1
+        /// </summary>
+        public static unsafe void Store(float* address, Vector128<float> source) => Store(address, source);
          /// <summary>
          /// void _mm_store_ps (float* mem_addr, __m128 a)
-        ///   MOVAPS m128, xmm
+        ///    MOVAPS m128,         xmm1
+        ///   VMOVAPS m128,         xmm1
+        ///   VMOVAPS m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(float* address, Vector128<float> source) => StoreAligned(address, source);
-
          /// <summary>
          /// void _mm_stream_ps (float* mem_addr, __m128 a)
-        ///   MOVNTPS m128, xmm
+        ///    MOVNTPS m128, xmm1
+        ///   VMOVNTPS m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(float* address, Vector128<float> source) => StoreAlignedNonTemporal(address, source);
-
-        /// <summary>
-        /// void _mm_storeu_ps (float* mem_addr, __m128 a)
-        ///   MOVUPS m128, xmm
-        /// </summary>
-        public static unsafe void Store(float* address, Vector128<float> source) => Store(address, source);
-
          /// <summary>
          /// void _mm_sfence(void)
          ///   SFENCE
          /// </summary>
          public static void StoreFence() => StoreFence();
-
-        /// <summary>
-        /// void _mm_store_ss (float* mem_addr, __m128 a)
-        ///   MOVSS m32, xmm
-        /// </summary>
-        public static unsafe void StoreScalar(float* address, Vector128<float> source) => StoreScalar(address, source);
-
          /// <summary>
          /// void _mm_storeh_pi (__m64* mem_addr, __m128 a)
-        ///   MOVHPS m64, xmm
+        ///    MOVHPS m64, xmm1
+        ///   VMOVHPS m64, xmm1
          /// </summary>
          public static unsafe void StoreHigh(float* address, Vector128<float> source) => StoreHigh(address, source);
-
          /// <summary>
          /// void _mm_storel_pi (__m64* mem_addr, __m128 a)
-        ///   MOVLPS m64, xmm
+        ///    MOVLPS m64, xmm1
+        ///   VMOVLPS m64, xmm1
          /// </summary>
          public static unsafe void StoreLow(float* address, Vector128<float> source) => StoreLow(address, source);
+        /// <summary>
+        /// void _mm_store_ss (float* mem_addr, __m128 a)
+        ///    MOVSS m32,      xmm1
+        ///   VMOVSS m32,      xmm1
+        ///   VMOVSS m32 {k1}, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(float* address, Vector128<float> source) => StoreScalar(address, source);
  
          /// <summary>
          /// __m128 _mm_sub_ps (__m128 a, __m128 b)
-        ///   SUBPS xmm, xmm/m128
+        ///    SUBPS xmm1,               xmm2/m128
+        ///   VSUBPS xmm1,         xmm2, xmm3/m128
+        ///   VSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Subtract(Vector128<float> left, Vector128<float> right) => Subtract(left, right);
  
          /// <summary>
          /// __m128 _mm_sub_ss (__m128 a, __m128 b)
-        ///   SUBSS xmm, xmm/m32
+        ///    SUBSS xmm1,               xmm2/m32
+        ///   VSUBSS xmm1,         xmm2, xmm3/m32
+        ///   VSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
          /// </summary>
          public static Vector128<float> SubtractScalar(Vector128<float> left, Vector128<float> right) => SubtractScalar(left, right);
  
          /// <summary>
          /// __m128 _mm_unpackhi_ps (__m128 a,  __m128 b)
-        ///   UNPCKHPS xmm, xmm/m128
+        ///    UNPCKHPS xmm1,               xmm2/m128
+        ///   VUNPCKHPS xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKHPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> UnpackHigh(Vector128<float> left, Vector128<float> right) => UnpackHigh(left, right);
  
          /// <summary>
          /// __m128 _mm_unpacklo_ps (__m128 a,  __m128 b)
-        ///   UNPCKLPS xmm, xmm/m128
+        ///    UNPCKLPS xmm1,               xmm2/m128
+        ///   VUNPCKLPS xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKLPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> UnpackLow(Vector128<float> left, Vector128<float> right) => UnpackLow(left, right);
  
          /// <summary>
          /// __m128 _mm_xor_ps (__m128 a,  __m128 b)
-        ///   XORPS xmm, xmm/m128
+        ///    XORPS xmm1,               xmm2/m128
+        ///   VXORPS xmm1,         xmm2, xmm3/m128
+        ///   VXORPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<float> Xor(Vector128<float> left, Vector128<float> right) => Xor(left, right);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs

index 3b6ae16a2d1af724ca0d6c47898181168134ebb9..a52b19af01dc96b6f3154fe40e4ded098398b774 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs
@@ -25,52 +25,55 @@ namespace System.Runtime.Intrinsics.X86
              public static new bool IsSupported { [Intrinsic] get { return false; } }
  
              /// <summary>
-            /// __int64 _mm_cvtsd_si64 (__m128d a)
-            ///   CVTSD2SI r64, xmm/m64
+            /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
+            ///    CVTSI2SD xmm1,       r/m64
+            ///   VCVTSI2SD xmm1, xmm2, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static long ConvertToInt64(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value) { throw new PlatformNotSupportedException(); }
              /// <summary>
-            /// __int64 _mm_cvtsi128_si64 (__m128i a)
-            ///   MOVQ reg/m64, xmm
+            /// __m128i _mm_cvtsi64_si128 (__int64 a)
+            ///    MOVQ xmm1, r/m64
+            ///   VMOVQ xmm1, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static long ConvertToInt64(Vector128<long> value) { throw new PlatformNotSupportedException(); }
-
+            public static Vector128<long> ConvertScalarToVector128Int64(long value) { throw new PlatformNotSupportedException(); }
              /// <summary>
-            /// __int64 _mm_cvtsi128_si64 (__m128i a)
-            ///   MOVQ reg/m64, xmm
+            /// __m128i _mm_cvtsi64_si128 (__int64 a)
+            ///    MOVQ xmm1, r/m64
+            ///   VMOVQ xmm1, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static ulong ConvertToUInt64(Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
+            public static Vector128<ulong> ConvertScalarToVector128UInt64(ulong value) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
-            /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
-            ///   CVTSI2SD xmm, reg/m64
+            /// __int64 _mm_cvtsi128_si64 (__m128i a)
+            ///    MOVQ r/m64, xmm1
+            ///   VMOVQ r/m64, xmm1
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value) { throw new PlatformNotSupportedException(); }
-
+            public static long ConvertToInt64(Vector128<long> value) { throw new PlatformNotSupportedException(); }
              /// <summary>
-            /// __m128i _mm_cvtsi64_si128 (__int64 a)
-            ///   MOVQ xmm, reg/m64
+            /// __int64 _mm_cvtsd_si64 (__m128d a)
+            ///    CVTSD2SI r64, xmm1/m64
+            ///   VCVTSD2SI r64, xmm1/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static Vector128<long> ConvertScalarToVector128Int64(long value) { throw new PlatformNotSupportedException(); }
-
+            public static long ConvertToInt64(Vector128<double> value) { throw new PlatformNotSupportedException(); }
              /// <summary>
-            /// __m128i _mm_cvtsi64_si128 (__int64 a)
-            ///   MOVQ xmm, reg/m64
+            /// __int64 _mm_cvttsd_si64 (__m128d a)
+            ///    CVTTSD2SI r64, xmm1/m64
+            ///   VCVTTSD2SI r64, xmm1/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static Vector128<ulong> ConvertScalarToVector128UInt64(ulong value) { throw new PlatformNotSupportedException(); }
-
+            public static long ConvertToInt64WithTruncation(Vector128<double> value) { throw new PlatformNotSupportedException(); }
              /// <summary>
-            /// __int64 _mm_cvttsd_si64 (__m128d a)
-            ///   CVTTSD2SI reg, xmm/m64
+            /// __int64 _mm_cvtsi128_si64 (__m128i a)
+            ///    MOVQ r/m64, xmm1
+            ///   VMOVQ r/m64, xmm1
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static long ConvertToInt64WithTruncation(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+            public static ulong ConvertToUInt64(Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// void _mm_stream_si64(__int64 *p, __int64 a)
@@ -88,635 +91,709 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_add_epi8 (__m128i a,  __m128i b)
-        ///   PADDB xmm, xmm/m128
+        ///    PADDB xmm1,               xmm2/m128
+        ///   VPADDB xmm1,         xmm2, xmm3/m128
+        ///   VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Add(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi8 (__m128i a,  __m128i b)
-        ///   PADDB xmm, xmm/m128
+        ///    PADDB xmm1,               xmm2/m128
+        ///   VPADDB xmm1,         xmm2, xmm3/m128
+        ///   VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Add(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi16 (__m128i a,  __m128i b)
-        ///   PADDW xmm, xmm/m128
+        ///    PADDW xmm1,               xmm2/m128
+        ///   VPADDW xmm1,         xmm2, xmm3/m128
+        ///   VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Add(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi16 (__m128i a,  __m128i b)
-        ///   PADDW xmm, xmm/m128
+        ///    PADDW xmm1,               xmm2/m128
+        ///   VPADDW xmm1,         xmm2, xmm3/m128
+        ///   VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Add(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi32 (__m128i a,  __m128i b)
-        ///   PADDD xmm, xmm/m128
+        ///    PADDD xmm1,               xmm2/m128
+        ///   VPADDD xmm1,         xmm2, xmm3/m128
+        ///   VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Add(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi32 (__m128i a,  __m128i b)
-        ///   PADDD xmm, xmm/m128
+        ///    PADDD xmm1,               xmm2/m128
+        ///   VPADDD xmm1,         xmm2, xmm3/m128
+        ///   VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Add(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi64 (__m128i a,  __m128i b)
-        ///   PADDQ xmm, xmm/m128
+        ///    PADDQ xmm1,               xmm2/m128
+        ///   VPADDQ xmm1,         xmm2, xmm3/m128
+        ///   VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Add(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_add_epi64 (__m128i a,  __m128i b)
-        ///   PADDQ xmm, xmm/m128
+        ///    PADDQ xmm1,               xmm2/m128
+        ///   VPADDQ xmm1,         xmm2, xmm3/m128
+        ///   VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Add(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_add_pd (__m128d a,  __m128d b)
-        ///   ADDPD xmm, xmm/m128
+        ///    ADDPD xmm1,               xmm2/m128
+        ///   VADDPD xmm1,         xmm2, xmm3/m128
+        ///   VADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Add(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_add_sd (__m128d a,  __m128d b)
-        ///   ADDSD xmm, xmm/m64
+        ///    ADDSD xmm1,               xmm2/m64
+        ///   VADDSD xmm1,         xmm2, xmm3/m64
+        ///   VADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> AddScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_adds_epi8 (__m128i a,  __m128i b)
-        ///   PADDSB xmm, xmm/m128
+        ///    PADDSB xmm1,               xmm2/m128
+        ///   VPADDSB xmm1,         xmm2, xmm3/m128
+        ///   VPADDSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> AddSaturate(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_adds_epu8 (__m128i a,  __m128i b)
-        ///   PADDUSB xmm, xmm/m128
+        ///    PADDUSB xmm1,               xmm2/m128
+        ///   VPADDUSB xmm1,         xmm2, xmm3/m128
+        ///   VPADDUSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_adds_epi16 (__m128i a,  __m128i b)
-        ///   PADDSW xmm, xmm/m128
+        ///    PADDSW xmm1,               xmm2/m128
+        ///   VPADDSW xmm1,         xmm2, xmm3/m128
+        ///   VPADDSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> AddSaturate(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_adds_epu16 (__m128i a,  __m128i b)
-        ///   PADDUSW xmm, xmm/m128
+        ///    PADDUSW xmm1,               xmm2/m128
+        ///   VPADDUSW xmm1,         xmm2, xmm3/m128
+        ///   VPADDUSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> AddSaturate(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> And(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> And(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> And(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> And(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> And(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> And(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> And(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> And(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_and_pd (__m128d a, __m128d b)
-        ///   ANDPD xmm, xmm/m128
+        ///    ANDPD xmm1,               xmm2/m128
+        ///   VANDPD xmm1,         xmm2, xmm3/m128
+        ///   VANDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> And(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> AndNot(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> AndNot(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> AndNot(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> AndNot(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> AndNot(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> AndNot(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> AndNot(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> AndNot(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_andnot_pd (__m128d a, __m128d b)
-        ///   ADDNPD xmm, xmm/m128
+        ///    ANDNPD xmm1,               xmm2/m128
+        ///   VANDNPD xmm1,         xmm2, xmm3/m128
+        ///   VANDNPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> AndNot(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_avg_epu8 (__m128i a,  __m128i b)
-        ///   PAVGB xmm, xmm/m128
+        ///    PAVGB xmm1,               xmm2/m128
+        ///   VPAVGB xmm1,         xmm2, xmm3/m128
+        ///   VPAVGB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Average(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_avg_epu16 (__m128i a,  __m128i b)
-        ///   PAVGW xmm, xmm/m128
+        ///    PAVGW xmm1,               xmm2/m128
+        ///   VPAVGW xmm1,         xmm2, xmm3/m128
+        ///   VPAVGW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Average(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_cmpeq_epi8 (__m128i a,  __m128i b)
-        ///   PCMPEQB xmm, xmm/m128
+        ///    PCMPEQB xmm1,       xmm2/m128
+        ///   VPCMPEQB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> CompareEqual(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpeq_epi8 (__m128i a,  __m128i b)
-        ///   PCMPEQB xmm, xmm/m128
+        ///    PCMPEQB xmm1,       xmm2/m128
+        ///   VPCMPEQB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> CompareEqual(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpeq_epi16 (__m128i a,  __m128i b)
-        ///   PCMPEQW xmm, xmm/m128
+        ///    PCMPEQW xmm1,       xmm2/m128
+        ///   VPCMPEQW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> CompareEqual(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpeq_epi16 (__m128i a,  __m128i b)
-        ///   PCMPEQW xmm, xmm/m128
+        ///    PCMPEQW xmm1,       xmm2/m128
+        ///   VPCMPEQW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> CompareEqual(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpeq_epi32 (__m128i a,  __m128i b)
-        ///   PCMPEQD xmm, xmm/m128
+        ///    PCMPEQD xmm1,       xmm2/m128
+        ///   VPCMPEQD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> CompareEqual(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpeq_epi32 (__m128i a,  __m128i b)
-        ///   PCMPEQD xmm, xmm/m128
+        ///    PCMPEQD xmm1,       xmm2/m128
+        ///   VPCMPEQD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> CompareEqual(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_cmpeq_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(0)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(0)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(0)
          /// </summary>
          public static Vector128<double> CompareEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// int _mm_comieq_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarOrderedEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// int _mm_ucomieq_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarUnorderedEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_cmpeq_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(0)
-        /// </summary>
-        public static Vector128<double> CompareScalarEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_cmpgt_epi8 (__m128i a,  __m128i b)
-        ///   PCMPGTB xmm, xmm/m128
+        ///    PCMPGTB xmm1,       xmm2/m128
+        ///   VPCMPGTB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> CompareGreaterThan(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpgt_epi16 (__m128i a,  __m128i b)
-        ///   PCMPGTW xmm, xmm/m128
+        ///    PCMPGTW xmm1,       xmm2/m128
+        ///   VPCMPGTW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> CompareGreaterThan(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpgt_epi32 (__m128i a,  __m128i b)
-        ///   PCMPGTD xmm, xmm/m128
+        ///    PCMPGTD xmm1,       xmm2/m128
+        ///   VPCMPGTD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> CompareGreaterThan(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_cmpgt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(1) with swapped operands
+        ///    CMPPD xmm1,       xmm2/m128, imm8(1)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(1)   ; with swapped operands
          /// </summary>
          public static Vector128<double> CompareGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// int _mm_comigt_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarOrderedGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// int _mm_ucomigt_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarUnorderedGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_cmpgt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(1) with swapped operands
-        /// </summary>
-        public static Vector128<double> CompareScalarGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_cmpge_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(2) with swapped operands
+        ///    CMPPD xmm1,       xmm2/m128, imm8(2)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(2)   ; with swapped operands
          /// </summary>
          public static Vector128<double> CompareGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// int _mm_comige_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// int _mm_ucomige_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_cmpge_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(2) with swapped operands
-        /// </summary>
-        public static Vector128<double> CompareScalarGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_cmplt_epi8 (__m128i a,  __m128i b)
-        ///   PCMPGTB xmm, xmm/m128
+        ///    PCMPGTB xmm1,       xmm2/m128    ; with swapped operands
+        ///   VPCMPGTB xmm1, xmm2, xmm3/m128    ; with swapped operands
          /// </summary>
          public static Vector128<sbyte> CompareLessThan(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmplt_epi16 (__m128i a,  __m128i b)
-        ///   PCMPGTW xmm, xmm/m128
+        ///    PCMPGTW xmm1,       xmm2/m128    ; with swapped operands
+        ///   VPCMPGTW xmm1, xmm2, xmm3/m128    ; with swapped operands
          /// </summary>
          public static Vector128<short> CompareLessThan(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmplt_epi32 (__m128i a,  __m128i b)
-        ///   PCMPGTD xmm, xmm/m128
+        ///    PCMPGTD xmm1,       xmm2/m128    ; with swapped operands
+        ///   VPCMPGTD xmm1, xmm2, xmm3/m128    ; with swapped operands
          /// </summary>
          public static Vector128<int> CompareLessThan(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_cmplt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(1)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(1)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(1)
          /// </summary>
          public static Vector128<double> CompareLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// int _mm_comilt_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
+        /// __m128d _mm_cmple_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(2)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(2)
          /// </summary>
-        public static bool CompareScalarOrderedLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomilt_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
+        /// __m128d _mm_cmpneq_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(4)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(4)
          /// </summary>
-        public static bool CompareScalarUnorderedLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmplt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(1)
+        /// __m128d _mm_cmpngt_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(5)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(5)   ; with swapped operands
          /// </summary>
-        public static Vector128<double> CompareScalarLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareNotGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmple_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(2)
+        /// __m128d _mm_cmpnge_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(6)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(6)   ; with swapped operands
          /// </summary>
-        public static Vector128<double> CompareLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comile_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
+        /// __m128d _mm_cmpnlt_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(5)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(5)
          /// </summary>
-        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareNotLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomile_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
+        /// __m128d _mm_cmpnle_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(6)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(6)
          /// </summary>
-        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmple_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(2)
+        /// __m128d _mm_cmpord_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(7)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(7)
          /// </summary>
-        public static Vector128<double> CompareScalarLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> CompareOrdered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128d _mm_cmpneq_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(4)
+        /// __m128d _mm_cmpeq_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(0)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(0)
          /// </summary>
-        public static Vector128<double> CompareNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_comineq_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
+        /// __m128d _mm_cmpgt_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(1)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(1)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarOrderedNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_ucomineq_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
+        /// __m128d _mm_cmpge_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(2)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(2)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmpneq_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(4)
+        /// __m128d _mm_cmplt_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(1)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(1)
          /// </summary>
-        public static Vector128<double> CompareScalarNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmpngt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(5) with swapped operands
+        /// __m128d _mm_cmple_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(2)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(2)
          /// </summary>
-        public static Vector128<double> CompareNotGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmpngt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(5) with swapped operands
+        /// __m128d _mm_cmpneq_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(4)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(4)
          /// </summary>
-        public static Vector128<double> CompareScalarNotGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmpnge_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(6) with swapped operands
+        /// __m128d _mm_cmpngt_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(5)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(5)   ; with swapped operands
          /// </summary>
-        public static Vector128<double> CompareNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CompareScalarNotGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_cmpnge_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(6) with swapped operands
+        ///    CMPSD xmm1,       xmm2/m64, imm8(6)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(6)   ; with swapped operands
          /// </summary>
          public static Vector128<double> CompareScalarNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_cmpnlt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(5)
-        /// </summary>
-        public static Vector128<double> CompareNotLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_cmpnlt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(5)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(5)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(5)
          /// </summary>
          public static Vector128<double> CompareScalarNotLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_cmpnle_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(6)
-        /// </summary>
-        public static Vector128<double> CompareNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_cmpnle_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(6)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(6)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(6)
          /// </summary>
          public static Vector128<double> CompareScalarNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128d _mm_cmpord_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(7)
-        /// </summary>
-        public static Vector128<double> CompareOrdered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_cmpord_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(7)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(7)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(7)
          /// </summary>
          public static Vector128<double> CompareScalarOrdered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
-        /// __m128d _mm_cmpunord_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(3)
+        /// int _mm_comieq_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISD xmm1, xmm2/m64        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<double> CompareUnordered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cmpunord_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(3)
+        /// int _mm_comigt_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISD xmm1, xmm2/m64        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static Vector128<double> CompareScalarUnordered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarOrderedGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_cvtps_epi32 (__m128 a)
-        ///   CVTPS2DQ xmm, xmm/m128
+        /// int _mm_comige_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; CF=0
+        ///   VCOMISD xmm1, xmm2/m64        ; CF=0
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; CF=0
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_cvtpd_epi32 (__m128d a)
-        ///   CVTPD2DQ xmm, xmm/m128
+        /// int _mm_comilt_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarOrderedLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cvtepi32_ps (__m128i a)
-        ///   CVTDQ2PS xmm, xmm/m128
+        /// int _mm_comile_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; PF=0 &amp;&amp; (ZF=1 || CF=1)
          /// </summary>
-        public static Vector128<float> ConvertToVector128Single(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_cvtpd_ps (__m128d a)
-        ///   CVTPD2PS xmm, xmm/m128
+        /// int _mm_comineq_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; ZF=0 || PF=1
+        ///   VCOMISD xmm1, xmm2/m64        ; ZF=0 || PF=1
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; ZF=0 || PF=1
          /// </summary>
-        public static Vector128<float> ConvertToVector128Single(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarOrderedNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128d _mm_cvtepi32_pd (__m128i a)
-        ///   CVTDQ2PD xmm, xmm/m128
+        /// __m128d _mm_cmpunord_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(3)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(3)
          /// </summary>
-        public static Vector128<double> ConvertToVector128Double(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> CompareScalarUnordered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_cvtps_pd (__m128 a)
-        ///   CVTPS2PD xmm, xmm/m128
+        /// int _mm_ucomieq_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISD xmm1, xmm2/m64       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<double> ConvertToVector128Double(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
+        public static bool CompareScalarUnorderedEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_cvtsd_si32 (__m128d a)
-        ///   CVTSD2SI r32, xmm/m64
+        /// int _mm_ucomigt_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static int ConvertToInt32(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarUnorderedGreaterThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_cvtsi128_si32 (__m128i a)
-        ///   MOVD reg/m32, xmm
+        /// int _mm_ucomige_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64       ; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; CF=0
          /// </summary>
-        public static int ConvertToInt32(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_cvtsi128_si32 (__m128i a)
-        ///   MOVD reg/m32, xmm
+        /// int _mm_ucomilt_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static uint ConvertToUInt32(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
+        public static bool CompareScalarUnorderedLessThan(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// int _mm_ucomile_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        /// </summary>
+        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// int _mm_ucomineq_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; ZF=0 || PF=1
+        ///   VUCOMISD xmm1, xmm2/m64       ; ZF=0 || PF=1
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; ZF=0 || PF=1
+        /// </summary>
+        public static bool CompareScalarUnorderedNotEqual(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128d _mm_cmpunord_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(3)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(3)
+        /// </summary>
+        public static Vector128<double> CompareUnordered(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_cvtsi32_sd (__m128d a, int b)
-        ///   CVTSI2SD xmm, reg/m32
+        ///    CVTSI2SD xmm1,       r/m32
+        ///   VCVTSI2SD xmm1, xmm2, r/m32
          /// </summary>
          public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, int value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_cvtss_sd (__m128d a, __m128 b)
-        ///   CVTSS2SD xmm, xmm/m32
+        ///    CVTSS2SD xmm1,       xmm2/m32
+        ///   VCVTSS2SD xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtsi32_si128 (int a)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, r/m32
+        ///   VMOVD xmm1, r/m32
          /// </summary>
          public static Vector128<int> ConvertScalarToVector128Int32(int value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_cvtsd_ss (__m128 a, __m128d b)
-        ///   CVTSD2SS xmm, xmm/m64
+        ///    CVTSD2SS xmm1,       xmm2/m64
+        ///   VCVTSD2SS xmm1, xmm2, xmm3/m64
          /// </summary>
          public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtsi32_si128 (int a)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, r/m32
+        ///   VMOVD xmm1, r/m32
          /// </summary>
          public static Vector128<uint> ConvertScalarToVector128UInt32(uint value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128i _mm_cvttps_epi32 (__m128 a)
-        ///   CVTTPS2DQ xmm, xmm/m128
+        /// __m128d _mm_cvtepi32_pd (__m128i a)
+        ///    CVTDQ2PD xmm1,         xmm2/m64
+        ///   VCVTDQ2PD xmm1,         xmm2/m64
+        ///   VCVTDQ2PD xmm1 {k1}{z}, xmm2/m64/m32bcst
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> ConvertToVector128Double(Vector128<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_cvttpd_epi32 (__m128d a)
-        ///   CVTTPD2DQ xmm, xmm/m128
+        /// __m128d _mm_cvtps_pd (__m128 a)
+        ///    CVTPS2PD xmm1,         xmm2/m64
+        ///   VCVTPS2PD xmm1,         xmm2/m64
+        ///   VCVTPS2PD xmm1 {k1}{z}, xmm2/m64/m32bcst
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> ConvertToVector128Double(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_cvttsd_si32 (__m128d a)
-        ///   CVTTSD2SI reg, xmm/m64
+        /// __m128i _mm_cvtps_epi32 (__m128 a)
+        ///    CVTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static int ConvertToInt32WithTruncation(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<int> ConvertToVector128Int32(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_div_pd (__m128d a,  __m128d b)
-        ///   DIVPD xmm, xmm/m128
+        /// __m128i _mm_cvtpd_epi32 (__m128d a)
+        ///    CVTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
-        public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<int> ConvertToVector128Int32(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_div_sd (__m128d a,  __m128d b)
-        ///   DIVSD xmm, xmm/m64
+        /// __m128i _mm_cvttps_epi32 (__m128 a)
+        ///    CVTTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// int _mm_extract_epi16 (__m128i a,  int immediate)
-        ///   PEXTRW reg, xmm, imm8
+        /// __m128i _mm_cvttpd_epi32 (__m128d a)
+        ///    CVTTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
-        public static ushort Extract(Vector128<ushort> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
-        ///   PINSRW xmm, reg/m16, imm8
+        /// __m128 _mm_cvtepi32_ps (__m128i a)
+        ///    CVTDQ2PS xmm1,         xmm2/m128
+        ///   VCVTDQ2PS xmm1,         xmm2/m128
+        ///   VCVTDQ2PS xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static Vector128<short> Insert(Vector128<short> value, short data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> ConvertToVector128Single(Vector128<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
-        ///   PINSRW xmm, reg/m16, imm8
+        /// __m128 _mm_cvtpd_ps (__m128d a)
+        ///    CVTPD2PS xmm1,         xmm2/m128
+        ///   VCVTPD2PS xmm1,         xmm2/m128
+        ///   VCVTPD2PS xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
-        public static Vector128<ushort> Insert(Vector128<ushort> value, ushort data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> ConvertToVector128Single(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
-        /// </summary>
-        public static unsafe Vector128<sbyte> LoadVector128(sbyte* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
-        /// </summary>
-        public static unsafe Vector128<byte> LoadVector128(byte* address) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// int _mm_cvtsi128_si32 (__m128i a)
+        ///    MOVD r/m32, xmm1
+        ///   VMOVD r/m32, xmm1
          /// </summary>
-        public static unsafe Vector128<short> LoadVector128(short* address) { throw new PlatformNotSupportedException(); }
+        public static int ConvertToInt32(Vector128<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// int _mm_cvtsd_si32 (__m128d a)
+        ///    CVTSD2SI r32, xmm1/m64
+        ///   VCVTSD2SI r32, xmm1/m64
          /// </summary>
-        public static unsafe Vector128<ushort> LoadVector128(ushort* address) { throw new PlatformNotSupportedException(); }
+        public static int ConvertToInt32(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// int _mm_cvttsd_si32 (__m128d a)
+        ///    CVTTSD2SI r32, xmm1/m64
+        ///   VCVTTSD2SI r32, xmm1/m64
          /// </summary>
-        public static unsafe Vector128<int> LoadVector128(int* address) { throw new PlatformNotSupportedException(); }
+        public static int ConvertToInt32WithTruncation(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// int _mm_cvtsi128_si32 (__m128i a)
+        ///    MOVD r/m32, xmm1
+        ///   VMOVD r/m32, xmm1
          /// </summary>
-        public static unsafe Vector128<uint> LoadVector128(uint* address) { throw new PlatformNotSupportedException(); }
+        public static uint ConvertToUInt32(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128d _mm_div_pd (__m128d a,  __m128d b)
+        ///    DIVPD xmm1,               xmm2/m128
+        ///   VDIVPD xmm1,         xmm2, xmm3/m128
+        ///   VDIVPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
-        public static unsafe Vector128<long> LoadVector128(long* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128d _mm_div_sd (__m128d a,  __m128d b)
+        ///    DIVSD xmm1,       xmm2/m64
+        ///   VDIVSD xmm1, xmm2, xmm3/m64
          /// </summary>
-        public static unsafe Vector128<ulong> LoadVector128(ulong* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128d _mm_loadu_pd (double const* mem_address)
-        ///   MOVUPD xmm, m128
+        /// int _mm_extract_epi16 (__m128i a,  int immediate)
+        ///    PEXTRW r/m16, xmm1, imm8
+        ///   VPEXTRW r/m16, xmm1, imm8
          /// </summary>
-        public static unsafe Vector128<double> LoadVector128(double* address) { throw new PlatformNotSupportedException(); }
+        public static ushort Extract(Vector128<ushort> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128d _mm_load_sd (double const* mem_address)
-        ///   MOVSD xmm, m64
+        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
+        ///    PINSRW xmm1,       r/m16, imm8
+        ///   VPINSRW xmm1, xmm2, r/m16, imm8
          /// </summary>
-        public static unsafe Vector128<double> LoadScalarVector128(double* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<short> Insert(Vector128<short> value, short data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
+        ///    PINSRW xmm1,       r/m16, imm8
+        ///   VPINSRW xmm1, xmm2, r/m16, imm8
+        /// </summary>
+        public static Vector128<ushort> Insert(Vector128<ushort> value, ushort data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<sbyte> LoadAlignedVector128(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<byte> LoadAlignedVector128(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
@@ -726,32 +803,44 @@ namespace System.Runtime.Intrinsics.X86
          public static unsafe Vector128<short> LoadAlignedVector128(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<ushort> LoadAlignedVector128(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<int> LoadAlignedVector128(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<uint> LoadAlignedVector128(uint* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA64 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<long> LoadAlignedVector128(long* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA64 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<ulong> LoadAlignedVector128(ulong* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_load_pd (double const* mem_address)
-        ///   MOVAPD xmm, m128
+        ///    MOVAPD xmm1,         m128
+        ///   VMOVAPD xmm1,         m128
+        ///   VMOVAPD xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<double> LoadAlignedVector128(double* address) { throw new PlatformNotSupportedException(); }
  
@@ -760,70 +849,154 @@ namespace System.Runtime.Intrinsics.X86
          ///   LFENCE
          /// </summary>
          public static void LoadFence() { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_loadh_pd (__m128d a, double const* mem_addr)
-        ///   MOVHPD xmm, m64
+        ///    MOVHPD xmm1,       m64
+        ///   VMOVHPD xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<double> LoadHigh(Vector128<double> lower, double* address) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_loadl_pd (__m128d a, double const* mem_addr)
-        ///   MOVLPD xmm, m64
+        ///    MOVLPD xmm1,       m64
+        ///   VMOVLPD xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<double> LoadLow(Vector128<double> upper, double* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_loadu_si32 (void const* mem_addr)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, m32
+        ///   VMOVD xmm1, m32
          /// </summary>
          public static unsafe Vector128<int> LoadScalarVector128(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_loadu_si32 (void const* mem_addr)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, m32
+        ///   VMOVD xmm1, m32
          /// </summary>
          public static unsafe Vector128<uint> LoadScalarVector128(uint* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr)
-        ///   MOVQ xmm, reg/m64
+        ///    MOVQ xmm1, m64
+        ///   VMOVQ xmm1, m64
          /// </summary>
          public static unsafe Vector128<long> LoadScalarVector128(long* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr)
-        ///   MOVQ xmm, reg/m64
+        ///    MOVQ xmm1, m64
+        ///   VMOVQ xmm1, m64
          /// </summary>
          public static unsafe Vector128<ulong> LoadScalarVector128(ulong* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_load_sd (double const* mem_address)
+        ///    MOVSD xmm1,         m64
+        ///   VMOVSD xmm1,         m64
+        ///   VMOVSD xmm1 {k1}{z}, m64
+        /// </summary>
+        public static unsafe Vector128<double> LoadScalarVector128(double* address) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU  xmm1,         m128
+        ///   VMOVDQU  xmm1,         m128
+        ///   VMOVDQU8 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<sbyte> LoadVector128(sbyte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU  xmm1,         m128
+        ///   VMOVDQU  xmm1,         m128
+        ///   VMOVDQU8 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<byte> LoadVector128(byte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU16 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<short> LoadVector128(short* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU16 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<ushort> LoadVector128(ushort* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU32 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<int> LoadVector128(int* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU32 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<uint> LoadVector128(uint* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU64 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<long> LoadVector128(long* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU64 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<ulong> LoadVector128(ulong* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_loadu_pd (double const* mem_address)
+        ///    MOVUPD xmm1,         m128
+        ///   VMOVUPD xmm1,         m128
+        ///   VMOVUPD xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<double> LoadVector128(double* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// void _mm_maskmoveu_si128 (__m128i a,  __m128i mask, char* mem_address)
-        ///   MASKMOVDQU xmm, xmm
+        ///    MASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
+        ///   VMASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
          /// </summary>
          public static unsafe void MaskMove(Vector128<sbyte> source, Vector128<sbyte> mask, sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_maskmoveu_si128 (__m128i a,  __m128i mask, char* mem_address)
-        ///   MASKMOVDQU xmm, xmm
+        ///    MASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
+        ///   VMASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
          /// </summary>
          public static unsafe void MaskMove(Vector128<byte> source, Vector128<byte> mask, byte* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_max_epu8 (__m128i a,  __m128i b)
-        ///   PMAXUB xmm, xmm/m128
+        ///    PMAXUB xmm1,               xmm2/m128
+        ///   VPMAXUB xmm1,         xmm2, xmm3/m128
+        ///   VPMAXUB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Max(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_max_epi16 (__m128i a,  __m128i b)
-        ///   PMAXSW xmm, xmm/m128
+        ///    PMAXSW xmm1,               xmm2/m128
+        ///   VPMAXSW xmm1,         xmm2, xmm3/m128
+        ///   VPMAXSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Max(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_max_pd (__m128d a,  __m128d b)
-        ///   MAXPD xmm, xmm/m128
+        ///    MAXPD xmm1,               xmm2/m128
+        ///   VMAXPD xmm1,         xmm2, xmm3/m128
+        ///   VMAXPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Max(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_max_sd (__m128d a,  __m128d b)
-        ///   MAXSD xmm, xmm/m64
+        ///    MAXSD xmm1,       xmm2/m64
+        ///   VMAXSD xmm1, xmm2, xmm3/m64
          /// </summary>
          public static Vector128<double> MaxScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
@@ -835,631 +1008,796 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_min_epu8 (__m128i a,  __m128i b)
-        ///   PMINUB xmm, xmm/m128
+        ///    PMINUB xmm1,               xmm2/m128
+        ///   VPMINUB xmm1,         xmm2, xmm3/m128
+        ///   VPMINUB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Min(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_min_epi16 (__m128i a,  __m128i b)
-        ///   PMINSW xmm, xmm/m128
+        ///    PMINSW xmm1,               xmm2/m128
+        ///   VPMINSW xmm1,         xmm2, xmm3/m128
+        ///   VPMINSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Min(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_min_pd (__m128d a,  __m128d b)
-        ///   MINPD xmm, xmm/m128
+        ///    MINPD xmm1,               xmm2/m128
+        ///   VMINPD xmm1,         xmm2, xmm3/m128
+        ///   VMINPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Min(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_min_sd (__m128d a,  __m128d b)
-        ///   MINSD xmm, xmm/m64
+        ///    MINSD xmm1,       xmm2/m64
+        ///   VMINSD xmm1, xmm2, xmm3/m64
          /// </summary>
          public static Vector128<double> MinScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128d _mm_move_sd (__m128d a, __m128d b)
-        ///   MOVSD xmm, xmm
-        /// </summary>
-        public static Vector128<double> MoveScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// int _mm_movemask_epi8 (__m128i a)
-        ///   PMOVMSKB reg, xmm
+        ///    PMOVMSKB r32, xmm1
+        ///   VPMOVMSKB r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_movemask_epi8 (__m128i a)
-        ///   PMOVMSKB reg, xmm
+        ///    PMOVMSKB r32, xmm1
+        ///   VPMOVMSKB r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_movemask_pd (__m128d a)
-        ///   MOVMSKPD reg, xmm
+        ///    MOVMSKPD r32, xmm1
+        ///   VMOVMSKPD r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_move_epi64 (__m128i a)
-        ///   MOVQ xmm, xmm
+        ///    MOVQ xmm1, xmm2
+        ///   VMOVQ xmm1, xmm2
          /// </summary>
          public static Vector128<long> MoveScalar(Vector128<long> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_move_epi64 (__m128i a)
-        ///   MOVQ xmm, xmm
+        ///    MOVQ xmm1, xmm2
+        ///   VMOVQ xmm1, xmm2
          /// </summary>
          public static Vector128<ulong> MoveScalar(Vector128<ulong> value) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_move_sd (__m128d a, __m128d b)
+        ///    MOVSD xmm1,               xmm2
+        ///   VMOVSD xmm1,         xmm2, xmm3
+        ///   VMOVSD xmm1 {k1}{z}, xmm2, xmm3
+        /// </summary>
+        public static Vector128<double> MoveScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mul_epu32 (__m128i a,  __m128i b)
-        ///   PMULUDQ xmm, xmm/m128
+        ///    PMULUDQ xmm1,               xmm2/m128
+        ///   VPMULUDQ xmm1,         xmm2, xmm3/m128
+        ///   VPMULUDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Multiply(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_mul_pd (__m128d a,  __m128d b)
-        ///   MULPD xmm, xmm/m128
+        ///    MULPD xmm1,               xmm2/m128
+        ///   VMULPD xmm1,         xmm2, xmm3/m128
+        ///   VMULPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Multiply(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128d _mm_mul_sd (__m128d a,  __m128d b)
-        ///   MULSD xmm, xmm/m64
+        /// __m128i _mm_madd_epi16 (__m128i a,  __m128i b)
+        ///    PMADDWD xmm1,               xmm2/m128
+        ///   VPMADDWD xmm1,         xmm2, xmm3/m128
+        ///   VPMADDWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
-        public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+        public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mulhi_epi16 (__m128i a,  __m128i b)
-        ///   PMULHW xmm, xmm/m128
+        ///    PMULHW xmm1,               xmm2/m128
+        ///   VPMULHW xmm1,         xmm2, xmm3/m128
+        ///   VPMULHW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyHigh(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mulhi_epu16 (__m128i a,  __m128i b)
-        ///   PMULHUW xmm, xmm/m128
+        ///    PMULHUW xmm1,               xmm2/m128
+        ///   VPMULHUW xmm1,         xmm2, xmm3/m128
+        ///   VPMULHUW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> MultiplyHigh(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128i _mm_madd_epi16 (__m128i a,  __m128i b)
-        ///   PMADDWD xmm, xmm/m128
-        /// </summary>
-        public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_mullo_epi16 (__m128i a,  __m128i b)
-        ///   PMULLW xmm, xmm/m128
+        ///    PMULLW xmm1,               xmm2/m128
+        ///   VPMULLW xmm1,         xmm2, xmm3/m128
+        ///   VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mullo_epi16 (__m128i a,  __m128i b)
-        ///   PMULLW xmm, xmm/m128
+        ///    PMULLW xmm1,               xmm2/m128
+        ///   VPMULLW xmm1,         xmm2, xmm3/m128
+        ///   VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> MultiplyLow(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m128d _mm_mul_sd (__m128d a,  __m128d b)
+        ///    MULSD xmm1,       xmm2/m64
+        ///   VMULSD xmm1, xmm2, xmm3/m64
+        /// </summary>
+        public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Or(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Or(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Or(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Or(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Or(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Or(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Or(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Or(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_or_pd (__m128d a,  __m128d b)
-        ///   ORPD xmm, xmm/m128
+        ///    ORPD xmm1,               xmm2/m128
+        ///   VORPD xmm1,         xmm2, xmm3/m128
+        ///   VORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Or(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_packs_epi16 (__m128i a,  __m128i b)
-        ///   PACKSSWB xmm, xmm/m128
+        ///    PACKSSWB xmm1,               xmm2/m128
+        ///   VPACKSSWB xmm1,         xmm2, xmm3/m128
+        ///   VPACKSSWB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> PackSignedSaturate(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_packs_epi32 (__m128i a,  __m128i b)
-        ///   PACKSSDW xmm, xmm/m128
+        ///    PACKSSDW xmm1,               xmm2/m128
+        ///   VPACKSSDW xmm1,         xmm2, xmm3/m128
+        ///   VPACKSSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_packus_epi16 (__m128i a,  __m128i b)
-        ///   PACKUSWB xmm, xmm/m128
+        ///    PACKUSWB xmm1,               xmm2/m128
+        ///   VPACKUSWB xmm1,         xmm2, xmm3/m128
+        ///   VPACKUSWB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// __m128i _mm_sad_epu8 (__m128i a,  __m128i b)
-        ///   PSADBW xmm, xmm/m128
-        /// </summary>
-        public static Vector128<ushort> SumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
-        ///   PSHUFD xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<int> Shuffle(Vector128<int> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
-        ///   PSHUFD xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<uint> Shuffle(Vector128<uint> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128d _mm_shuffle_pd (__m128d a,  __m128d b, int immediate)
-        ///   SHUFPD xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<double> Shuffle(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int immediate)
-        ///   PSHUFHW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<short> ShuffleHigh(Vector128<short> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int control)
-        ///   PSHUFHW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<ushort> ShuffleHigh(Vector128<ushort> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
-        ///   PSHUFLW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<short> ShuffleLow(Vector128<short> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
-        ///   PSHUFLW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<ushort> ShuffleLow(Vector128<ushort> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_sll_epi16 (__m128i a, __m128i count)
-        ///   PSLLW xmm, xmm/m128
+        ///    PSLLW xmm1,               xmm2/m128
+        ///   VPSLLW xmm1,         xmm2, xmm3/m128
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> ShiftLeftLogical(Vector128<short> value, Vector128<short> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sll_epi16 (__m128i a,  __m128i count)
-        ///   PSLLW xmm, xmm/m128
+        ///    PSLLW xmm1,               xmm2/m128
+        ///   VPSLLW xmm1,         xmm2, xmm3/m128
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, Vector128<ushort> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sll_epi32 (__m128i a, __m128i count)
-        ///   PSLLD xmm, xmm/m128
+        ///    PSLLD xmm1,               xmm2/m128
+        ///   VPSLLD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> ShiftLeftLogical(Vector128<int> value, Vector128<int> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sll_epi32 (__m128i a, __m128i count)
-        ///   PSLLD xmm, xmm/m128
+        ///    PSLLD xmm1,               xmm2/m128
+        ///   VPSLLD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> ShiftLeftLogical(Vector128<uint> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sll_epi64 (__m128i a, __m128i count)
-        ///   PSLLQ xmm, xmm/m128
+        ///    PSLLQ xmm1,               xmm2/m128
+        ///   VPSLLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> ShiftLeftLogical(Vector128<long> value, Vector128<long> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sll_epi64 (__m128i a, __m128i count)
-        ///   PSLLQ xmm, xmm/m128
+        ///    PSLLQ xmm1,               xmm2/m128
+        ///   VPSLLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_slli_epi16 (__m128i a,  int immediate)
-        ///   PSLLW xmm, imm8
+        ///    PSLLW xmm1,               imm8
+        ///   VPSLLW xmm1,         xmm2, imm8
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<short> ShiftLeftLogical(Vector128<short> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_slli_epi16 (__m128i a,  int immediate)
-        ///   PSLLW xmm, imm8
+        ///    PSLLW xmm1,               imm8
+        ///   VPSLLW xmm1,         xmm2, imm8
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_slli_epi32 (__m128i a,  int immediate)
-        ///   PSLLD xmm, imm8
+        ///    PSLLD xmm1,               imm8
+        ///   VPSLLD xmm1,         xmm2, imm8
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<int> ShiftLeftLogical(Vector128<int> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_slli_epi32 (__m128i a,  int immediate)
-        ///   PSLLD xmm, imm8
+        ///    PSLLD xmm1,               imm8
+        ///   VPSLLD xmm1,         xmm2, imm8
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<uint> ShiftLeftLogical(Vector128<uint> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_slli_epi64 (__m128i a,  int immediate)
-        ///   PSLLQ xmm, imm8
+        ///    PSLLQ xmm1,               imm8
+        ///   VPSLLQ xmm1,         xmm2, imm8
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<long> ShiftLeftLogical(Vector128<long> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_slli_epi64 (__m128i a,  int immediate)
-        ///   PSLLQ xmm, imm8
+        ///    PSLLQ xmm1,               imm8
+        ///   VPSLLQ xmm1,         xmm2, imm8
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<sbyte> ShiftLeftLogical128BitLane(Vector128<sbyte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<byte> ShiftLeftLogical128BitLane(Vector128<byte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<short> ShiftLeftLogical128BitLane(Vector128<short> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ushort> ShiftLeftLogical128BitLane(Vector128<ushort> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<int> ShiftLeftLogical128BitLane(Vector128<int> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<uint> ShiftLeftLogical128BitLane(Vector128<uint> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<long> ShiftLeftLogical128BitLane(Vector128<long> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogical128BitLane(Vector128<ulong> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_sra_epi16 (__m128i a, __m128i count)
-        ///   PSRAW xmm, xmm/m128
+        ///    PSRAW xmm1,               xmm2/m128
+        ///   VPSRAW xmm1,         xmm2, xmm3/m128
+        ///   VPSRAW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> ShiftRightArithmetic(Vector128<short> value, Vector128<short> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sra_epi32 (__m128i a, __m128i count)
-        ///   PSRAD xmm, xmm/m128
+        ///    PSRAD xmm1,               xmm2/m128
+        ///   VPSRAD xmm1,         xmm2, xmm3/m128
+        ///   VPSRAD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> ShiftRightArithmetic(Vector128<int> value, Vector128<int> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_srai_epi16 (__m128i a,  int immediate)
-        ///   PSRAW xmm, imm8
+        ///    PSRAW xmm1,               imm8
+        ///   VPSRAW xmm1,         xmm2, imm8
+        ///   VPSRAW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<short> ShiftRightArithmetic(Vector128<short> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srai_epi32 (__m128i a,  int immediate)
-        ///   PSRAD xmm, imm8
+        ///    PSRAD xmm1,               imm8
+        ///   VPSRAD xmm1,         xmm2, imm8
+        ///   VPSRAD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<int> ShiftRightArithmetic(Vector128<int> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_srl_epi16 (__m128i a, __m128i count)
-        ///   PSRLW xmm, xmm/m128
+        ///    PSRLW xmm1,               xmm2/m128
+        ///   VPSRLW xmm1,         xmm2, xmm3/m128
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> ShiftRightLogical(Vector128<short> value, Vector128<short> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srl_epi16 (__m128i a, __m128i count)
-        ///   PSRLW xmm, xmm/m128
+        ///    PSRLW xmm1,               xmm2/m128
+        ///   VPSRLW xmm1,         xmm2, xmm3/m128
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, Vector128<ushort> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srl_epi32 (__m128i a, __m128i count)
-        ///   PSRLD xmm, xmm/m128
+        ///    PSRLD xmm1,               xmm2/m128
+        ///   VPSRLD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> ShiftRightLogical(Vector128<int> value, Vector128<int> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srl_epi32 (__m128i a, __m128i count)
-        ///   PSRLD xmm, xmm/m128
+        ///    PSRLD xmm1,               xmm2/m128
+        ///   VPSRLD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> ShiftRightLogical(Vector128<uint> value, Vector128<uint> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srl_epi64 (__m128i a, __m128i count)
-        ///   PSRLQ xmm, xmm/m128
+        ///    PSRLQ xmm1,               xmm2/m128
+        ///   VPSRLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> ShiftRightLogical(Vector128<long> value, Vector128<long> count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srl_epi64 (__m128i a, __m128i count)
-        ///   PSRLQ xmm, xmm/m128
+        ///    PSRLQ xmm1,               xmm2/m128
+        ///   VPSRLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, Vector128<ulong> count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_srli_epi16 (__m128i a,  int immediate)
-        ///   PSRLW xmm, imm8
+        ///    PSRLW xmm1,               imm8
+        ///   VPSRLW xmm1,         xmm2, imm8
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<short> ShiftRightLogical(Vector128<short> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srli_epi16 (__m128i a,  int immediate)
-        ///   PSRLW xmm, imm8
+        ///    PSRLW xmm1,               imm8
+        ///   VPSRLW xmm1,         xmm2, imm8
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srli_epi32 (__m128i a,  int immediate)
-        ///   PSRLD xmm, imm8
+        ///    PSRLD xmm1,               imm8
+        ///   VPSRLD xmm1,         xmm2, imm8
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<int> ShiftRightLogical(Vector128<int> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srli_epi32 (__m128i a,  int immediate)
-        ///   PSRLD xmm, imm8
+        ///    PSRLD xmm1,               imm8
+        ///   VPSRLD xmm1,         xmm2, imm8
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<uint> ShiftRightLogical(Vector128<uint> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srli_epi64 (__m128i a,  int immediate)
-        ///   PSRLQ xmm, imm8
+        ///    PSRLQ xmm1,               imm8
+        ///   VPSRLQ xmm1,         xmm2, imm8
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<long> ShiftRightLogical(Vector128<long> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_srli_epi64 (__m128i a,  int immediate)
-        ///   PSRLQ xmm, imm8
+        ///    PSRLQ xmm1,               imm8
+        ///   VPSRLQ xmm1,         xmm2, imm8
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, [ConstantExpected] byte count) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<sbyte> ShiftRightLogical128BitLane(Vector128<sbyte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<byte> ShiftRightLogical128BitLane(Vector128<byte> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<short> ShiftRightLogical128BitLane(Vector128<short> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ushort> ShiftRightLogical128BitLane(Vector128<ushort> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<int> ShiftRightLogical128BitLane(Vector128<int> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<uint> ShiftRightLogical128BitLane(Vector128<uint> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<long> ShiftRightLogical128BitLane(Vector128<long> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ulong> ShiftRightLogical128BitLane(Vector128<ulong> value, [ConstantExpected] byte numBytes) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
+        ///    PSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8
+        /// </summary>
+        public static Vector128<int> Shuffle(Vector128<int> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
+        ///    PSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8
+        /// </summary>
+        public static Vector128<uint> Shuffle(Vector128<uint> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_shuffle_pd (__m128d a,  __m128d b, int immediate)
+        ///    SHUFPD xmm1,               xmm2/m128,         imm8
+        ///   VSHUFPD xmm1,         xmm2, xmm3/m128,         imm8
+        ///   VSHUFPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst, imm8
+        /// </summary>
+        public static Vector128<double> Shuffle(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int immediate)
+        ///    PSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<short> ShuffleHigh(Vector128<short> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int immediate)
+        ///    PSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<ushort> ShuffleHigh(Vector128<ushort> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
+        ///    PSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<short> ShuffleLow(Vector128<short> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
+        ///    PSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<ushort> ShuffleLow(Vector128<ushort> value, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128d _mm_sqrt_pd (__m128d a)
-        ///   SQRTPD xmm, xmm/m128
+        ///    SQRTPD xmm1,         xmm2/m128
+        ///   VSQRTPD xmm1,         xmm2/m128
+        ///   VSQRTPD xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
          public static Vector128<double> Sqrt(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_sqrt_sd (__m128d a)
-        ///   SQRTSD xmm, xmm/64
+        ///    SQRTSD xmm1,               xmm2/m64
+        ///   VSQRTSD xmm1,         xmm2, xmm3/m64
+        ///   VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<double> SqrtScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_sqrt_sd (__m128d a, __m128d b)
-        ///   SQRTSD xmm, xmm/64
+        ///    SQRTSD xmm1,               xmm2/m64
+        ///   VSQRTSD xmm1,         xmm2, xmm3/m64
+        ///   VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> SqrtScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// void _mm_store_sd (double* mem_addr, __m128d a)
-        ///   MOVSD m64, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU  m128,         xmm1
+        ///   VMOVDQU  m128,         xmm1
+        ///   VMOVDQU8 m128 {k1}{z}, xmm1
          /// </summary>
-        public static unsafe void StoreScalar(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void Store(sbyte* address, Vector128<sbyte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
-        ///   MOVD m32, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU  m128,         xmm1
+        ///   VMOVDQU  m128,         xmm1
+        ///   VMOVDQU8 m128 {k1}{z}, xmm1
          /// </summary>
-        public static unsafe void StoreScalar(int* address, Vector128<int> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void Store(byte* address, Vector128<byte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
-        ///   MOVQ m64, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU16 m128 {k1}{z}, xmm1
          /// </summary>
-        public static unsafe void StoreScalar(long* address, Vector128<long> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void Store(short* address, Vector128<short> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
-        ///   MOVD m32, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU16 m128 {k1}{z}, xmm1
          /// </summary>
-        public static unsafe void StoreScalar(uint* address, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void Store(ushort* address, Vector128<ushort> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
-        ///   MOVQ m64, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU32 m128 {k1}{z}, xmm1
          /// </summary>
-        public static unsafe void StoreScalar(ulong* address, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
+        public static unsafe void Store(int* address, Vector128<int> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU32 m128 {k1}{z}, xmm1
+        /// </summary>
+        public static unsafe void Store(uint* address, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU64 m128 {k1}{z}, xmm1
+        /// </summary>
+        public static unsafe void Store(long* address, Vector128<long> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU64 m128 {k1}{z}, xmm1
+        /// </summary>
+        public static unsafe void Store(ulong* address, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storeu_pd (double* mem_addr, __m128d a)
+        ///    MOVUPD m128,         xmm1
+        ///   VMOVUPD m128,         xmm1
+        ///   VMOVUPD m128 {k1}{z}, xmm1
+        /// </summary>
+        public static unsafe void Store(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(sbyte* address, Vector128<sbyte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(byte* address, Vector128<byte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(short* address, Vector128<short> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(ushort* address, Vector128<ushort> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(int* address, Vector128<int> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(uint* address, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA64 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(long* address, Vector128<long> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA64 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(ulong* address, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_store_pd (double* mem_addr, __m128d a)
-        ///   MOVAPD m128, xmm
+        ///    MOVAPD m128,         xmm1
+        ///   VMOVAPD m128,         xmm1
+        ///   VMOVAPD m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector128<sbyte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(byte* address, Vector128<byte> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(short* address, Vector128<short> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector128<ushort> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(int* address, Vector128<int> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(uint* address, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(long* address, Vector128<long> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// void _mm_stream_pd (double* mem_addr, __m128d a)
-        ///   MOVNTPD m128, xmm
+        ///    MOVNTPD m128, xmm1
+        ///   VMOVNTPD m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
  
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(sbyte* address, Vector128<sbyte> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(byte* address, Vector128<byte> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(short* address, Vector128<short> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(ushort* address, Vector128<ushort> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(int* address, Vector128<int> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(uint* address, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(long* address, Vector128<long> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(ulong* address, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// void _mm_storeu_pd (double* mem_addr, __m128d a)
-        ///   MOVUPD m128, xmm
-        /// </summary>
-        public static unsafe void Store(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_storeh_pd (double* mem_addr, __m128d a)
-        ///   MOVHPD m64, xmm
+        ///    MOVHPD m64, xmm1
+        ///   VMOVHPD m64, xmm1
          /// </summary>
          public static unsafe void StoreHigh(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// void _mm_storel_pd (double* mem_addr, __m128d a)
-        ///   MOVLPD m64, xmm
+        ///    MOVLPD m64, xmm1
+        ///   VMOVLPD m64, xmm1
          /// </summary>
          public static unsafe void StoreLow(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
  
@@ -1474,214 +1812,332 @@ namespace System.Runtime.Intrinsics.X86
          /// </summary>
          public static unsafe void StoreNonTemporal(uint* address, uint value) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
+        ///    MOVD m32, xmm1
+        ///   VMOVD m32, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(int* address, Vector128<int> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
+        ///    MOVD m32, xmm1
+        ///   VMOVD m32, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(uint* address, Vector128<uint> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
+        ///    MOVQ m64, xmm1
+        ///   VMOVQ m64, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(long* address, Vector128<long> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
+        ///    MOVQ m64, xmm1
+        ///   VMOVQ m64, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(ulong* address, Vector128<ulong> source) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// void _mm_store_sd (double* mem_addr, __m128d a)
+        ///    MOVSD m64,      xmm1
+        ///   VMOVSD m64,      xmm1
+        ///   VMOVSD m64 {k1}, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(double* address, Vector128<double> source) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128i _mm_sub_epi8 (__m128i a,  __m128i b)
-        ///   PSUBB xmm, xmm/m128
+        ///    PSUBB xmm1,               xmm2/m128
+        ///   VPSUBB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Subtract(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi8 (__m128i a,  __m128i b)
-        ///   PSUBB xmm, xmm/m128
+        ///    PSUBB xmm1,               xmm2/m128
+        ///   VPSUBB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Subtract(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi16 (__m128i a,  __m128i b)
-        ///   PSUBW xmm, xmm/m128
+        ///    PSUBW xmm1,               xmm2/m128
+        ///   VPSUBW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Subtract(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi16 (__m128i a,  __m128i b)
-        ///   PSUBW xmm, xmm/m128
+        ///    PSUBW xmm1,               xmm2/m128
+        ///   VPSUBW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Subtract(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi32 (__m128i a,  __m128i b)
-        ///   PSUBD xmm, xmm/m128
+        ///    PSUBD xmm1,               xmm2/m128
+        ///   VPSUBD xmm1,         xmm2, xmm3/m128
+        ///   VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Subtract(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi32 (__m128i a,  __m128i b)
-        ///   PSUBD xmm, xmm/m128
+        ///    PSUBD xmm1,               xmm2/m128
+        ///   VPSUBD xmm1,         xmm2, xmm3/m128
+        ///   VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Subtract(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi64 (__m128i a,  __m128i b)
-        ///   PSUBQ xmm, xmm/m128
+        ///    PSUBQ xmm1,               xmm2/m128
+        ///   VPSUBQ xmm1,         xmm2, xmm3/m128
+        ///   VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Subtract(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sub_epi64 (__m128i a,  __m128i b)
-        ///   PSUBQ xmm, xmm/m128
+        ///    PSUBQ xmm1,               xmm2/m128
+        ///   VPSUBQ xmm1,         xmm2, xmm3/m128
+        ///   VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Subtract(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_sub_pd (__m128d a, __m128d b)
-        ///   SUBPD xmm, xmm/m128
+        ///    SUBPD xmm1,               xmm2/m128
+        ///   VSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Subtract(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_sub_sd (__m128d a, __m128d b)
-        ///   SUBSD xmm, xmm/m64
+        ///    SUBSD xmm1,               xmm2/m64
+        ///   VSUBSD xmm1,         xmm2, xmm3/m64
+        ///   VSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> SubtractScalar(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_subs_epi8 (__m128i a,  __m128i b)
-        ///   PSUBSB xmm, xmm/m128
+        ///    PSUBSB xmm1,               xmm2/m128
+        ///   VPSUBSB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> SubtractSaturate(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_subs_epi16 (__m128i a,  __m128i b)
-        ///   PSUBSW xmm, xmm/m128
+        ///    PSUBSW xmm1,               xmm2/m128
+        ///   VPSUBSW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> SubtractSaturate(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_subs_epu8 (__m128i a,  __m128i b)
-        ///   PSUBUSB xmm, xmm/m128
+        ///    PSUBUSB xmm1,               xmm2/m128
+        ///   VPSUBUSB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBUSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_subs_epu16 (__m128i a,  __m128i b)
-        ///   PSUBUSW xmm, xmm/m128
+        ///    PSUBUSW xmm1,               xmm2/m128
+        ///   VPSUBUSW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBUSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> SubtractSaturate(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m128i _mm_sad_epu8 (__m128i a,  __m128i b)
+        ///    PSADBW xmm1,               xmm2/m128
+        ///   VPSADBW xmm1,         xmm2, xmm3/m128
+        ///   VPSADBW xmm1 {k1}{z}, xmm2, xmm3/m128
+        /// </summary>
+        public static Vector128<ushort> SumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128i _mm_unpackhi_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKHBW xmm, xmm/m128
+        ///    PUNPCKHBW xmm1,               xmm2/m128
+        ///   VPUNPCKHBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> UnpackHigh(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKHBW xmm, xmm/m128
+        ///    PUNPCKHBW xmm1,               xmm2/m128
+        ///   VPUNPCKHBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> UnpackHigh(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKHWD xmm, xmm/m128
+        ///    PUNPCKHWD xmm1,               xmm2/m128
+        ///   VPUNPCKHWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> UnpackHigh(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKHWD xmm, xmm/m128
+        ///    PUNPCKHWD xmm1,               xmm2/m128
+        ///   VPUNPCKHWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> UnpackHigh(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKHDQ xmm, xmm/m128
+        ///    PUNPCKHDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> UnpackHigh(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKHDQ xmm, xmm/m128
+        ///    PUNPCKHDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> UnpackHigh(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKHQDQ xmm, xmm/m128
+        ///    PUNPCKHQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> UnpackHigh(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpackhi_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKHQDQ xmm, xmm/m128
+        ///    PUNPCKHQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> UnpackHigh(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_unpackhi_pd (__m128d a,  __m128d b)
-        ///   UNPCKHPD xmm, xmm/m128
+        ///    UNPCKHPD xmm1,               xmm2/m128
+        ///   VUNPCKHPD xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKHPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> UnpackHigh(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_unpacklo_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKLBW xmm, xmm/m128
+        ///    PUNPCKLBW xmm1,               xmm2/m128
+        ///   VPUNPCKLBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> UnpackLow(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKLBW xmm, xmm/m128
+        ///    PUNPCKLBW xmm1,               xmm2/m128
+        ///   VPUNPCKLBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> UnpackLow(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKLWD xmm, xmm/m128
+        ///    PUNPCKLWD xmm1,               xmm2/m128
+        ///   VPUNPCKLWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> UnpackLow(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKLWD xmm, xmm/m128
+        ///    PUNPCKLWD xmm1,               xmm2/m128
+        ///   VPUNPCKLWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> UnpackLow(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKLDQ xmm, xmm/m128
+        ///    PUNPCKLDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> UnpackLow(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKLDQ xmm, xmm/m128
+        ///    PUNPCKLDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> UnpackLow(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKLQDQ xmm, xmm/m128
+        ///    PUNPCKLQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> UnpackLow(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_unpacklo_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKLQDQ xmm, xmm/m128
+        ///    PUNPCKLQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> UnpackLow(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_unpacklo_pd (__m128d a,  __m128d b)
-        ///   UNPCKLPD xmm, xmm/m128
+        ///    UNPCKLPD xmm1,               xmm2/m128
+        ///   VUNPCKLPD xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKLPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> UnpackLow(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Xor(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Xor(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Xor(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Xor(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Xor(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Xor(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Xor(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Xor(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_xor_pd (__m128d a,  __m128d b)
-        ///   XORPD xmm, xmm/m128
+        ///    XORPD xmm1,               xmm2/m128
+        ///   VXORPD xmm1,         xmm2, xmm3/m128
+        ///   VXORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Xor(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs

index 9e4a8d673bcd753cb415265e1c67786c1c7d0cfe..072c7f9b64bdcb303bee4b24c946e4c236043ce6 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs
@@ -25,52 +25,55 @@ namespace System.Runtime.Intrinsics.X86
              public static new bool IsSupported { get => IsSupported; }
  
              /// <summary>
-            /// __int64 _mm_cvtsd_si64 (__m128d a)
-            ///   CVTSD2SI r64, xmm/m64
+            /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
+            ///    CVTSI2SD xmm1,       r/m64
+            ///   VCVTSI2SD xmm1, xmm2, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static long ConvertToInt64(Vector128<double> value) => ConvertToInt64(value);
+            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value) => ConvertScalarToVector128Double(upper, value);
              /// <summary>
-            /// __int64 _mm_cvtsi128_si64 (__m128i a)
-            ///   MOVQ reg/m64, xmm
+            /// __m128i _mm_cvtsi64_si128 (__int64 a)
+            ///    MOVQ xmm1, r/m64
+            ///   VMOVQ xmm1, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static long ConvertToInt64(Vector128<long> value) => ConvertToInt64(value);
-
+            public static Vector128<long> ConvertScalarToVector128Int64(long value) => ConvertScalarToVector128Int64(value);
              /// <summary>
-            /// __int64 _mm_cvtsi128_si64 (__m128i a)
-            ///   MOVQ reg/m64, xmm
+            /// __m128i _mm_cvtsi64_si128 (__int64 a)
+            ///    MOVQ xmm1, r/m64
+            ///   VMOVQ xmm1, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static ulong ConvertToUInt64(Vector128<ulong> value) => ConvertToUInt64(value);
+            public static Vector128<ulong> ConvertScalarToVector128UInt64(ulong value) => ConvertScalarToVector128UInt64(value);
  
              /// <summary>
-            /// __m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
-            ///   CVTSI2SD xmm, reg/m64
+            /// __int64 _mm_cvtsi128_si64 (__m128i a)
+            ///    MOVQ r/m64, xmm1
+            ///   VMOVQ r/m64, xmm1
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value) => ConvertScalarToVector128Double(upper, value);
-
+            public static long ConvertToInt64(Vector128<long> value) => ConvertToInt64(value);
              /// <summary>
-            /// __m128i _mm_cvtsi64_si128 (__int64 a)
-            ///   MOVQ xmm, reg/m64
+            /// __int64 _mm_cvtsd_si64 (__m128d a)
+            ///    CVTSD2SI r64, xmm1/m64
+            ///   VCVTSD2SI r64, xmm1/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static Vector128<long> ConvertScalarToVector128Int64(long value) => ConvertScalarToVector128Int64(value);
-
+            public static long ConvertToInt64(Vector128<double> value) => ConvertToInt64(value);
              /// <summary>
-            /// __m128i _mm_cvtsi64_si128 (__int64 a)
-            ///   MOVQ xmm, reg/m64
+            /// __int64 _mm_cvttsd_si64 (__m128d a)
+            ///    CVTTSD2SI r64, xmm1/m64
+            ///   VCVTTSD2SI r64, xmm1/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static Vector128<ulong> ConvertScalarToVector128UInt64(ulong value) => ConvertScalarToVector128UInt64(value);
-
+            public static long ConvertToInt64WithTruncation(Vector128<double> value) => ConvertToInt64WithTruncation(value);
              /// <summary>
-            /// __int64 _mm_cvttsd_si64 (__m128d a)
-            ///   CVTTSD2SI reg, xmm/m64
+            /// __int64 _mm_cvtsi128_si64 (__m128i a)
+            ///    MOVQ r/m64, xmm1
+            ///   VMOVQ r/m64, xmm1
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
-            public static long ConvertToInt64WithTruncation(Vector128<double> value) => ConvertToInt64WithTruncation(value);
+            public static ulong ConvertToUInt64(Vector128<ulong> value) => ConvertToUInt64(value);
  
              /// <summary>
              /// void _mm_stream_si64(__int64 *p, __int64 a)
@@ -88,674 +91,758 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_add_epi8 (__m128i a,  __m128i b)
-        ///   PADDB xmm, xmm/m128
+        ///    PADDB xmm1,               xmm2/m128
+        ///   VPADDB xmm1,         xmm2, xmm3/m128
+        ///   VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Add(Vector128<byte> left, Vector128<byte> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi8 (__m128i a,  __m128i b)
-        ///   PADDB xmm, xmm/m128
+        ///    PADDB xmm1,               xmm2/m128
+        ///   VPADDB xmm1,         xmm2, xmm3/m128
+        ///   VPADDB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Add(Vector128<sbyte> left, Vector128<sbyte> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi16 (__m128i a,  __m128i b)
-        ///   PADDW xmm, xmm/m128
+        ///    PADDW xmm1,               xmm2/m128
+        ///   VPADDW xmm1,         xmm2, xmm3/m128
+        ///   VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Add(Vector128<short> left, Vector128<short> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi16 (__m128i a,  __m128i b)
-        ///   PADDW xmm, xmm/m128
+        ///    PADDW xmm1,               xmm2/m128
+        ///   VPADDW xmm1,         xmm2, xmm3/m128
+        ///   VPADDW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Add(Vector128<ushort> left, Vector128<ushort> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi32 (__m128i a,  __m128i b)
-        ///   PADDD xmm, xmm/m128
+        ///    PADDD xmm1,               xmm2/m128
+        ///   VPADDD xmm1,         xmm2, xmm3/m128
+        ///   VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Add(Vector128<int> left, Vector128<int> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi32 (__m128i a,  __m128i b)
-        ///   PADDD xmm, xmm/m128
+        ///    PADDD xmm1,               xmm2/m128
+        ///   VPADDD xmm1,         xmm2, xmm3/m128
+        ///   VPADDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Add(Vector128<uint> left, Vector128<uint> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi64 (__m128i a,  __m128i b)
-        ///   PADDQ xmm, xmm/m128
+        ///    PADDQ xmm1,               xmm2/m128
+        ///   VPADDQ xmm1,         xmm2, xmm3/m128
+        ///   VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Add(Vector128<long> left, Vector128<long> right) => Add(left, right);
          /// <summary>
          /// __m128i _mm_add_epi64 (__m128i a,  __m128i b)
-        ///   PADDQ xmm, xmm/m128
+        ///    PADDQ xmm1,               xmm2/m128
+        ///   VPADDQ xmm1,         xmm2, xmm3/m128
+        ///   VPADDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Add(Vector128<ulong> left, Vector128<ulong> right) => Add(left, right);
          /// <summary>
          /// __m128d _mm_add_pd (__m128d a,  __m128d b)
-        ///   ADDPD xmm, xmm/m128
+        ///    ADDPD xmm1,               xmm2/m128
+        ///   VADDPD xmm1,         xmm2, xmm3/m128
+        ///   VADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Add(Vector128<double> left, Vector128<double> right) => Add(left, right);
  
          /// <summary>
          /// __m128d _mm_add_sd (__m128d a,  __m128d b)
-        ///   ADDSD xmm, xmm/m64
+        ///    ADDSD xmm1,               xmm2/m64
+        ///   VADDSD xmm1,         xmm2, xmm3/m64
+        ///   VADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> AddScalar(Vector128<double> left, Vector128<double> right) => AddScalar(left, right);
  
          /// <summary>
          /// __m128i _mm_adds_epi8 (__m128i a,  __m128i b)
-        ///   PADDSB xmm, xmm/m128
+        ///    PADDSB xmm1,               xmm2/m128
+        ///   VPADDSB xmm1,         xmm2, xmm3/m128
+        ///   VPADDSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> AddSaturate(Vector128<sbyte> left, Vector128<sbyte> right) => AddSaturate(left, right);
          /// <summary>
          /// __m128i _mm_adds_epu8 (__m128i a,  __m128i b)
-        ///   PADDUSB xmm, xmm/m128
+        ///    PADDUSB xmm1,               xmm2/m128
+        ///   VPADDUSB xmm1,         xmm2, xmm3/m128
+        ///   VPADDUSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte> right) => AddSaturate(left, right);
          /// <summary>
          /// __m128i _mm_adds_epi16 (__m128i a,  __m128i b)
-        ///   PADDSW xmm, xmm/m128
+        ///    PADDSW xmm1,               xmm2/m128
+        ///   VPADDSW xmm1,         xmm2, xmm3/m128
+        ///   VPADDSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> AddSaturate(Vector128<short> left, Vector128<short> right) => AddSaturate(left, right);
          /// <summary>
          /// __m128i _mm_adds_epu16 (__m128i a,  __m128i b)
-        ///   PADDUSW xmm, xmm/m128
+        ///    PADDUSW xmm1,               xmm2/m128
+        ///   VPADDUSW xmm1,         xmm2, xmm3/m128
+        ///   VPADDUSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> AddSaturate(Vector128<ushort> left, Vector128<ushort> right) => AddSaturate(left, right);
  
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> And(Vector128<byte> left, Vector128<byte> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> And(Vector128<sbyte> left, Vector128<sbyte> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> And(Vector128<short> left, Vector128<short> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND xmm1,       xmm2/m128
+        ///   VPAND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> And(Vector128<ushort> left, Vector128<ushort> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> And(Vector128<int> left, Vector128<int> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> And(Vector128<uint> left, Vector128<uint> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> And(Vector128<long> left, Vector128<long> right) => And(left, right);
          /// <summary>
          /// __m128i _mm_and_si128 (__m128i a,  __m128i b)
-        ///   PAND xmm, xmm/m128
+        ///    PAND  xmm1,               xmm2/m128
+        ///   VPAND  xmm1,         xmm2, xmm3/m128
+        ///   VPANDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> And(Vector128<ulong> left, Vector128<ulong> right) => And(left, right);
          /// <summary>
          /// __m128d _mm_and_pd (__m128d a, __m128d b)
-        ///   ANDPD xmm, xmm/m128
+        ///    ANDPD xmm1,               xmm2/m128
+        ///   VANDPD xmm1,         xmm2, xmm3/m128
+        ///   VANDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> And(Vector128<double> left, Vector128<double> right) => And(left, right);
  
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> AndNot(Vector128<byte> left, Vector128<byte> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> AndNot(Vector128<sbyte> left, Vector128<sbyte> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> AndNot(Vector128<short> left, Vector128<short> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN xmm1,       xmm2/m128
+        ///   VPANDN xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> AndNot(Vector128<ushort> left, Vector128<ushort> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> AndNot(Vector128<int> left, Vector128<int> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDND xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> AndNot(Vector128<uint> left, Vector128<uint> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> AndNot(Vector128<long> left, Vector128<long> right) => AndNot(left, right);
          /// <summary>
          /// __m128i _mm_andnot_si128 (__m128i a,  __m128i b)
-        ///   PANDN xmm, xmm/m128
+        ///    PANDN  xmm1,               xmm2/m128
+        ///   VPANDN  xmm1,         xmm2, xmm3/m128
+        ///   VPANDNQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> AndNot(Vector128<ulong> left, Vector128<ulong> right) => AndNot(left, right);
          /// <summary>
          /// __m128d _mm_andnot_pd (__m128d a, __m128d b)
-        ///   ADDNPD xmm, xmm/m128
+        ///    ANDNPD xmm1,               xmm2/m128
+        ///   VANDNPD xmm1,         xmm2, xmm3/m128
+        ///   VANDNPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> AndNot(Vector128<double> left, Vector128<double> right) => AndNot(left, right);
  
          /// <summary>
          /// __m128i _mm_avg_epu8 (__m128i a,  __m128i b)
-        ///   PAVGB xmm, xmm/m128
+        ///    PAVGB xmm1,               xmm2/m128
+        ///   VPAVGB xmm1,         xmm2, xmm3/m128
+        ///   VPAVGB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Average(Vector128<byte> left, Vector128<byte> right) => Average(left, right);
          /// <summary>
          /// __m128i _mm_avg_epu16 (__m128i a,  __m128i b)
-        ///   PAVGW xmm, xmm/m128
+        ///    PAVGW xmm1,               xmm2/m128
+        ///   VPAVGW xmm1,         xmm2, xmm3/m128
+        ///   VPAVGW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Average(Vector128<ushort> left, Vector128<ushort> right) => Average(left, right);
  
          /// <summary>
          /// __m128i _mm_cmpeq_epi8 (__m128i a,  __m128i b)
-        ///   PCMPEQB xmm, xmm/m128
+        ///    PCMPEQB xmm1,       xmm2/m128
+        ///   VPCMPEQB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> CompareEqual(Vector128<sbyte> left, Vector128<sbyte> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128i _mm_cmpeq_epi8 (__m128i a,  __m128i b)
-        ///   PCMPEQB xmm, xmm/m128
+        ///    PCMPEQB xmm1,       xmm2/m128
+        ///   VPCMPEQB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> CompareEqual(Vector128<byte> left, Vector128<byte> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128i _mm_cmpeq_epi16 (__m128i a,  __m128i b)
-        ///   PCMPEQW xmm, xmm/m128
+        ///    PCMPEQW xmm1,       xmm2/m128
+        ///   VPCMPEQW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> CompareEqual(Vector128<short> left, Vector128<short> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128i _mm_cmpeq_epi16 (__m128i a,  __m128i b)
-        ///   PCMPEQW xmm, xmm/m128
+        ///    PCMPEQW xmm1,       xmm2/m128
+        ///   VPCMPEQW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> CompareEqual(Vector128<ushort> left, Vector128<ushort> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128i _mm_cmpeq_epi32 (__m128i a,  __m128i b)
-        ///   PCMPEQD xmm, xmm/m128
+        ///    PCMPEQD xmm1,       xmm2/m128
+        ///   VPCMPEQD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> CompareEqual(Vector128<int> left, Vector128<int> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128i _mm_cmpeq_epi32 (__m128i a,  __m128i b)
-        ///   PCMPEQD xmm, xmm/m128
+        ///    PCMPEQD xmm1,       xmm2/m128
+        ///   VPCMPEQD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> CompareEqual(Vector128<uint> left, Vector128<uint> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128d _mm_cmpeq_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(0)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(0)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(0)
          /// </summary>
          public static Vector128<double> CompareEqual(Vector128<double> left, Vector128<double> right) => CompareEqual(left, right);
  
-        /// <summary>
-        /// int _mm_comieq_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarOrderedEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedEqual(left, right);
-
-        /// <summary>
-        /// int _mm_ucomieq_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarUnorderedEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedEqual(left, right);
-
-        /// <summary>
-        /// __m128d _mm_cmpeq_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(0)
-        /// </summary>
-        public static Vector128<double> CompareScalarEqual(Vector128<double> left, Vector128<double> right) => CompareScalarEqual(left, right);
-
          /// <summary>
          /// __m128i _mm_cmpgt_epi8 (__m128i a,  __m128i b)
-        ///   PCMPGTB xmm, xmm/m128
+        ///    PCMPGTB xmm1,       xmm2/m128
+        ///   VPCMPGTB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> CompareGreaterThan(Vector128<sbyte> left, Vector128<sbyte> right) => CompareGreaterThan(left, right);
          /// <summary>
          /// __m128i _mm_cmpgt_epi16 (__m128i a,  __m128i b)
-        ///   PCMPGTW xmm, xmm/m128
+        ///    PCMPGTW xmm1,       xmm2/m128
+        ///   VPCMPGTW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> CompareGreaterThan(Vector128<short> left, Vector128<short> right) => CompareGreaterThan(left, right);
          /// <summary>
          /// __m128i _mm_cmpgt_epi32 (__m128i a,  __m128i b)
-        ///   PCMPGTD xmm, xmm/m128
+        ///    PCMPGTD xmm1,       xmm2/m128
+        ///   VPCMPGTD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> CompareGreaterThan(Vector128<int> left, Vector128<int> right) => CompareGreaterThan(left, right);
          /// <summary>
          /// __m128d _mm_cmpgt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(1) with swapped operands
+        ///    CMPPD xmm1,       xmm2/m128, imm8(1)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(1)   ; with swapped operands
          /// </summary>
          public static Vector128<double> CompareGreaterThan(Vector128<double> left, Vector128<double> right) => CompareGreaterThan(left, right);
  
-        /// <summary>
-        /// int _mm_comigt_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarOrderedGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedGreaterThan(left, right);
-
-        /// <summary>
-        /// int _mm_ucomigt_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarUnorderedGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedGreaterThan(left, right);
-
-        /// <summary>
-        /// __m128d _mm_cmpgt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(1) with swapped operands
-        /// </summary>
-        public static Vector128<double> CompareScalarGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarGreaterThan(left, right);
-
          /// <summary>
          /// __m128d _mm_cmpge_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(2) with swapped operands
+        ///    CMPPD xmm1,       xmm2/m128, imm8(2)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(2)   ; with swapped operands
          /// </summary>
          public static Vector128<double> CompareGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareGreaterThanOrEqual(left, right);
  
-        /// <summary>
-        /// int _mm_comige_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedGreaterThanOrEqual(left, right);
-
-        /// <summary>
-        /// int _mm_ucomige_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
-        /// </summary>
-        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedGreaterThanOrEqual(left, right);
-
-        /// <summary>
-        /// __m128d _mm_cmpge_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(2) with swapped operands
-        /// </summary>
-        public static Vector128<double> CompareScalarGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarGreaterThanOrEqual(left, right);
-
          /// <summary>
          /// __m128i _mm_cmplt_epi8 (__m128i a,  __m128i b)
-        ///   PCMPGTB xmm, xmm/m128
+        ///    PCMPGTB xmm1,       xmm2/m128    ; with swapped operands
+        ///   VPCMPGTB xmm1, xmm2, xmm3/m128    ; with swapped operands
          /// </summary>
          public static Vector128<sbyte> CompareLessThan(Vector128<sbyte> left, Vector128<sbyte> right) => CompareLessThan(left, right);
          /// <summary>
          /// __m128i _mm_cmplt_epi16 (__m128i a,  __m128i b)
-        ///   PCMPGTW xmm, xmm/m128
+        ///    PCMPGTW xmm1,       xmm2/m128    ; with swapped operands
+        ///   VPCMPGTW xmm1, xmm2, xmm3/m128    ; with swapped operands
          /// </summary>
          public static Vector128<short> CompareLessThan(Vector128<short> left, Vector128<short> right) => CompareLessThan(left, right);
          /// <summary>
          /// __m128i _mm_cmplt_epi32 (__m128i a,  __m128i b)
-        ///   PCMPGTD xmm, xmm/m128
+        ///    PCMPGTD xmm1,       xmm2/m128    ; with swapped operands
+        ///   VPCMPGTD xmm1, xmm2, xmm3/m128    ; with swapped operands
          /// </summary>
          public static Vector128<int> CompareLessThan(Vector128<int> left, Vector128<int> right) => CompareLessThan(left, right);
          /// <summary>
          /// __m128d _mm_cmplt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(1)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(1)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(1)
          /// </summary>
          public static Vector128<double> CompareLessThan(Vector128<double> left, Vector128<double> right) => CompareLessThan(left, right);
  
          /// <summary>
-        /// int _mm_comilt_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
+        /// __m128d _mm_cmple_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(2)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(2)
          /// </summary>
-        public static bool CompareScalarOrderedLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedLessThan(left, right);
-
+        public static Vector128<double> CompareLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareLessThanOrEqual(left, right);
          /// <summary>
-        /// int _mm_ucomilt_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
+        /// __m128d _mm_cmpneq_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(4)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(4)
          /// </summary>
-        public static bool CompareScalarUnorderedLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedLessThan(left, right);
-
+        public static Vector128<double> CompareNotEqual(Vector128<double> left, Vector128<double> right) => CompareNotEqual(left, right);
          /// <summary>
-        /// __m128d _mm_cmplt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(1)
+        /// __m128d _mm_cmpngt_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(5)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(5)   ; with swapped operands
          /// </summary>
-        public static Vector128<double> CompareScalarLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarLessThan(left, right);
-
+        public static Vector128<double> CompareNotGreaterThan(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThan(left, right);
          /// <summary>
-        /// __m128d _mm_cmple_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(2)
+        /// __m128d _mm_cmpnge_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(6)   ; with swapped operands
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(6)   ; with swapped operands
          /// </summary>
-        public static Vector128<double> CompareLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareLessThanOrEqual(left, right);
-
+        public static Vector128<double> CompareNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThanOrEqual(left, right);
          /// <summary>
-        /// int _mm_comile_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
+        /// __m128d _mm_cmpnlt_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(5)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(5)
          /// </summary>
-        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedLessThanOrEqual(left, right);
-
+        public static Vector128<double> CompareNotLessThan(Vector128<double> left, Vector128<double> right) => CompareNotLessThan(left, right);
          /// <summary>
-        /// int _mm_ucomile_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
+        /// __m128d _mm_cmpnle_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(6)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(6)
          /// </summary>
-        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedLessThanOrEqual(left, right);
-
+        public static Vector128<double> CompareNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareNotLessThanOrEqual(left, right);
          /// <summary>
-        /// __m128d _mm_cmple_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(2)
+        /// __m128d _mm_cmpord_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(7)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(7)
          /// </summary>
-        public static Vector128<double> CompareScalarLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarLessThanOrEqual(left, right);
+        public static Vector128<double> CompareOrdered(Vector128<double> left, Vector128<double> right) => CompareOrdered(left, right);
  
          /// <summary>
-        /// __m128d _mm_cmpneq_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(4)
+        /// __m128d _mm_cmpeq_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(0)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(0)
          /// </summary>
-        public static Vector128<double> CompareNotEqual(Vector128<double> left, Vector128<double> right) => CompareNotEqual(left, right);
-
+        public static Vector128<double> CompareScalarEqual(Vector128<double> left, Vector128<double> right) => CompareScalarEqual(left, right);
          /// <summary>
-        /// int _mm_comineq_sd (__m128d a, __m128d b)
-        ///   COMISD xmm, xmm/m64
+        /// __m128d _mm_cmpgt_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(1)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(1)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarOrderedNotEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedNotEqual(left, right);
-
+        public static Vector128<double> CompareScalarGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarGreaterThan(left, right);
          /// <summary>
-        /// int _mm_ucomineq_sd (__m128d a, __m128d b)
-        ///   UCOMISD xmm, xmm/m64
+        /// __m128d _mm_cmpge_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(2)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(2)   ; with swapped operands
          /// </summary>
-        public static bool CompareScalarUnorderedNotEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedNotEqual(left, right);
-
+        public static Vector128<double> CompareScalarGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarGreaterThanOrEqual(left, right);
          /// <summary>
-        /// __m128d _mm_cmpneq_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(4)
+        /// __m128d _mm_cmplt_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(1)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(1)
          /// </summary>
-        public static Vector128<double> CompareScalarNotEqual(Vector128<double> left, Vector128<double> right) => CompareScalarNotEqual(left, right);
-
+        public static Vector128<double> CompareScalarLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarLessThan(left, right);
          /// <summary>
-        /// __m128d _mm_cmpngt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(5) with swapped operands
+        /// __m128d _mm_cmple_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(2)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(2)
          /// </summary>
-        public static Vector128<double> CompareNotGreaterThan(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThan(left, right);
-
+        public static Vector128<double> CompareScalarLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarLessThanOrEqual(left, right);
          /// <summary>
-        /// __m128d _mm_cmpngt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(5) with swapped operands
+        /// __m128d _mm_cmpneq_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(4)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(4)
          /// </summary>
-        public static Vector128<double> CompareScalarNotGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarNotGreaterThan(left, right);
-
+        public static Vector128<double> CompareScalarNotEqual(Vector128<double> left, Vector128<double> right) => CompareScalarNotEqual(left, right);
          /// <summary>
-        /// __m128d _mm_cmpnge_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(6) with swapped operands
+        /// __m128d _mm_cmpngt_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(5)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(5)   ; with swapped operands
          /// </summary>
-        public static Vector128<double> CompareNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThanOrEqual(left, right);
-
+        public static Vector128<double> CompareScalarNotGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarNotGreaterThan(left, right);
          /// <summary>
          /// __m128d _mm_cmpnge_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(6) with swapped operands
+        ///    CMPSD xmm1,       xmm2/m64, imm8(6)   ; with swapped operands
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(6)   ; with swapped operands
          /// </summary>
          public static Vector128<double> CompareScalarNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarNotGreaterThanOrEqual(left, right);
-
-        /// <summary>
-        /// __m128d _mm_cmpnlt_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(5)
-        /// </summary>
-        public static Vector128<double> CompareNotLessThan(Vector128<double> left, Vector128<double> right) => CompareNotLessThan(left, right);
-
          /// <summary>
          /// __m128d _mm_cmpnlt_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(5)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(5)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(5)
          /// </summary>
          public static Vector128<double> CompareScalarNotLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarNotLessThan(left, right);
-
-        /// <summary>
-        /// __m128d _mm_cmpnle_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(6)
-        /// </summary>
-        public static Vector128<double> CompareNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareNotLessThanOrEqual(left, right);
-
          /// <summary>
          /// __m128d _mm_cmpnle_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(6)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(6)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(6)
          /// </summary>
          public static Vector128<double> CompareScalarNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarNotLessThanOrEqual(left, right);
  
-        /// <summary>
-        /// __m128d _mm_cmpord_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(7)
-        /// </summary>
-        public static Vector128<double> CompareOrdered(Vector128<double> left, Vector128<double> right) => CompareOrdered(left, right);
-
          /// <summary>
          /// __m128d _mm_cmpord_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(7)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(7)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(7)
          /// </summary>
          public static Vector128<double> CompareScalarOrdered(Vector128<double> left, Vector128<double> right) => CompareScalarOrdered(left, right);
-
          /// <summary>
-        /// __m128d _mm_cmpunord_pd (__m128d a,  __m128d b)
-        ///   CMPPD xmm, xmm/m128, imm8(3)
+        /// int _mm_comieq_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISD xmm1, xmm2/m64        ; ZF=1 &amp;&amp; PF=0
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<double> CompareUnordered(Vector128<double> left, Vector128<double> right) => CompareUnordered(left, right);
-
+        public static bool CompareScalarOrderedEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedEqual(left, right);
          /// <summary>
-        /// __m128d _mm_cmpunord_sd (__m128d a,  __m128d b)
-        ///   CMPSD xmm, xmm/m64, imm8(3)
+        /// int _mm_comigt_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISD xmm1, xmm2/m64        ; ZF=0 &amp;&amp; CF=0
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static Vector128<double> CompareScalarUnordered(Vector128<double> left, Vector128<double> right) => CompareScalarUnordered(left, right);
-
+        public static bool CompareScalarOrderedGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedGreaterThan(left, right);
          /// <summary>
-        /// __m128i _mm_cvtps_epi32 (__m128 a)
-        ///   CVTPS2DQ xmm, xmm/m128
+        /// int _mm_comige_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; CF=0
+        ///   VCOMISD xmm1, xmm2/m64        ; CF=0
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; CF=0
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32(Vector128<float> value) => ConvertToVector128Int32(value);
+        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedGreaterThanOrEqual(left, right);
          /// <summary>
-        /// __m128i _mm_cvtpd_epi32 (__m128d a)
-        ///   CVTPD2DQ xmm, xmm/m128
+        /// int _mm_comilt_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; CF=1
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; PF=0 &amp;&amp; CF=1
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32(Vector128<double> value) => ConvertToVector128Int32(value);
+        public static bool CompareScalarOrderedLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedLessThan(left, right);
          /// <summary>
-        /// __m128 _mm_cvtepi32_ps (__m128i a)
-        ///   CVTDQ2PS xmm, xmm/m128
+        /// int _mm_comile_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISD xmm1, xmm2/m64        ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; PF=0 &amp;&amp; (ZF=1 || CF=1)
          /// </summary>
-        public static Vector128<float> ConvertToVector128Single(Vector128<int> value) => ConvertToVector128Single(value);
+        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedLessThanOrEqual(left, right);
          /// <summary>
-        /// __m128 _mm_cvtpd_ps (__m128d a)
-        ///   CVTPD2PS xmm, xmm/m128
+        /// int _mm_comineq_sd (__m128d a, __m128d b)
+        ///    COMISD xmm1, xmm2/m64        ; ZF=0 || PF=1
+        ///   VCOMISD xmm1, xmm2/m64        ; ZF=0 || PF=1
+        ///   VCOMISD xmm1, xmm2/m64{sae}   ; ZF=0 || PF=1
          /// </summary>
-        public static Vector128<float> ConvertToVector128Single(Vector128<double> value) => ConvertToVector128Single(value);
+        public static bool CompareScalarOrderedNotEqual(Vector128<double> left, Vector128<double> right) => CompareScalarOrderedNotEqual(left, right);
+
          /// <summary>
-        /// __m128d _mm_cvtepi32_pd (__m128i a)
-        ///   CVTDQ2PD xmm, xmm/m128
+        /// __m128d _mm_cmpunord_sd (__m128d a,  __m128d b)
+        ///    CMPSD xmm1,       xmm2/m64, imm8(3)
+        ///   VCMPSD xmm1, xmm2, xmm3/m64, imm8(3)
          /// </summary>
-        public static Vector128<double> ConvertToVector128Double(Vector128<int> value) => ConvertToVector128Double(value);
+        public static Vector128<double> CompareScalarUnordered(Vector128<double> left, Vector128<double> right) => CompareScalarUnordered(left, right);
          /// <summary>
-        /// __m128d _mm_cvtps_pd (__m128 a)
-        ///   CVTPS2PD xmm, xmm/m128
+        /// int _mm_ucomieq_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISD xmm1, xmm2/m64       ; ZF=1 &amp;&amp; PF=0
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; ZF=1 &amp;&amp; PF=0
          /// </summary>
-        public static Vector128<double> ConvertToVector128Double(Vector128<float> value) => ConvertToVector128Double(value);
-
+        public static bool CompareScalarUnorderedEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedEqual(left, right);
          /// <summary>
-        /// int _mm_cvtsd_si32 (__m128d a)
-        ///   CVTSD2SI r32, xmm/m64
+        /// int _mm_ucomigt_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64       ; ZF=0 &amp;&amp; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; ZF=0 &amp;&amp; CF=0
          /// </summary>
-        public static int ConvertToInt32(Vector128<double> value) => ConvertToInt32(value);
+        public static bool CompareScalarUnorderedGreaterThan(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedGreaterThan(left, right);
          /// <summary>
-        /// int _mm_cvtsi128_si32 (__m128i a)
-        ///   MOVD reg/m32, xmm
+        /// int _mm_ucomige_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64       ; CF=0
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; CF=0
          /// </summary>
-        public static int ConvertToInt32(Vector128<int> value) => ConvertToInt32(value);
+        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedGreaterThanOrEqual(left, right);
+        /// <summary>
+        /// int _mm_ucomilt_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; CF=1
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; PF=0 &amp;&amp; CF=1
+        /// </summary>
+        public static bool CompareScalarUnorderedLessThan(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedLessThan(left, right);
+        /// <summary>
+        /// int _mm_ucomile_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISD xmm1, xmm2/m64       ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; PF=0 &amp;&amp; (ZF=1 || CF=1)
+        /// </summary>
+        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedLessThanOrEqual(left, right);
+        /// <summary>
+        /// int _mm_ucomineq_sd (__m128d a, __m128d b)
+        ///    UCOMISD xmm1, xmm2/m64       ; ZF=0 || PF=1
+        ///   VUCOMISD xmm1, xmm2/m64       ; ZF=0 || PF=1
+        ///   VUCOMISD xmm1, xmm2/m64{sae}  ; ZF=0 || PF=1
+        /// </summary>
+        public static bool CompareScalarUnorderedNotEqual(Vector128<double> left, Vector128<double> right) => CompareScalarUnorderedNotEqual(left, right);
  
          /// <summary>
-        /// int _mm_cvtsi128_si32 (__m128i a)
-        ///   MOVD reg/m32, xmm
+        /// __m128d _mm_cmpunord_pd (__m128d a,  __m128d b)
+        ///    CMPPD xmm1,       xmm2/m128, imm8(3)
+        ///   VCMPPD xmm1, xmm2, xmm3/m128, imm8(3)
          /// </summary>
-        public static uint ConvertToUInt32(Vector128<uint> value) => ConvertToUInt32(value);
+        public static Vector128<double> CompareUnordered(Vector128<double> left, Vector128<double> right) => CompareUnordered(left, right);
  
          /// <summary>
          /// __m128d _mm_cvtsi32_sd (__m128d a, int b)
-        ///   CVTSI2SD xmm, reg/m32
+        ///    CVTSI2SD xmm1,       r/m32
+        ///   VCVTSI2SD xmm1, xmm2, r/m32
          /// </summary>
          public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, int value) => ConvertScalarToVector128Double(upper, value);
-
          /// <summary>
          /// __m128d _mm_cvtss_sd (__m128d a, __m128 b)
-        ///   CVTSS2SD xmm, xmm/m32
+        ///    CVTSS2SD xmm1,       xmm2/m32
+        ///   VCVTSS2SD xmm1, xmm2, xmm3/m32
          /// </summary>
          public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<float> value) => ConvertScalarToVector128Double(upper, value);
-
          /// <summary>
          /// __m128i _mm_cvtsi32_si128 (int a)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, r/m32
+        ///   VMOVD xmm1, r/m32
          /// </summary>
          public static Vector128<int> ConvertScalarToVector128Int32(int value) => ConvertScalarToVector128Int32(value);
-
          /// <summary>
          /// __m128 _mm_cvtsd_ss (__m128 a, __m128d b)
-        ///   CVTSD2SS xmm, xmm/m64
+        ///    CVTSD2SS xmm1,       xmm2/m64
+        ///   VCVTSD2SS xmm1, xmm2, xmm3/m64
          /// </summary>
          public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<double> value) => ConvertScalarToVector128Single(upper, value);
          /// <summary>
          /// __m128i _mm_cvtsi32_si128 (int a)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, r/m32
+        ///   VMOVD xmm1, r/m32
          /// </summary>
          public static Vector128<uint> ConvertScalarToVector128UInt32(uint value) => ConvertScalarToVector128UInt32(value);
  
          /// <summary>
-        /// __m128i _mm_cvttps_epi32 (__m128 a)
-        ///   CVTTPS2DQ xmm, xmm/m128
+        /// int _mm_cvtsi128_si32 (__m128i a)
+        ///    MOVD r/m32, xmm1
+        ///   VMOVD r/m32, xmm1
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<float> value) => ConvertToVector128Int32WithTruncation(value);
+        public static int ConvertToInt32(Vector128<int> value) => ConvertToInt32(value);
          /// <summary>
-        /// __m128i _mm_cvttpd_epi32 (__m128d a)
-        ///   CVTTPD2DQ xmm, xmm/m128
+        /// int _mm_cvtsd_si32 (__m128d a)
+        ///    CVTSD2SI r32, xmm1/m64
+        ///   VCVTSD2SI r32, xmm1/m64
          /// </summary>
-        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<double> value) => ConvertToVector128Int32WithTruncation(value);
-
+        public static int ConvertToInt32(Vector128<double> value) => ConvertToInt32(value);
          /// <summary>
          /// int _mm_cvttsd_si32 (__m128d a)
-        ///   CVTTSD2SI reg, xmm/m64
+        ///    CVTTSD2SI r32, xmm1/m64
+        ///   VCVTTSD2SI r32, xmm1/m64
          /// </summary>
          public static int ConvertToInt32WithTruncation(Vector128<double> value) => ConvertToInt32WithTruncation(value);
-
-        /// <summary>
-        /// __m128d _mm_div_pd (__m128d a,  __m128d b)
-        ///   DIVPD xmm, xmm/m128
-        /// </summary>
-        public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) => Divide(left, right);
-
          /// <summary>
-        /// __m128d _mm_div_sd (__m128d a,  __m128d b)
-        ///   DIVSD xmm, xmm/m64
-        /// </summary>
-        public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) => DivideScalar(left, right);
-
-        /// <summary>
-        /// int _mm_extract_epi16 (__m128i a,  int immediate)
-        ///   PEXTRW reg, xmm, imm8
+        /// int _mm_cvtsi128_si32 (__m128i a)
+        ///    MOVD r/m32, xmm1
+        ///   VMOVD r/m32, xmm1
          /// </summary>
-        public static ushort Extract(Vector128<ushort> value, [ConstantExpected] byte index) => Extract(value, index);
+        public static uint ConvertToUInt32(Vector128<uint> value) => ConvertToUInt32(value);
  
          /// <summary>
-        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
-        ///   PINSRW xmm, reg/m16, imm8
+        /// __m128d _mm_cvtepi32_pd (__m128i a)
+        ///    CVTDQ2PD xmm1,         xmm2/m64
+        ///   VCVTDQ2PD xmm1,         xmm2/m64
+        ///   VCVTDQ2PD xmm1 {k1}{z}, xmm2/m64/m32bcst
          /// </summary>
-        public static Vector128<short> Insert(Vector128<short> value, short data, [ConstantExpected] byte index) => Insert(value, data, index);
+        public static Vector128<double> ConvertToVector128Double(Vector128<int> value) => ConvertToVector128Double(value);
          /// <summary>
-        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
-        ///   PINSRW xmm, reg/m16, imm8
+        /// __m128d _mm_cvtps_pd (__m128 a)
+        ///    CVTPS2PD xmm1,         xmm2/m64
+        ///   VCVTPS2PD xmm1,         xmm2/m64
+        ///   VCVTPS2PD xmm1 {k1}{z}, xmm2/m64/m32bcst
          /// </summary>
-        public static Vector128<ushort> Insert(Vector128<ushort> value, ushort data, [ConstantExpected] byte index) => Insert(value, data, index);
-
+        public static Vector128<double> ConvertToVector128Double(Vector128<float> value) => ConvertToVector128Double(value);
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128i _mm_cvtps_epi32 (__m128 a)
+        ///    CVTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static unsafe Vector128<sbyte> LoadVector128(sbyte* address) => LoadVector128(address);
+        public static Vector128<int> ConvertToVector128Int32(Vector128<float> value) => ConvertToVector128Int32(value);
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128i _mm_cvtpd_epi32 (__m128d a)
+        ///    CVTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
-        public static unsafe Vector128<byte> LoadVector128(byte* address) => LoadVector128(address);
+        public static Vector128<int> ConvertToVector128Int32(Vector128<double> value) => ConvertToVector128Int32(value);
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128i _mm_cvttps_epi32 (__m128 a)
+        ///    CVTTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTTPS2DQ xmm1,         xmm2/m128
+        ///   VCVTTPS2DQ xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static unsafe Vector128<short> LoadVector128(short* address) => LoadVector128(address);
+        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<float> value) => ConvertToVector128Int32WithTruncation(value);
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128i _mm_cvttpd_epi32 (__m128d a)
+        ///    CVTTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTTPD2DQ xmm1,         xmm2/m128
+        ///   VCVTTPD2DQ xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
-        public static unsafe Vector128<ushort> LoadVector128(ushort* address) => LoadVector128(address);
+        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<double> value) => ConvertToVector128Int32WithTruncation(value);
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128 _mm_cvtepi32_ps (__m128i a)
+        ///    CVTDQ2PS xmm1,         xmm2/m128
+        ///   VCVTDQ2PS xmm1,         xmm2/m128
+        ///   VCVTDQ2PS xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
-        public static unsafe Vector128<int> LoadVector128(int* address) => LoadVector128(address);
+        public static Vector128<float> ConvertToVector128Single(Vector128<int> value) => ConvertToVector128Single(value);
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128 _mm_cvtpd_ps (__m128d a)
+        ///    CVTPD2PS xmm1,         xmm2/m128
+        ///   VCVTPD2PS xmm1,         xmm2/m128
+        ///   VCVTPD2PS xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
-        public static unsafe Vector128<uint> LoadVector128(uint* address) => LoadVector128(address);
+        public static Vector128<float> ConvertToVector128Single(Vector128<double> value) => ConvertToVector128Single(value);
+
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128d _mm_div_pd (__m128d a,  __m128d b)
+        ///    DIVPD xmm1,               xmm2/m128
+        ///   VDIVPD xmm1,         xmm2, xmm3/m128
+        ///   VDIVPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
-        public static unsafe Vector128<long> LoadVector128(long* address) => LoadVector128(address);
+        public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) => Divide(left, right);
+
          /// <summary>
-        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
-        ///   MOVDQU xmm, m128
+        /// __m128d _mm_div_sd (__m128d a,  __m128d b)
+        ///    DIVSD xmm1,       xmm2/m64
+        ///   VDIVSD xmm1, xmm2, xmm3/m64
          /// </summary>
-        public static unsafe Vector128<ulong> LoadVector128(ulong* address) => LoadVector128(address);
+        public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) => DivideScalar(left, right);
+
          /// <summary>
-        /// __m128d _mm_loadu_pd (double const* mem_address)
-        ///   MOVUPD xmm, m128
+        /// int _mm_extract_epi16 (__m128i a,  int immediate)
+        ///    PEXTRW r/m16, xmm1, imm8
+        ///   VPEXTRW r/m16, xmm1, imm8
          /// </summary>
-        public static unsafe Vector128<double> LoadVector128(double* address) => LoadVector128(address);
+        public static ushort Extract(Vector128<ushort> value, [ConstantExpected] byte index) => Extract(value, index);
  
          /// <summary>
-        /// __m128d _mm_load_sd (double const* mem_address)
-        ///   MOVSD xmm, m64
+        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
+        ///    PINSRW xmm1,       r/m16, imm8
+        ///   VPINSRW xmm1, xmm2, r/m16, imm8
          /// </summary>
-        public static unsafe Vector128<double> LoadScalarVector128(double* address) => LoadScalarVector128(address);
+        public static Vector128<short> Insert(Vector128<short> value, short data, [ConstantExpected] byte index) => Insert(value, data, index);
+        /// <summary>
+        /// __m128i _mm_insert_epi16 (__m128i a,  int i, int immediate)
+        ///    PINSRW xmm1,       r/m16, imm8
+        ///   VPINSRW xmm1, xmm2, r/m16, imm8
+        /// </summary>
+        public static Vector128<ushort> Insert(Vector128<ushort> value, ushort data, [ConstantExpected] byte index) => Insert(value, data, index);
  
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<sbyte> LoadAlignedVector128(sbyte* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<byte> LoadAlignedVector128(byte* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<short> LoadAlignedVector128(short* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<ushort> LoadAlignedVector128(ushort* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<int> LoadAlignedVector128(int* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA32 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<uint> LoadAlignedVector128(uint* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA64 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<long> LoadAlignedVector128(long* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128i _mm_load_si128 (__m128i const* mem_address)
-        ///   MOVDQA xmm, m128
+        ///    MOVDQA   xmm1,         m128
+        ///   VMOVDQA   xmm1,         m128
+        ///   VMOVDQA64 xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<ulong> LoadAlignedVector128(ulong* address) => LoadAlignedVector128(address);
          /// <summary>
          /// __m128d _mm_load_pd (double const* mem_address)
-        ///   MOVAPD xmm, m128
+        ///    MOVAPD xmm1,         m128
+        ///   VMOVAPD xmm1,         m128
+        ///   VMOVAPD xmm1 {k1}{z}, m128
          /// </summary>
          public static unsafe Vector128<double> LoadAlignedVector128(double* address) => LoadAlignedVector128(address);
  
@@ -764,70 +851,154 @@ namespace System.Runtime.Intrinsics.X86
          ///   LFENCE
          /// </summary>
          public static void LoadFence() => LoadFence();
-
          /// <summary>
          /// __m128d _mm_loadh_pd (__m128d a, double const* mem_addr)
-        ///   MOVHPD xmm, m64
+        ///    MOVHPD xmm1,       m64
+        ///   VMOVHPD xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<double> LoadHigh(Vector128<double> lower, double* address) => LoadHigh(lower, address);
-
          /// <summary>
          /// __m128d _mm_loadl_pd (__m128d a, double const* mem_addr)
-        ///   MOVLPD xmm, m64
+        ///    MOVLPD xmm1,       m64
+        ///   VMOVLPD xmm1, xmm2, m64
          /// </summary>
          public static unsafe Vector128<double> LoadLow(Vector128<double> upper, double* address) => LoadLow(upper, address);
  
          /// <summary>
          /// __m128i _mm_loadu_si32 (void const* mem_addr)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, m32
+        ///   VMOVD xmm1, m32
          /// </summary>
          public static unsafe Vector128<int> LoadScalarVector128(int* address) => LoadScalarVector128(address);
          /// <summary>
          /// __m128i _mm_loadu_si32 (void const* mem_addr)
-        ///   MOVD xmm, reg/m32
+        ///    MOVD xmm1, m32
+        ///   VMOVD xmm1, m32
          /// </summary>
          public static unsafe Vector128<uint> LoadScalarVector128(uint* address) => LoadScalarVector128(address);
          /// <summary>
          /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr)
-        ///   MOVQ xmm, reg/m64
+        ///    MOVQ xmm1, m64
+        ///   VMOVQ xmm1, m64
          /// </summary>
          public static unsafe Vector128<long> LoadScalarVector128(long* address) => LoadScalarVector128(address);
          /// <summary>
          /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr)
-        ///   MOVQ xmm, reg/m64
+        ///    MOVQ xmm1, m64
+        ///   VMOVQ xmm1, m64
          /// </summary>
          public static unsafe Vector128<ulong> LoadScalarVector128(ulong* address) => LoadScalarVector128(address);
+        /// <summary>
+        /// __m128d _mm_load_sd (double const* mem_address)
+        ///    MOVSD xmm1,      m64
+        ///   VMOVSD xmm1,      m64
+        ///   VMOVSD xmm1 {k1}, m64
+        /// </summary>
+        public static unsafe Vector128<double> LoadScalarVector128(double* address) => LoadScalarVector128(address);
+
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU  xmm1,         m128
+        ///   VMOVDQU  xmm1,         m128
+        ///   VMOVDQU8 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<sbyte> LoadVector128(sbyte* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU  xmm1,         m128
+        ///   VMOVDQU  xmm1,         m128
+        ///   VMOVDQU8 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<byte> LoadVector128(byte* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU16 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<short> LoadVector128(short* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU16 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<ushort> LoadVector128(ushort* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU32 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<int> LoadVector128(int* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU32 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<uint> LoadVector128(uint* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU64 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<long> LoadVector128(long* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
+        ///    MOVDQU   xmm1,         m128
+        ///   VMOVDQU   xmm1,         m128
+        ///   VMOVDQU64 xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<ulong> LoadVector128(ulong* address) => LoadVector128(address);
+        /// <summary>
+        /// __m128d _mm_loadu_pd (double const* mem_address)
+        ///    MOVUPD xmm1,         m128
+        ///   VMOVUPD xmm1,         m128
+        ///   VMOVUPD xmm1 {k1}{z}, m128
+        /// </summary>
+        public static unsafe Vector128<double> LoadVector128(double* address) => LoadVector128(address);
  
          /// <summary>
          /// void _mm_maskmoveu_si128 (__m128i a,  __m128i mask, char* mem_address)
-        ///   MASKMOVDQU xmm, xmm
+        ///    MASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
+        ///   VMASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
          /// </summary>
          public static unsafe void MaskMove(Vector128<sbyte> source, Vector128<sbyte> mask, sbyte* address) => MaskMove(source, mask, address);
          /// <summary>
          /// void _mm_maskmoveu_si128 (__m128i a,  __m128i mask, char* mem_address)
-        ///   MASKMOVDQU xmm, xmm
+        ///    MASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
+        ///   VMASKMOVDQU xmm1, xmm2    ; Address: EDI/RDI
          /// </summary>
          public static unsafe void MaskMove(Vector128<byte> source, Vector128<byte> mask, byte* address) => MaskMove(source, mask, address);
  
          /// <summary>
          /// __m128i _mm_max_epu8 (__m128i a,  __m128i b)
-        ///   PMAXUB xmm, xmm/m128
+        ///    PMAXUB xmm1,               xmm2/m128
+        ///   VPMAXUB xmm1,         xmm2, xmm3/m128
+        ///   VPMAXUB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Max(Vector128<byte> left, Vector128<byte> right) => Max(left, right);
          /// <summary>
          /// __m128i _mm_max_epi16 (__m128i a,  __m128i b)
-        ///   PMAXSW xmm, xmm/m128
+        ///    PMAXSW xmm1,               xmm2/m128
+        ///   VPMAXSW xmm1,         xmm2, xmm3/m128
+        ///   VPMAXSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Max(Vector128<short> left, Vector128<short> right) => Max(left, right);
          /// <summary>
          /// __m128d _mm_max_pd (__m128d a,  __m128d b)
-        ///   MAXPD xmm, xmm/m128
+        ///    MAXPD xmm1,               xmm2/m128
+        ///   VMAXPD xmm1,         xmm2, xmm3/m128
+        ///   VMAXPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Max(Vector128<double> left, Vector128<double> right) => Max(left, right);
  
          /// <summary>
          /// __m128d _mm_max_sd (__m128d a,  __m128d b)
-        ///   MAXSD xmm, xmm/m64
+        ///    MAXSD xmm1,       xmm2/m64
+        ///   VMAXSD xmm1, xmm2, xmm3/m64
          /// </summary>
          public static Vector128<double> MaxScalar(Vector128<double> left, Vector128<double> right) => MaxScalar(left, right);
  
@@ -839,631 +1010,796 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_min_epu8 (__m128i a,  __m128i b)
-        ///   PMINUB xmm, xmm/m128
+        ///    PMINUB xmm1,               xmm2/m128
+        ///   VPMINUB xmm1,         xmm2, xmm3/m128
+        ///   VPMINUB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Min(Vector128<byte> left, Vector128<byte> right) => Min(left, right);
          /// <summary>
          /// __m128i _mm_min_epi16 (__m128i a,  __m128i b)
-        ///   PMINSW xmm, xmm/m128
+        ///    PMINSW xmm1,               xmm2/m128
+        ///   VPMINSW xmm1,         xmm2, xmm3/m128
+        ///   VPMINSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Min(Vector128<short> left, Vector128<short> right) => Min(left, right);
          /// <summary>
          /// __m128d _mm_min_pd (__m128d a,  __m128d b)
-        ///   MINPD xmm, xmm/m128
+        ///    MINPD xmm1,               xmm2/m128
+        ///   VMINPD xmm1,         xmm2, xmm3/m128
+        ///   VMINPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Min(Vector128<double> left, Vector128<double> right) => Min(left, right);
  
          /// <summary>
          /// __m128d _mm_min_sd (__m128d a,  __m128d b)
-        ///   MINSD xmm, xmm/m64
+        ///    MINSD xmm1,       xmm2/m64
+        ///   VMINSD xmm1, xmm2, xmm3/m64
          /// </summary>
          public static Vector128<double> MinScalar(Vector128<double> left, Vector128<double> right) => MinScalar(left, right);
  
-        /// <summary>
-        /// __m128d _mm_move_sd (__m128d a, __m128d b)
-        ///   MOVSD xmm, xmm
-        /// </summary>
-        public static Vector128<double> MoveScalar(Vector128<double> upper, Vector128<double> value) => MoveScalar(upper, value);
-
          /// <summary>
          /// int _mm_movemask_epi8 (__m128i a)
-        ///   PMOVMSKB reg, xmm
+        ///    PMOVMSKB r32, xmm1
+        ///   VPMOVMSKB r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<sbyte> value) => MoveMask(value);
          /// <summary>
          /// int _mm_movemask_epi8 (__m128i a)
-        ///   PMOVMSKB reg, xmm
+        ///    PMOVMSKB r32, xmm1
+        ///   VPMOVMSKB r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<byte> value) => MoveMask(value);
          /// <summary>
          /// int _mm_movemask_pd (__m128d a)
-        ///   MOVMSKPD reg, xmm
+        ///    MOVMSKPD r32, xmm1
+        ///   VMOVMSKPD r32, xmm1
          /// </summary>
          public static int MoveMask(Vector128<double> value) => MoveMask(value);
  
          /// <summary>
          /// __m128i _mm_move_epi64 (__m128i a)
-        ///   MOVQ xmm, xmm
+        ///    MOVQ xmm1, xmm2
+        ///   VMOVQ xmm1, xmm2
          /// </summary>
          public static Vector128<long> MoveScalar(Vector128<long> value) => MoveScalar(value);
          /// <summary>
          /// __m128i _mm_move_epi64 (__m128i a)
-        ///   MOVQ xmm, xmm
+        ///    MOVQ xmm1, xmm2
+        ///   VMOVQ xmm1, xmm2
          /// </summary>
          public static Vector128<ulong> MoveScalar(Vector128<ulong> value) => MoveScalar(value);
+        /// <summary>
+        /// __m128d _mm_move_sd (__m128d a, __m128d b)
+        ///    MOVSD xmm1,               xmm2
+        ///   VMOVSD xmm1,         xmm2, xmm3
+        ///   VMOVSD xmm1 {k1}{z}, xmm2, xmm3
+        /// </summary>
+        public static Vector128<double> MoveScalar(Vector128<double> upper, Vector128<double> value) => MoveScalar(upper, value);
  
          /// <summary>
          /// __m128i _mm_mul_epu32 (__m128i a,  __m128i b)
-        ///   PMULUDQ xmm, xmm/m128
+        ///    PMULUDQ xmm1,               xmm2/m128
+        ///   VPMULUDQ xmm1,         xmm2, xmm3/m128
+        ///   VPMULUDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Multiply(Vector128<uint> left, Vector128<uint> right) => Multiply(left, right);
          /// <summary>
          /// __m128d _mm_mul_pd (__m128d a,  __m128d b)
-        ///   MULPD xmm, xmm/m128
+        ///    MULPD xmm1,               xmm2/m128
+        ///   VMULPD xmm1,         xmm2, xmm3/m128
+        ///   VMULPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Multiply(Vector128<double> left, Vector128<double> right) => Multiply(left, right);
  
          /// <summary>
-        /// __m128d _mm_mul_sd (__m128d a,  __m128d b)
-        ///   MULSD xmm, xmm/m64
+        /// __m128i _mm_madd_epi16 (__m128i a,  __m128i b)
+        ///    PMADDWD xmm1,               xmm2/m128
+        ///   VPMADDWD xmm1,         xmm2, xmm3/m128
+        ///   VPMADDWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
-        public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right) => MultiplyScalar(left, right);
+        public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right) => MultiplyAddAdjacent(left, right);
  
          /// <summary>
          /// __m128i _mm_mulhi_epi16 (__m128i a,  __m128i b)
-        ///   PMULHW xmm, xmm/m128
+        ///    PMULHW xmm1,               xmm2/m128
+        ///   VPMULHW xmm1,         xmm2, xmm3/m128
+        ///   VPMULHW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyHigh(Vector128<short> left, Vector128<short> right) => MultiplyHigh(left, right);
          /// <summary>
          /// __m128i _mm_mulhi_epu16 (__m128i a,  __m128i b)
-        ///   PMULHUW xmm, xmm/m128
+        ///    PMULHUW xmm1,               xmm2/m128
+        ///   VPMULHUW xmm1,         xmm2, xmm3/m128
+        ///   VPMULHUW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> MultiplyHigh(Vector128<ushort> left, Vector128<ushort> right) => MultiplyHigh(left, right);
  
-        /// <summary>
-        /// __m128i _mm_madd_epi16 (__m128i a,  __m128i b)
-        ///   PMADDWD xmm, xmm/m128
-        /// </summary>
-        public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right) => MultiplyAddAdjacent(left, right);
-
          /// <summary>
          /// __m128i _mm_mullo_epi16 (__m128i a,  __m128i b)
-        ///   PMULLW xmm, xmm/m128
+        ///    PMULLW xmm1,               xmm2/m128
+        ///   VPMULLW xmm1,         xmm2, xmm3/m128
+        ///   VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right) => MultiplyLow(left, right);
          /// <summary>
          /// __m128i _mm_mullo_epi16 (__m128i a,  __m128i b)
-        ///   PMULLW xmm, xmm/m128
+        ///    PMULLW xmm1,               xmm2/m128
+        ///   VPMULLW xmm1,         xmm2, xmm3/m128
+        ///   VPMULLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> MultiplyLow(Vector128<ushort> left, Vector128<ushort> right) => MultiplyLow(left, right);
  
+        /// <summary>
+        /// __m128d _mm_mul_sd (__m128d a,  __m128d b)
+        ///    MULSD xmm1,       xmm2/m64
+        ///   VMULSD xmm1, xmm2, xmm3/m64
+        /// </summary>
+        public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right) => MultiplyScalar(left, right);
+
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Or(Vector128<byte> left, Vector128<byte> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Or(Vector128<sbyte> left, Vector128<sbyte> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Or(Vector128<short> left, Vector128<short> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR xmm1,       xmm2/m128
+        ///   VPOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Or(Vector128<ushort> left, Vector128<ushort> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Or(Vector128<int> left, Vector128<int> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Or(Vector128<uint> left, Vector128<uint> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Or(Vector128<long> left, Vector128<long> right) => Or(left, right);
          /// <summary>
          /// __m128i _mm_or_si128 (__m128i a,  __m128i b)
-        ///   POR xmm, xmm/m128
+        ///    POR  xmm1,               xmm2/m128
+        ///   VPOR  xmm1,         xmm2, xmm3/m128
+        ///   VPORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Or(Vector128<ulong> left, Vector128<ulong> right) => Or(left, right);
          /// <summary>
          /// __m128d _mm_or_pd (__m128d a,  __m128d b)
-        ///   ORPD xmm, xmm/m128
+        ///    ORPD xmm1,               xmm2/m128
+        ///   VORPD xmm1,         xmm2, xmm3/m128
+        ///   VORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Or(Vector128<double> left, Vector128<double> right) => Or(left, right);
  
          /// <summary>
          /// __m128i _mm_packs_epi16 (__m128i a,  __m128i b)
-        ///   PACKSSWB xmm, xmm/m128
+        ///    PACKSSWB xmm1,               xmm2/m128
+        ///   VPACKSSWB xmm1,         xmm2, xmm3/m128
+        ///   VPACKSSWB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> PackSignedSaturate(Vector128<short> left, Vector128<short> right) => PackSignedSaturate(left, right);
          /// <summary>
          /// __m128i _mm_packs_epi32 (__m128i a,  __m128i b)
-        ///   PACKSSDW xmm, xmm/m128
+        ///    PACKSSDW xmm1,               xmm2/m128
+        ///   VPACKSSDW xmm1,         xmm2, xmm3/m128
+        ///   VPACKSSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128<int> right) => PackSignedSaturate(left, right);
  
          /// <summary>
          /// __m128i _mm_packus_epi16 (__m128i a,  __m128i b)
-        ///   PACKUSWB xmm, xmm/m128
+        ///    PACKUSWB xmm1,               xmm2/m128
+        ///   VPACKUSWB xmm1,         xmm2, xmm3/m128
+        ///   VPACKUSWB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right) => PackUnsignedSaturate(left, right);
  
-        /// <summary>
-        /// __m128i _mm_sad_epu8 (__m128i a,  __m128i b)
-        ///   PSADBW xmm, xmm/m128
-        /// </summary>
-        public static Vector128<ushort> SumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right) => SumAbsoluteDifferences(left, right);
-
-        /// <summary>
-        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
-        ///   PSHUFD xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<int> Shuffle(Vector128<int> value, [ConstantExpected] byte control) => Shuffle(value, control);
-        /// <summary>
-        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
-        ///   PSHUFD xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<uint> Shuffle(Vector128<uint> value, [ConstantExpected] byte control) => Shuffle(value, control);
-        /// <summary>
-        /// __m128d _mm_shuffle_pd (__m128d a,  __m128d b, int immediate)
-        ///   SHUFPD xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<double> Shuffle(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => Shuffle(left, right, control);
-
-        /// <summary>
-        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int immediate)
-        ///   PSHUFHW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<short> ShuffleHigh(Vector128<short> value, [ConstantExpected] byte control) => ShuffleHigh(value, control);
-        /// <summary>
-        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int control)
-        ///   PSHUFHW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<ushort> ShuffleHigh(Vector128<ushort> value, [ConstantExpected] byte control) => ShuffleHigh(value, control);
-
-        /// <summary>
-        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
-        ///   PSHUFLW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<short> ShuffleLow(Vector128<short> value, [ConstantExpected] byte control) => ShuffleLow(value, control);
-        /// <summary>
-        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
-        ///   PSHUFLW xmm, xmm/m128, imm8
-        /// </summary>
-        public static Vector128<ushort> ShuffleLow(Vector128<ushort> value, [ConstantExpected] byte control) => ShuffleLow(value, control);
-
          /// <summary>
          /// __m128i _mm_sll_epi16 (__m128i a, __m128i count)
-        ///   PSLLW xmm, xmm/m128
+        ///    PSLLW xmm1,               xmm2/m128
+        ///   VPSLLW xmm1,         xmm2, xmm3/m128
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> ShiftLeftLogical(Vector128<short> value, Vector128<short> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_sll_epi16 (__m128i a,  __m128i count)
-        ///   PSLLW xmm, xmm/m128
+        ///    PSLLW xmm1,               xmm2/m128
+        ///   VPSLLW xmm1,         xmm2, xmm3/m128
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, Vector128<ushort> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_sll_epi32 (__m128i a, __m128i count)
-        ///   PSLLD xmm, xmm/m128
+        ///    PSLLD xmm1,               xmm2/m128
+        ///   VPSLLD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> ShiftLeftLogical(Vector128<int> value, Vector128<int> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_sll_epi32 (__m128i a, __m128i count)
-        ///   PSLLD xmm, xmm/m128
+        ///    PSLLD xmm1,               xmm2/m128
+        ///   VPSLLD xmm1,         xmm2, xmm3/m128
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> ShiftLeftLogical(Vector128<uint> value, Vector128<uint> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_sll_epi64 (__m128i a, __m128i count)
-        ///   PSLLQ xmm, xmm/m128
+        ///    PSLLQ xmm1,               xmm2/m128
+        ///   VPSLLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> ShiftLeftLogical(Vector128<long> value, Vector128<long> count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_sll_epi64 (__m128i a, __m128i count)
-        ///   PSLLQ xmm, xmm/m128
+        ///    PSLLQ xmm1,               xmm2/m128
+        ///   VPSLLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, Vector128<ulong> count) => ShiftLeftLogical(value, count);
  
          /// <summary>
          /// __m128i _mm_slli_epi16 (__m128i a,  int immediate)
-        ///   PSLLW xmm, imm8
+        ///    PSLLW xmm1,               imm8
+        ///   VPSLLW xmm1,         xmm2, imm8
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<short> ShiftLeftLogical(Vector128<short> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_slli_epi16 (__m128i a,  int immediate)
-        ///   PSLLW xmm, imm8
+        ///    PSLLW xmm1,               imm8
+        ///   VPSLLW xmm1,         xmm2, imm8
+        ///   VPSLLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_slli_epi32 (__m128i a,  int immediate)
-        ///   PSLLD xmm, imm8
+        ///    PSLLD xmm1,               imm8
+        ///   VPSLLD xmm1,         xmm2, imm8
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<int> ShiftLeftLogical(Vector128<int> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_slli_epi32 (__m128i a,  int immediate)
-        ///   PSLLD xmm, imm8
+        ///    PSLLD xmm1,               imm8
+        ///   VPSLLD xmm1,         xmm2, imm8
+        ///   VPSLLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<uint> ShiftLeftLogical(Vector128<uint> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_slli_epi64 (__m128i a,  int immediate)
-        ///   PSLLQ xmm, imm8
+        ///    PSLLQ xmm1,               imm8
+        ///   VPSLLQ xmm1,         xmm2, imm8
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<long> ShiftLeftLogical(Vector128<long> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
          /// <summary>
          /// __m128i _mm_slli_epi64 (__m128i a,  int immediate)
-        ///   PSLLQ xmm, imm8
+        ///    PSLLQ xmm1,               imm8
+        ///   VPSLLQ xmm1,         xmm2, imm8
+        ///   VPSLLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, [ConstantExpected] byte count) => ShiftLeftLogical(value, count);
  
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<sbyte> ShiftLeftLogical128BitLane(Vector128<sbyte> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<byte> ShiftLeftLogical128BitLane(Vector128<byte> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<short> ShiftLeftLogical128BitLane(Vector128<short> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ushort> ShiftLeftLogical128BitLane(Vector128<ushort> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<int> ShiftLeftLogical128BitLane(Vector128<int> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<uint> ShiftLeftLogical128BitLane(Vector128<uint> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<long> ShiftLeftLogical128BitLane(Vector128<long> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bslli_si128 (__m128i a, int imm8)
-        ///   PSLLDQ xmm, imm8
+        ///    PSLLDQ xmm1,            imm8
+        ///   VPSLLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSLLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ulong> ShiftLeftLogical128BitLane(Vector128<ulong> value, [ConstantExpected] byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
  
          /// <summary>
          /// __m128i _mm_sra_epi16 (__m128i a, __m128i count)
-        ///   PSRAW xmm, xmm/m128
+        ///    PSRAW xmm1,               xmm2/m128
+        ///   VPSRAW xmm1,         xmm2, xmm3/m128
+        ///   VPSRAW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> ShiftRightArithmetic(Vector128<short> value, Vector128<short> count) => ShiftRightArithmetic(value, count);
          /// <summary>
          /// __m128i _mm_sra_epi32 (__m128i a, __m128i count)
-        ///   PSRAD xmm, xmm/m128
+        ///    PSRAD xmm1,               xmm2/m128
+        ///   VPSRAD xmm1,         xmm2, xmm3/m128
+        ///   VPSRAD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> ShiftRightArithmetic(Vector128<int> value, Vector128<int> count) => ShiftRightArithmetic(value, count);
  
          /// <summary>
          /// __m128i _mm_srai_epi16 (__m128i a,  int immediate)
-        ///   PSRAW xmm, imm8
+        ///    PSRAW xmm1,               imm8
+        ///   VPSRAW xmm1,         xmm2, imm8
+        ///   VPSRAW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<short> ShiftRightArithmetic(Vector128<short> value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count);
          /// <summary>
          /// __m128i _mm_srai_epi32 (__m128i a,  int immediate)
-        ///   PSRAD xmm, imm8
+        ///    PSRAD xmm1,               imm8
+        ///   VPSRAD xmm1,         xmm2, imm8
+        ///   VPSRAD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<int> ShiftRightArithmetic(Vector128<int> value, [ConstantExpected] byte count) => ShiftRightArithmetic(value, count);
  
          /// <summary>
          /// __m128i _mm_srl_epi16 (__m128i a, __m128i count)
-        ///   PSRLW xmm, xmm/m128
+        ///    PSRLW xmm1,               xmm2/m128
+        ///   VPSRLW xmm1,         xmm2, xmm3/m128
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> ShiftRightLogical(Vector128<short> value, Vector128<short> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srl_epi16 (__m128i a, __m128i count)
-        ///   PSRLW xmm, xmm/m128
+        ///    PSRLW xmm1,               xmm2/m128
+        ///   VPSRLW xmm1,         xmm2, xmm3/m128
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, Vector128<ushort> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srl_epi32 (__m128i a, __m128i count)
-        ///   PSRLD xmm, xmm/m128
+        ///    PSRLD xmm1,               xmm2/m128
+        ///   VPSRLD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> ShiftRightLogical(Vector128<int> value, Vector128<int> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srl_epi32 (__m128i a, __m128i count)
-        ///   PSRLD xmm, xmm/m128
+        ///    PSRLD xmm1,               xmm2/m128
+        ///   VPSRLD xmm1,         xmm2, xmm3/m128
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> ShiftRightLogical(Vector128<uint> value, Vector128<uint> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srl_epi64 (__m128i a, __m128i count)
-        ///   PSRLQ xmm, xmm/m128
+        ///    PSRLQ xmm1,               xmm2/m128
+        ///   VPSRLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> ShiftRightLogical(Vector128<long> value, Vector128<long> count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srl_epi64 (__m128i a, __m128i count)
-        ///   PSRLQ xmm, xmm/m128
+        ///    PSRLQ xmm1,               xmm2/m128
+        ///   VPSRLQ xmm1,         xmm2, xmm3/m128
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, Vector128<ulong> count) => ShiftRightLogical(value, count);
  
          /// <summary>
          /// __m128i _mm_srli_epi16 (__m128i a,  int immediate)
-        ///   PSRLW xmm, imm8
+        ///    PSRLW xmm1,               imm8
+        ///   VPSRLW xmm1,         xmm2, imm8
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<short> ShiftRightLogical(Vector128<short> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srli_epi16 (__m128i a,  int immediate)
-        ///   PSRLW xmm, imm8
+        ///    PSRLW xmm1,               imm8
+        ///   VPSRLW xmm1,         xmm2, imm8
+        ///   VPSRLW xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srli_epi32 (__m128i a,  int immediate)
-        ///   PSRLD xmm, imm8
+        ///    PSRLD xmm1,               imm8
+        ///   VPSRLD xmm1,         xmm2, imm8
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<int> ShiftRightLogical(Vector128<int> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srli_epi32 (__m128i a,  int immediate)
-        ///   PSRLD xmm, imm8
+        ///    PSRLD xmm1,               imm8
+        ///   VPSRLD xmm1,         xmm2, imm8
+        ///   VPSRLD xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<uint> ShiftRightLogical(Vector128<uint> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srli_epi64 (__m128i a,  int immediate)
-        ///   PSRLQ xmm, imm8
+        ///    PSRLQ xmm1,               imm8
+        ///   VPSRLQ xmm1,         xmm2, imm8
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<long> ShiftRightLogical(Vector128<long> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
          /// <summary>
          /// __m128i _mm_srli_epi64 (__m128i a,  int immediate)
-        ///   PSRLQ xmm, imm8
+        ///    PSRLQ xmm1,               imm8
+        ///   VPSRLQ xmm1,         xmm2, imm8
+        ///   VPSRLQ xmm1 {k1}{z}, xmm2, imm8
          /// </summary>
          public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, [ConstantExpected] byte count) => ShiftRightLogical(value, count);
  
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<sbyte> ShiftRightLogical128BitLane(Vector128<sbyte> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
          /// </summary>
          public static Vector128<byte> ShiftRightLogical128BitLane(Vector128<byte> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<short> ShiftRightLogical128BitLane(Vector128<short> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ushort> ShiftRightLogical128BitLane(Vector128<ushort> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<int> ShiftRightLogical128BitLane(Vector128<int> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<uint> ShiftRightLogical128BitLane(Vector128<uint> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<long> ShiftRightLogical128BitLane(Vector128<long> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
          /// <summary>
          /// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
-        ///   PSRLDQ xmm, imm8
+        ///    PSRLDQ xmm1,            imm8
+        ///   VPSRLDQ xmm1, xmm2/m128, imm8
+        /// This intrinsic generates PSRLDQ that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ulong> ShiftRightLogical128BitLane(Vector128<ulong> value, [ConstantExpected] byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
  
+        /// <summary>
+        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
+        ///    PSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8
+        /// </summary>
+        public static Vector128<int> Shuffle(Vector128<int> value, [ConstantExpected] byte control) => Shuffle(value, control);
+        /// <summary>
+        /// __m128i _mm_shuffle_epi32 (__m128i a,  int immediate)
+        ///    PSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1,         xmm2/m128,         imm8
+        ///   VPSHUFD xmm1 {k1}{z}, xmm2/m128/m32bcst, imm8
+        /// </summary>
+        public static Vector128<uint> Shuffle(Vector128<uint> value, [ConstantExpected] byte control) => Shuffle(value, control);
+        /// <summary>
+        /// __m128d _mm_shuffle_pd (__m128d a,  __m128d b, int immediate)
+        ///    SHUFPD xmm1,               xmm2/m128,         imm8
+        ///   VSHUFPD xmm1,         xmm2, xmm3/m128,         imm8
+        ///   VSHUFPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst, imm8
+        /// </summary>
+        public static Vector128<double> Shuffle(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => Shuffle(left, right, control);
+
+        /// <summary>
+        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int control)
+        ///    PSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<short> ShuffleHigh(Vector128<short> value, [ConstantExpected] byte control) => ShuffleHigh(value, control);
+        /// <summary>
+        /// __m128i _mm_shufflehi_epi16 (__m128i a,  int control)
+        ///    PSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFHW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<ushort> ShuffleHigh(Vector128<ushort> value, [ConstantExpected] byte control) => ShuffleHigh(value, control);
+
+        /// <summary>
+        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
+        ///    PSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<short> ShuffleLow(Vector128<short> value, [ConstantExpected] byte control) => ShuffleLow(value, control);
+        /// <summary>
+        /// __m128i _mm_shufflelo_epi16 (__m128i a,  int control)
+        ///    PSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1,         xmm2/m128, imm8
+        ///   VPSHUFLW xmm1 {k1}{z}, xmm2/m128, imm8
+        /// </summary>
+        public static Vector128<ushort> ShuffleLow(Vector128<ushort> value, [ConstantExpected] byte control) => ShuffleLow(value, control);
+
          /// <summary>
          /// __m128d _mm_sqrt_pd (__m128d a)
-        ///   SQRTPD xmm, xmm/m128
+        ///    SQRTPD xmm1,         xmm2/m128
+        ///   VSQRTPD xmm1,         xmm2/m128
+        ///   VSQRTPD xmm1 {k1}{z}, xmm2/m128/m64bcst
          /// </summary>
          public static Vector128<double> Sqrt(Vector128<double> value) => Sqrt(value);
  
          /// <summary>
          /// __m128d _mm_sqrt_sd (__m128d a)
-        ///   SQRTSD xmm, xmm/64
+        ///    SQRTSD xmm1,               xmm2/m64
+        ///   VSQRTSD xmm1,         xmm2, xmm3/m64
+        ///   VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<double> SqrtScalar(Vector128<double> value) => SqrtScalar(value);
-
          /// <summary>
          /// __m128d _mm_sqrt_sd (__m128d a, __m128d b)
-        ///   SQRTSD xmm, xmm/64
+        ///    SQRTSD xmm1,               xmm2/m64
+        ///   VSQRTSD xmm1,         xmm2, xmm3/m64
+        ///   VSQRTSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> SqrtScalar(Vector128<double> upper, Vector128<double> value) => SqrtScalar(upper, value);
  
          /// <summary>
-        /// void _mm_store_sd (double* mem_addr, __m128d a)
-        ///   MOVSD m64, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU  m128,         xmm1
+        ///   VMOVDQU  m128,         xmm1
+        ///   VMOVDQU8 m128 {k1},    xmm1
          /// </summary>
-        public static unsafe void StoreScalar(double* address, Vector128<double> source) => StoreScalar(address, source);
+        public static unsafe void Store(sbyte* address, Vector128<sbyte> source) => Store(address, source);
          /// <summary>
-        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
-        ///   MOVD m32, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU  m128,         xmm1
+        ///   VMOVDQU  m128,         xmm1
+        ///   VMOVDQU8 m128 {k1},    xmm1
          /// </summary>
-        public static unsafe void StoreScalar(int* address, Vector128<int> source) => StoreScalar(address, source);
+        public static unsafe void Store(byte* address, Vector128<byte> source) => Store(address, source);
          /// <summary>
-        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
-        ///   MOVQ m64, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU16 m128 {k1},    xmm1
          /// </summary>
-        public static unsafe void StoreScalar(long* address, Vector128<long> source) => StoreScalar(address, source);
+        public static unsafe void Store(short* address, Vector128<short> source) => Store(address, source);
          /// <summary>
-        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
-        ///   MOVD m32, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU16 m128 {k1},    xmm1
          /// </summary>
-        public static unsafe void StoreScalar(uint* address, Vector128<uint> source) => StoreScalar(address, source);
+        public static unsafe void Store(ushort* address, Vector128<ushort> source) => Store(address, source);
          /// <summary>
-        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
-        ///   MOVQ m64, xmm
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU32 m128 {k1},    xmm1
          /// </summary>
-        public static unsafe void StoreScalar(ulong* address, Vector128<ulong> source) => StoreScalar(address, source);
+        public static unsafe void Store(int* address, Vector128<int> source) => Store(address, source);
+        /// <summary>
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU32 m128 {k1},    xmm1
+        /// </summary>
+        public static unsafe void Store(uint* address, Vector128<uint> source) => Store(address, source);
+        /// <summary>
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU64 m128 {k1},    xmm1
+        /// </summary>
+        public static unsafe void Store(long* address, Vector128<long> source) => Store(address, source);
+        /// <summary>
+        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+        ///    MOVDQU   m128,         xmm1
+        ///   VMOVDQU   m128,         xmm1
+        ///   VMOVDQU64 m128 {k1},    xmm1
+        /// </summary>
+        public static unsafe void Store(ulong* address, Vector128<ulong> source) => Store(address, source);
+        /// <summary>
+        /// void _mm_storeu_pd (double* mem_addr, __m128d a)
+        ///    MOVUPD m128,         xmm1
+        ///   VMOVUPD m128,         xmm1
+        ///   VMOVUPD m128 {k1},    xmm1
+        /// </summary>
+        public static unsafe void Store(double* address, Vector128<double> source) => Store(address, source);
  
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1},    xmm1
          /// </summary>
          public static unsafe void StoreAligned(sbyte* address, Vector128<sbyte> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1},    xmm1
          /// </summary>
          public static unsafe void StoreAligned(byte* address, Vector128<byte> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1},    xmm1
          /// </summary>
          public static unsafe void StoreAligned(short* address, Vector128<short> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1},    xmm1
          /// </summary>
          public static unsafe void StoreAligned(ushort* address, Vector128<ushort> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(int* address, Vector128<int> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA32 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(uint* address, Vector128<uint> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA64 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(long* address, Vector128<long> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQA m128, xmm
+        ///    MOVDQA   m128,         xmm1
+        ///   VMOVDQA   m128,         xmm1
+        ///   VMOVDQA64 m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(ulong* address, Vector128<ulong> source) => StoreAligned(address, source);
          /// <summary>
          /// void _mm_store_pd (double* mem_addr, __m128d a)
-        ///   MOVAPD m128, xmm
+        ///    MOVAPD m128,         xmm1
+        ///   VMOVAPD m128,         xmm1
+        ///   VMOVAPD m128 {k1}{z}, xmm1
          /// </summary>
          public static unsafe void StoreAligned(double* address, Vector128<double> source) => StoreAligned(address, source);
  
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector128<sbyte> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(byte* address, Vector128<byte> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(short* address, Vector128<short> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector128<ushort> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(int* address, Vector128<int> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(uint* address, Vector128<uint> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(long* address, Vector128<long> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVNTDQ m128, xmm
+        ///    MOVNTDQ m128, xmm1
+        ///   VMOVNTDQ m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector128<ulong> source) => StoreAlignedNonTemporal(address, source);
          /// <summary>
          /// void _mm_stream_pd (double* mem_addr, __m128d a)
-        ///   MOVNTPD m128, xmm
+        ///    MOVNTPD m128, xmm1
+        ///   VMOVNTPD m128, xmm1
          /// </summary>
          public static unsafe void StoreAlignedNonTemporal(double* address, Vector128<double> source) => StoreAlignedNonTemporal(address, source);
  
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(sbyte* address, Vector128<sbyte> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(byte* address, Vector128<byte> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(short* address, Vector128<short> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(ushort* address, Vector128<ushort> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(int* address, Vector128<int> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(uint* address, Vector128<uint> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(long* address, Vector128<long> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
-        ///   MOVDQU m128, xmm
-        /// </summary>
-        public static unsafe void Store(ulong* address, Vector128<ulong> source) => Store(address, source);
-        /// <summary>
-        /// void _mm_storeu_pd (double* mem_addr, __m128d a)
-        ///   MOVUPD m128, xmm
-        /// </summary>
-        public static unsafe void Store(double* address, Vector128<double> source) => Store(address, source);
-
          /// <summary>
          /// void _mm_storeh_pd (double* mem_addr, __m128d a)
-        ///   MOVHPD m64, xmm
+        ///    MOVHPD m64, xmm1
+        ///   VMOVHPD m64, xmm1
          /// </summary>
          public static unsafe void StoreHigh(double* address, Vector128<double> source) => StoreHigh(address, source);
-
          /// <summary>
          /// void _mm_storel_pd (double* mem_addr, __m128d a)
-        ///   MOVLPD m64, xmm
+        ///    MOVLPD m64, xmm1
+        ///   VMOVLPD m64, xmm1
          /// </summary>
          public static unsafe void StoreLow(double* address, Vector128<double> source) => StoreLow(address, source);
  
@@ -1478,214 +1814,332 @@ namespace System.Runtime.Intrinsics.X86
          /// </summary>
          public static unsafe void StoreNonTemporal(uint* address, uint value) => StoreNonTemporal(address, value);
  
+        /// <summary>
+        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
+        ///    MOVD m32, xmm1
+        ///   VMOVD m32, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(int* address, Vector128<int> source) => StoreScalar(address, source);
+        /// <summary>
+        /// void _mm_storeu_si32 (void* mem_addr, __m128i a)
+        ///    MOVD m32, xmm1
+        ///   VMOVD m32, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(uint* address, Vector128<uint> source) => StoreScalar(address, source);
+        /// <summary>
+        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
+        ///    MOVQ m64, xmm1
+        ///   VMOVQ m64, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(long* address, Vector128<long> source) => StoreScalar(address, source);
+        /// <summary>
+        /// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
+        ///    MOVQ m64, xmm1
+        ///   VMOVQ m64, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(ulong* address, Vector128<ulong> source) => StoreScalar(address, source);
+        /// <summary>
+        /// void _mm_store_sd (double* mem_addr, __m128d a)
+        ///    MOVSD m64,      xmm1
+        ///   VMOVSD m64,      xmm1
+        ///   VMOVSD m64 {k1}, xmm1
+        /// </summary>
+        public static unsafe void StoreScalar(double* address, Vector128<double> source) => StoreScalar(address, source);
+
          /// <summary>
          /// __m128i _mm_sub_epi8 (__m128i a,  __m128i b)
-        ///   PSUBB xmm, xmm/m128
+        ///    PSUBB xmm1,               xmm2/m128
+        ///   VPSUBB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Subtract(Vector128<byte> left, Vector128<byte> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi8 (__m128i a,  __m128i b)
-        ///   PSUBB xmm, xmm/m128
+        ///    PSUBB xmm1,               xmm2/m128
+        ///   VPSUBB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Subtract(Vector128<sbyte> left, Vector128<sbyte> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi16 (__m128i a,  __m128i b)
-        ///   PSUBW xmm, xmm/m128
+        ///    PSUBW xmm1,               xmm2/m128
+        ///   VPSUBW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Subtract(Vector128<short> left, Vector128<short> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi16 (__m128i a,  __m128i b)
-        ///   PSUBW xmm, xmm/m128
+        ///    PSUBW xmm1,               xmm2/m128
+        ///   VPSUBW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Subtract(Vector128<ushort> left, Vector128<ushort> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi32 (__m128i a,  __m128i b)
-        ///   PSUBD xmm, xmm/m128
+        ///    PSUBD xmm1,               xmm2/m128
+        ///   VPSUBD xmm1,         xmm2, xmm3/m128
+        ///   VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> Subtract(Vector128<int> left, Vector128<int> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi32 (__m128i a,  __m128i b)
-        ///   PSUBD xmm, xmm/m128
+        ///    PSUBD xmm1,               xmm2/m128
+        ///   VPSUBD xmm1,         xmm2, xmm3/m128
+        ///   VPSUBD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<uint> Subtract(Vector128<uint> left, Vector128<uint> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi64 (__m128i a,  __m128i b)
-        ///   PSUBQ xmm, xmm/m128
+        ///    PSUBQ xmm1,               xmm2/m128
+        ///   VPSUBQ xmm1,         xmm2, xmm3/m128
+        ///   VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> Subtract(Vector128<long> left, Vector128<long> right) => Subtract(left, right);
          /// <summary>
          /// __m128i _mm_sub_epi64 (__m128i a,  __m128i b)
-        ///   PSUBQ xmm, xmm/m128
+        ///    PSUBQ xmm1,               xmm2/m128
+        ///   VPSUBQ xmm1,         xmm2, xmm3/m128
+        ///   VPSUBQ xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> Subtract(Vector128<ulong> left, Vector128<ulong> right) => Subtract(left, right);
          /// <summary>
          /// __m128d _mm_sub_pd (__m128d a, __m128d b)
-        ///   SUBPD xmm, xmm/m128
+        ///    SUBPD xmm1,               xmm2/m128
+        ///   VSUBPD xmm1,         xmm2, xmm3/m128
+        ///   VSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Subtract(Vector128<double> left, Vector128<double> right) => Subtract(left, right);
  
          /// <summary>
          /// __m128d _mm_sub_sd (__m128d a, __m128d b)
-        ///   SUBSD xmm, xmm/m64
+        ///    SUBSD xmm1,               xmm2/m64
+        ///   VSUBSD xmm1,         xmm2, xmm3/m64
+        ///   VSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
          /// </summary>
          public static Vector128<double> SubtractScalar(Vector128<double> left, Vector128<double> right) => SubtractScalar(left, right);
  
          /// <summary>
          /// __m128i _mm_subs_epi8 (__m128i a,  __m128i b)
-        ///   PSUBSB xmm, xmm/m128
+        ///    PSUBSB xmm1,               xmm2/m128
+        ///   VPSUBSB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> SubtractSaturate(Vector128<sbyte> left, Vector128<sbyte> right) => SubtractSaturate(left, right);
          /// <summary>
          /// __m128i _mm_subs_epi16 (__m128i a,  __m128i b)
-        ///   PSUBSW xmm, xmm/m128
+        ///    PSUBSW xmm1,               xmm2/m128
+        ///   VPSUBSW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> SubtractSaturate(Vector128<short> left, Vector128<short> right) => SubtractSaturate(left, right);
          /// <summary>
          /// __m128i _mm_subs_epu8 (__m128i a,  __m128i b)
-        ///   PSUBUSB xmm, xmm/m128
+        ///    PSUBUSB xmm1,               xmm2/m128
+        ///   VPSUBUSB xmm1,         xmm2, xmm3/m128
+        ///   VPSUBUSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right) => SubtractSaturate(left, right);
          /// <summary>
          /// __m128i _mm_subs_epu16 (__m128i a,  __m128i b)
-        ///   PSUBUSW xmm, xmm/m128
+        ///    PSUBUSW xmm1,               xmm2/m128
+        ///   VPSUBUSW xmm1,         xmm2, xmm3/m128
+        ///   VPSUBUSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> SubtractSaturate(Vector128<ushort> left, Vector128<ushort> right) => SubtractSaturate(left, right);
  
+        /// <summary>
+        /// __m128i _mm_sad_epu8 (__m128i a,  __m128i b)
+        ///    PSADBW xmm1,               xmm2/m128
+        ///   VPSADBW xmm1,         xmm2, xmm3/m128
+        ///   VPSADBW xmm1 {k1}{z}, xmm2, xmm3/m128
+        /// </summary>
+        public static Vector128<ushort> SumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right) => SumAbsoluteDifferences(left, right);
+
          /// <summary>
          /// __m128i _mm_unpackhi_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKHBW xmm, xmm/m128
+        ///    PUNPCKHBW xmm1,               xmm2/m128
+        ///   VPUNPCKHBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> UnpackHigh(Vector128<byte> left, Vector128<byte> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKHBW xmm, xmm/m128
+        ///    PUNPCKHBW xmm1,               xmm2/m128
+        ///   VPUNPCKHBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> UnpackHigh(Vector128<sbyte> left, Vector128<sbyte> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKHWD xmm, xmm/m128
+        ///    PUNPCKHWD xmm1,               xmm2/m128
+        ///   VPUNPCKHWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> UnpackHigh(Vector128<short> left, Vector128<short> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKHWD xmm, xmm/m128
+        ///    PUNPCKHWD xmm1,               xmm2/m128
+        ///   VPUNPCKHWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> UnpackHigh(Vector128<ushort> left, Vector128<ushort> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKHDQ xmm, xmm/m128
+        ///    PUNPCKHDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> UnpackHigh(Vector128<int> left, Vector128<int> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKHDQ xmm, xmm/m128
+        ///    PUNPCKHDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> UnpackHigh(Vector128<uint> left, Vector128<uint> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKHQDQ xmm, xmm/m128
+        ///    PUNPCKHQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> UnpackHigh(Vector128<long> left, Vector128<long> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128i _mm_unpackhi_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKHQDQ xmm, xmm/m128
+        ///    PUNPCKHQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKHQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKHQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> UnpackHigh(Vector128<ulong> left, Vector128<ulong> right) => UnpackHigh(left, right);
          /// <summary>
          /// __m128d _mm_unpackhi_pd (__m128d a,  __m128d b)
-        ///   UNPCKHPD xmm, xmm/m128
+        ///    UNPCKHPD xmm1,               xmm2/m128
+        ///   VUNPCKHPD xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKHPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> UnpackHigh(Vector128<double> left, Vector128<double> right) => UnpackHigh(left, right);
  
          /// <summary>
          /// __m128i _mm_unpacklo_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKLBW xmm, xmm/m128
+        ///    PUNPCKLBW xmm1,               xmm2/m128
+        ///   VPUNPCKLBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> UnpackLow(Vector128<byte> left, Vector128<byte> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi8 (__m128i a,  __m128i b)
-        ///   PUNPCKLBW xmm, xmm/m128
+        ///    PUNPCKLBW xmm1,               xmm2/m128
+        ///   VPUNPCKLBW xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLBW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> UnpackLow(Vector128<sbyte> left, Vector128<sbyte> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKLWD xmm, xmm/m128
+        ///    PUNPCKLWD xmm1,               xmm2/m128
+        ///   VPUNPCKLWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> UnpackLow(Vector128<short> left, Vector128<short> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi16 (__m128i a,  __m128i b)
-        ///   PUNPCKLWD xmm, xmm/m128
+        ///    PUNPCKLWD xmm1,               xmm2/m128
+        ///   VPUNPCKLWD xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLWD xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> UnpackLow(Vector128<ushort> left, Vector128<ushort> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKLDQ xmm, xmm/m128
+        ///    PUNPCKLDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> UnpackLow(Vector128<int> left, Vector128<int> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi32 (__m128i a,  __m128i b)
-        ///   PUNPCKLDQ xmm, xmm/m128
+        ///    PUNPCKLDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> UnpackLow(Vector128<uint> left, Vector128<uint> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKLQDQ xmm, xmm/m128
+        ///    PUNPCKLQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> UnpackLow(Vector128<long> left, Vector128<long> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128i _mm_unpacklo_epi64 (__m128i a,  __m128i b)
-        ///   PUNPCKLQDQ xmm, xmm/m128
+        ///    PUNPCKLQDQ xmm1,               xmm2/m128
+        ///   VPUNPCKLQDQ xmm1,         xmm2, xmm3/m128
+        ///   VPUNPCKLQDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> UnpackLow(Vector128<ulong> left, Vector128<ulong> right) => UnpackLow(left, right);
          /// <summary>
          /// __m128d _mm_unpacklo_pd (__m128d a,  __m128d b)
-        ///   UNPCKLPD xmm, xmm/m128
+        ///    UNPCKLPD xmm1,               xmm2/m128
+        ///   VUNPCKLPD xmm1,         xmm2, xmm3/m128
+        ///   VUNPCKLPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> UnpackLow(Vector128<double> left, Vector128<double> right) => UnpackLow(left, right);
  
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Xor(Vector128<byte> left, Vector128<byte> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Xor(Vector128<sbyte> left, Vector128<sbyte> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Xor(Vector128<short> left, Vector128<short> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR xmm1,       xmm2/m128
+        ///   VPXOR xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Xor(Vector128<ushort> left, Vector128<ushort> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Xor(Vector128<int> left, Vector128<int> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Xor(Vector128<uint> left, Vector128<uint> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Xor(Vector128<long> left, Vector128<long> right) => Xor(left, right);
          /// <summary>
          /// __m128i _mm_xor_si128 (__m128i a,  __m128i b)
-        ///   PXOR xmm, xmm/m128
+        ///    PXOR  xmm1,               xmm2/m128
+        ///   VPXOR  xmm1,         xmm2, xmm3/m128
+        ///   VPXORQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<ulong> Xor(Vector128<ulong> left, Vector128<ulong> right) => Xor(left, right);
          /// <summary>
          /// __m128d _mm_xor_pd (__m128d a,  __m128d b)
-        ///   XORPD xmm, xmm/m128
+        ///    XORPD xmm1,               xmm2/m128
+        ///   VXORPD xmm1,         xmm2, xmm3/m128
+        ///   VXORPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<double> Xor(Vector128<double> left, Vector128<double> right) => Xor(left, right);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs

index 368557dcba47316f3f1c411db754ef62e26c14de..ffc067b654bb56934eceb6e6997da632aec6ac72 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs
@@ -26,71 +26,119 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128 _mm_addsub_ps (__m128 a, __m128 b)
-        ///   ADDSUBPS xmm, xmm/m128
+        ///    ADDSUBPS xmm1,       xmm2/m128
+        ///   VADDSUBPS xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<float> AddSubtract(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_addsub_pd (__m128d a, __m128d b)
-        ///   ADDSUBPD xmm, xmm/m128
+        ///    ADDSUBPD xmm1,       xmm2/m128
+        ///   VADDSUBPD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<double> AddSubtract(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_hadd_ps (__m128 a, __m128 b)
-        ///   HADDPS xmm, xmm/m128
+        ///    HADDPS xmm1,       xmm2/m128
+        ///   VHADDPS xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<float> HorizontalAdd(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_hadd_pd (__m128d a, __m128d b)
-        ///   HADDPD xmm, xmm/m128
+        ///    HADDPD xmm1,       xmm2/m128
+        ///   VHADDPD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<double> HorizontalAdd(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_hsub_ps (__m128 a, __m128 b)
-        ///   HSUBPS xmm, xmm/m128
+        ///    HSUBPS xmm1,       xmm2/m128
+        ///   VHSUBPS xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<float> HorizontalSubtract(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_hsub_pd (__m128d a, __m128d b)
-        ///   HSUBPD xmm, xmm/m128
+        ///    HSUBPD xmm1,       xmm2/m128
+        ///   VHSUBPD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<double> HorizontalSubtract(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_loaddup_pd (double const* mem_addr)
-        /// MOVDDUP xmm, m64
+        ///    MOVDDUP xmm1,         m64
+        ///   VMOVDDUP xmm1,         m64
+        ///   VMOVDDUP xmm1 {k1}{z}, m64
          /// </summary>
          public static unsafe Vector128<double> LoadAndDuplicateToVector128(double* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
-        ///   LDDQU xmm, m128
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
          /// </summary>
          public static unsafe Vector128<sbyte> LoadDquVector128(sbyte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<byte> LoadDquVector128(byte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<short> LoadDquVector128(short* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<ushort> LoadDquVector128(ushort* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<int> LoadDquVector128(int* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<uint> LoadDquVector128(uint* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<long> LoadDquVector128(long* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<ulong> LoadDquVector128(ulong* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128d _mm_movedup_pd (__m128d a)
-        ///   MOVDDUP xmm, xmm/m64
+        ///    MOVDDUP xmm1,         xmm2/m64
+        ///   VMOVDDUP xmm1,         xmm2/m64
+        ///   VMOVDDUP xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<double> MoveAndDuplicate(Vector128<double> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_movehdup_ps (__m128 a)
-        ///   MOVSHDUP xmm, xmm/m128
+        ///    MOVSHDUP xmm1,         xmm2/m128
+        ///   VMOVSHDUP xmm1,         xmm2/m128
+        ///   VMOVSHDUP xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<float> MoveHighAndDuplicate(Vector128<float> source) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_moveldup_ps (__m128 a)
-        ///   MOVSLDUP xmm, xmm/m128
+        ///    MOVSLDUP xmm1,         xmm2/m128
+        ///   VMOVSLDUP xmm1,         xmm2/m128
+        ///   VMOVSLDUP xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<float> MoveLowAndDuplicate(Vector128<float> source) { throw new PlatformNotSupportedException(); }
  
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs

index 2ec3f9b04d4f14fc397f571660742008380b0f1f..85fff23f4cd9cc26557a893b01cd8aac6e6f0f8b 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs
@@ -26,71 +26,119 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128 _mm_addsub_ps (__m128 a, __m128 b)
-        ///   ADDSUBPS xmm, xmm/m128
+        ///    ADDSUBPS xmm1,       xmm2/m128
+        ///   VADDSUBPS xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<float> AddSubtract(Vector128<float> left, Vector128<float> right) => AddSubtract(left, right);
          /// <summary>
          /// __m128d _mm_addsub_pd (__m128d a, __m128d b)
-        ///   ADDSUBPD xmm, xmm/m128
+        ///    ADDSUBPD xmm1,       xmm2/m128
+        ///   VADDSUBPD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<double> AddSubtract(Vector128<double> left, Vector128<double> right) => AddSubtract(left, right);
  
          /// <summary>
          /// __m128 _mm_hadd_ps (__m128 a, __m128 b)
-        ///   HADDPS xmm, xmm/m128
+        ///    HADDPS xmm1,       xmm2/m128
+        ///   VHADDPS xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<float> HorizontalAdd(Vector128<float> left, Vector128<float> right) => HorizontalAdd(left, right);
          /// <summary>
          /// __m128d _mm_hadd_pd (__m128d a, __m128d b)
-        ///   HADDPD xmm, xmm/m128
+        ///    HADDPD xmm1,       xmm2/m128
+        ///   VHADDPD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<double> HorizontalAdd(Vector128<double> left, Vector128<double> right) => HorizontalAdd(left, right);
  
          /// <summary>
          /// __m128 _mm_hsub_ps (__m128 a, __m128 b)
-        ///   HSUBPS xmm, xmm/m128
+        ///    HSUBPS xmm1,       xmm2/m128
+        ///   VHSUBPS xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<float> HorizontalSubtract(Vector128<float> left, Vector128<float> right) => HorizontalSubtract(left, right);
          /// <summary>
          /// __m128d _mm_hsub_pd (__m128d a, __m128d b)
-        ///   HSUBPD xmm, xmm/m128
+        ///    HSUBPD xmm1,       xmm2/m128
+        ///   VHSUBPD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<double> HorizontalSubtract(Vector128<double> left, Vector128<double> right) => HorizontalSubtract(left, right);
  
          /// <summary>
          /// __m128d _mm_loaddup_pd (double const* mem_addr)
-        /// MOVDDUP xmm, m64
+        ///    MOVDDUP xmm1,         m64
+        ///   VMOVDDUP xmm1,         m64
+        ///   VMOVDDUP xmm1 {k1}{z}, m64
          /// </summary>
          public static unsafe Vector128<double> LoadAndDuplicateToVector128(double* address) => LoadAndDuplicateToVector128(address);
  
          /// <summary>
          /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
-        ///   LDDQU xmm, m128
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
          /// </summary>
          public static unsafe Vector128<sbyte> LoadDquVector128(sbyte* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<byte> LoadDquVector128(byte* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<short> LoadDquVector128(short* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<ushort> LoadDquVector128(ushort* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<int> LoadDquVector128(int* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<uint> LoadDquVector128(uint* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<long> LoadDquVector128(long* address) => LoadDquVector128(address);
+        /// <summary>
+        /// __m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+        ///    LDDQU xmm1, m128
+        ///   VLDDQU xmm1, m128
+        /// </summary>
          public static unsafe Vector128<ulong> LoadDquVector128(ulong* address) => LoadDquVector128(address);
  
          /// <summary>
          /// __m128d _mm_movedup_pd (__m128d a)
-        ///   MOVDDUP xmm, xmm/m64
+        ///    MOVDDUP xmm1,         xmm2/m64
+        ///   VMOVDDUP xmm1,         xmm2/m64
+        ///   VMOVDDUP xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<double> MoveAndDuplicate(Vector128<double> source) => MoveAndDuplicate(source);
-
          /// <summary>
          /// __m128 _mm_movehdup_ps (__m128 a)
-        ///   MOVSHDUP xmm, xmm/m128
+        ///    MOVSHDUP xmm1,         xmm2/m128
+        ///   VMOVSHDUP xmm1,         xmm2/m128
+        ///   VMOVSHDUP xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<float> MoveHighAndDuplicate(Vector128<float> source) => MoveHighAndDuplicate(source);
-
          /// <summary>
          /// __m128 _mm_moveldup_ps (__m128 a)
-        ///   MOVSLDUP xmm, xmm/m128
+        ///    MOVSLDUP xmm1,         xmm2/m128
+        ///   VMOVSLDUP xmm1,         xmm2/m128
+        ///   VMOVSLDUP xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<float> MoveLowAndDuplicate(Vector128<float> source) => MoveLowAndDuplicate(source);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs

index c3a8d1216d477eb3debdc7ce1271d84bdfc453c8..2f5dd6a2edaac7096286077cc8661ab51052f83d 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs
@@ -26,26 +26,30 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// __int64 _mm_extract_epi64 (__m128i a, const int imm8)
-            ///   PEXTRQ reg/m64, xmm, imm8
+            ///    PEXTRQ r/m64, xmm1, imm8
+            ///   VPEXTRQ r/m64, xmm1, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static long Extract(Vector128<long> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
              /// <summary>
              /// __int64 _mm_extract_epi64 (__m128i a, const int imm8)
-            ///   PEXTRQ reg/m64, xmm, imm8
+            ///    PEXTRQ r/m64, xmm1, imm8
+            ///   VPEXTRQ r/m64, xmm1, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong Extract(Vector128<ulong> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
              /// <summary>
              /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
-            ///   PINSRQ xmm, reg/m64, imm8
+            ///    PINSRQ xmm1,       r/m64, imm8
+            ///   VPINSRQ xmm1, xmm2, r/m64, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static Vector128<long> Insert(Vector128<long> value, long data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
              /// <summary>
              /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
-            ///   PINSRQ xmm, reg/m64, imm8
+            ///    PINSRQ xmm1,       r/m64, imm8
+            ///   VPINSRQ xmm1, xmm2, r/m64, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static Vector128<ulong> Insert(Vector128<ulong> value, ulong data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
@@ -53,744 +57,944 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
-        ///   PBLENDW xmm, xmm/m128 imm8
+        ///    PBLENDW xmm1,       xmm2/m128, imm8
+        ///   VPBLENDW xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<short> Blend(Vector128<short> left, Vector128<short> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
-        ///   PBLENDW xmm, xmm/m128 imm8
+        ///    PBLENDW xmm1,       xmm2/m128, imm8
+        ///   VPBLENDW xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ushort> Blend(Vector128<ushort> left, Vector128<ushort> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8)
-        ///   BLENDPS xmm, xmm/m128, imm8
+        ///    BLENDPS xmm1,       xmm2/m128, imm8
+        ///   VBLENDPS xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<float> Blend(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128d _mm_blend_pd (__m128d a, __m128d b, const int imm8)
-        ///   BLENDPD xmm, xmm/m128, imm8
+        ///    BLENDPD xmm1,       xmm2/m128, imm8
+        ///   VBLENDPD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<double> Blend(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<sbyte> BlendVariable(Vector128<sbyte> left, Vector128<sbyte> right, Vector128<sbyte> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<byte> BlendVariable(Vector128<byte> left, Vector128<byte> right, Vector128<byte> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<short> BlendVariable(Vector128<short> left, Vector128<short> right, Vector128<short> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<ushort> BlendVariable(Vector128<ushort> left, Vector128<ushort> right, Vector128<ushort> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<int> BlendVariable(Vector128<int> left, Vector128<int> right, Vector128<int> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<uint> BlendVariable(Vector128<uint> left, Vector128<uint> right, Vector128<uint> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<long> BlendVariable(Vector128<long> left, Vector128<long> right, Vector128<long> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<ulong> BlendVariable(Vector128<ulong> left, Vector128<ulong> right, Vector128<ulong> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_blendv_ps (__m128 a, __m128 b, __m128 mask)
-        ///   BLENDVPS xmm, xmm/m128, xmm0
+        ///    BLENDVPS xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VBLENDVPS xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<float> BlendVariable(Vector128<float> left, Vector128<float> right, Vector128<float> mask) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_blendv_pd (__m128d a, __m128d b, __m128d mask)
-        ///   BLENDVPD xmm, xmm/m128, xmm0
+        ///    BLENDVPD xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VBLENDVPD xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<double> BlendVariable(Vector128<double> left, Vector128<double> right, Vector128<double> mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_ceil_ps (__m128 a)
-        ///   ROUNDPS xmm, xmm/m128, imm8(10)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(10)
          /// </summary>
          public static Vector128<float> Ceiling(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_ceil_pd (__m128d a)
-        ///   ROUNDPD xmm, xmm/m128, imm8(10)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(10)
          /// </summary>
          public static Vector128<double> Ceiling(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128d _mm_ceil_sd (__m128d a)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128 _mm_ceil_ss (__m128 a)
+        ///    ROUNDSS xmm1,       xmm2/m32, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m32, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> CeilingScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> CeilingScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_ceil_ss (__m128 a)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128 _mm_ceil_ss (__m128 a, __m128 b)
+        ///    ROUNDSS xmm1,       xmm2/m32, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m32, imm8(10)
+        /// </summary>
+        public static Vector128<float> CeilingScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_ceil_sd (__m128d a)
+        ///    ROUNDSD xmm1,       xmm2/m64, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m64, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> CeilingScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> CeilingScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_ceil_sd (__m128d a, __m128d b)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        ///    ROUNDSD xmm1,       xmm2/m64, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m64, imm8(10)
          /// </summary>
          public static Vector128<double> CeilingScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128 _mm_ceil_ss (__m128 a, __m128 b)
-        ///   ROUNDSS xmm, xmm/m128, imm8(10)
-        /// </summary>
-        public static Vector128<float> CeilingScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
-        ///   PCMPEQQ xmm, xmm/m128
+        ///    PCMPEQQ xmm1,       xmm2/m128
+        ///   VPCMPEQQ xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> CompareEqual(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
-        ///   PCMPEQQ xmm, xmm/m128
+        ///    PCMPEQQ xmm1,       xmm2/m128
+        ///   VPCMPEQQ xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> CompareEqual(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_cvtepi8_epi16 (__m128i a)
-        ///   PMOVSXBW xmm, xmm
+        ///    PMOVSXBW xmm1,         xmm2/m64
+        ///   VPMOVSXBW xmm1,         xmm2/m64
+        ///   VPMOVSXBW xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<short> ConvertToVector128Int16(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepu8_epi16 (__m128i a)
-        ///   PMOVZXBW xmm, xmm
+        ///    PMOVZXBW xmm1,         xmm2/m64
+        ///   VPMOVZXBW xmm1,         xmm2/m64
+        ///   VPMOVZXBW xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<short> ConvertToVector128Int16(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepi8_epi32 (__m128i a)
-        ///   PMOVSXBD xmm, xmm
+        ///    PMOVSXBD xmm1,         xmm2/m32
+        ///   VPMOVSXBD xmm1,         xmm2/m32
+        ///   VPMOVSXBD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepu8_epi32 (__m128i a)
-        ///   PMOVZXBD xmm, xmm
+        ///    PMOVZXBD xmm1,         xmm2/m32
+        ///   VPMOVZXBD xmm1,         xmm2/m32
+        ///   VPMOVZXBD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepi16_epi32 (__m128i a)
-        ///   PMOVSXWD xmm, xmm
+        ///    PMOVSXWD xmm1,         xmm2/m64
+        ///   VPMOVSXWD xmm1,         xmm2/m64
+        ///   VPMOVSXWD xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<short> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepu16_epi32 (__m128i a)
-        ///   PMOVZXWD xmm, xmm
+        ///    PMOVZXWD xmm1,         xmm2/m64
+        ///   VPMOVZXWD xmm1,         xmm2/m64
+        ///   VPMOVZXWD xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepi8_epi64 (__m128i a)
-        ///   PMOVSXBQ xmm, xmm
+        ///    PMOVSXBQ xmm1,         xmm2/m16
+        ///   VPMOVSXBQ xmm1,         xmm2/m16
+        ///   VPMOVSXBQ xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepu8_epi64 (__m128i a)
-        ///   PMOVZXBQ xmm, xmm
+        ///    PMOVZXBQ xmm1,         xmm2/m16
+        ///   VPMOVZXBQ xmm1,         xmm2/m16
+        ///   VPMOVZXBQ xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepi16_epi64 (__m128i a)
-        ///   PMOVSXWQ xmm, xmm
+        ///    PMOVSXWQ xmm1,         xmm2/m32
+        ///   VPMOVSXWQ xmm1,         xmm2/m32
+        ///   VPMOVSXWQ xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<short> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepu16_epi64 (__m128i a)
-        ///   PMOVZXWQ xmm, xmm
+        ///    PMOVZXWQ xmm1,         xmm2/m32
+        ///   VPMOVZXWQ xmm1,         xmm2/m32
+        ///   VPMOVZXWQ xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepi32_epi64 (__m128i a)
-        ///   PMOVSXDQ xmm, xmm
+        ///    PMOVSXDQ xmm1,         xmm2/m64
+        ///   VPMOVSXDQ xmm1,         xmm2/m64
+        ///   VPMOVSXDQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<int> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_cvtepu32_epi64 (__m128i a)
-        ///   PMOVZXDQ xmm, xmm
+        ///    PMOVZXDQ xmm1,         xmm2/m64
+        ///   VPMOVZXDQ xmm1,         xmm2/m64
+        ///   VPMOVZXDQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        ///   PMOVSXBW xmm, m64
+        ///    PMOVSXBW xmm1,         m64
+        ///   VPMOVSXBW xmm1,         m64
+        ///   VPMOVSXBW xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<short> ConvertToVector128Int16(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVZXBW xmm, m64
+        ///    PMOVZXBW xmm1,         m64
+        ///   VPMOVZXBW xmm1,         m64
+        ///   VPMOVZXBW xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<short> ConvertToVector128Int16(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVSXBD xmm, m32
+        ///    PMOVSXBD xmm1,         m32
+        ///   VPMOVSXBD xmm1,         m32
+        ///   VPMOVSXBD xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVZXBD xmm, m32
+        ///    PMOVZXBD xmm1,         m32
+        ///   VPMOVZXBD xmm1,         m32
+        ///   VPMOVZXBD xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVSXWD xmm, m64
+        ///    PMOVSXWD xmm1,         m64
+        ///   VPMOVSXWD xmm1,         m64
+        ///   VPMOVSXWD xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVZXWD xmm, m64
+        ///    PMOVZXWD xmm1,         m64
+        ///   VPMOVZXWD xmm1,         m64
+        ///   VPMOVZXWD xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVSXBQ xmm, m16
+        ///    PMOVSXBQ xmm1,         m16
+        ///   VPMOVSXBQ xmm1,         m16
+        ///   VPMOVSXBQ xmm1 {k1}{z}, m16
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(sbyte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVZXBQ xmm, m16
+        ///    PMOVZXBQ xmm1,         m16
+        ///   VPMOVZXBQ xmm1,         m16
+        ///   VPMOVZXBQ xmm1 {k1}{z}, m16
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(byte* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVSXWQ xmm, m32
+        ///    PMOVSXWQ xmm1,         m32
+        ///   VPMOVSXWQ xmm1,         m32
+        ///   VPMOVSXWQ xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(short* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVZXWQ xmm, m32
+        ///    PMOVZXWQ xmm1,         m32
+        ///   VPMOVZXWQ xmm1,         m32
+        ///   VPMOVZXWQ xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(ushort* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVSXDQ xmm, m64
+        ///    PMOVSXDQ xmm1,         m64
+        ///   VPMOVSXDQ xmm1,         m64
+        ///   VPMOVSXDQ xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(int* address) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        ///   PMOVZXDQ xmm, m64
+        ///    PMOVZXDQ xmm1,         m64
+        ///   VPMOVZXDQ xmm1,         m64
+        ///   VPMOVZXDQ xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(uint* address) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8)
-        ///   DPPS xmm, xmm/m128, imm8
+        ///    DPPS xmm1,       xmm2/m128, imm8
+        ///   VDPPS xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<float> DotProduct(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_dp_pd (__m128d a, __m128d b, const int imm8)
-        ///   DPPD xmm, xmm/m128, imm8
+        ///    DPPD xmm1,       xmm2/m128, imm8
+        ///   VDPPD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_extract_epi8 (__m128i a, const int imm8)
-        ///   PEXTRB reg/m8, xmm, imm8
+        ///    PEXTRB r/m8, xmm1, imm8
+        ///   VPEXTRB r/m8, xmm1, imm8
          /// </summary>
          public static byte Extract(Vector128<byte> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_extract_epi32 (__m128i a, const int imm8)
-        ///   PEXTRD reg/m32, xmm, imm8
+        ///    PEXTRD r/m32, xmm1, imm8
+        ///   VPEXTRD r/m32, xmm1, imm8
          /// </summary>
          public static int Extract(Vector128<int> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_extract_epi32 (__m128i a, const int imm8)
-        ///   PEXTRD reg/m32, xmm, imm8
+        ///    PEXTRD r/m32, xmm1, imm8
+        ///   VPEXTRD r/m32, xmm1, imm8
          /// </summary>
          public static uint Extract(Vector128<uint> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_extract_ps (__m128 a, const int imm8)
-        ///   EXTRACTPS xmm, xmm/m32, imm8
+        ///    EXTRACTPS r/m32, xmm1, imm8
+        ///   VEXTRACTPS r/m32, xmm1, imm8
          /// </summary>
          public static float Extract(Vector128<float> value, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128 _mm_floor_ps (__m128 a)
-        ///   ROUNDPS xmm, xmm/m128, imm8(9)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(9)
          /// </summary>
          public static Vector128<float> Floor(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_floor_pd (__m128d a)
-        ///   ROUNDPD xmm, xmm/m128, imm8(9)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(9)
          /// </summary>
          public static Vector128<double> Floor(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128d _mm_floor_sd (__m128d a)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_floor_ss (__m128 a)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> FloorScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> FloorScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_floor_ss (__m128 a)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_floor_ss (__m128 a, __m128 b)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9)
+        /// </summary>
+        public static Vector128<float> FloorScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_floor_sd (__m128d a)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> FloorScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<double> FloorScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128d _mm_floor_sd (__m128d a, __m128d b)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9)
          /// </summary>
          public static Vector128<double> FloorScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// __m128 _mm_floor_ss (__m128 a, __m128 b)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
-        /// </summary>
-        public static Vector128<float> FloorScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
-        ///   PINSRB xmm, reg/m8, imm8
+        ///    PINSRB xmm1,       r/m8, imm8
+        ///   VPINSRB xmm1, xmm2, r/m8, imm8
          /// </summary>
          public static Vector128<sbyte> Insert(Vector128<sbyte> value, sbyte data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
-        ///   PINSRB xmm, reg/m8, imm8
+        ///    PINSRB xmm1,       r/m8, imm8
+        ///   VPINSRB xmm1, xmm2, r/m8, imm8
          /// </summary>
          public static Vector128<byte> Insert(Vector128<byte> value, byte data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
-        ///   PINSRD xmm, reg/m32, imm8
+        ///    PINSRD xmm1,       r/m32, imm8
+        ///   VPINSRD xmm1, xmm2, r/m32, imm8
          /// </summary>
          public static Vector128<int> Insert(Vector128<int> value, int data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
-        ///   PINSRD xmm, reg/m32, imm8
+        ///    PINSRD xmm1,       r/m32, imm8
+        ///   VPINSRD xmm1, xmm2, r/m32, imm8
          /// </summary>
          public static Vector128<uint> Insert(Vector128<uint> value, uint data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
-        ///   INSERTPS xmm, xmm/m32, imm8
+        ///    INSERTPS xmm1,       xmm2/m32, imm8
+        ///   VINSERTPS xmm1, xmm2, xmm3/m32, imm8
          /// </summary>
          public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, [ConstantExpected] byte index) { throw new PlatformNotSupportedException(); }
  
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<sbyte> LoadAlignedVector128NonTemporal(sbyte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<byte> LoadAlignedVector128NonTemporal(byte* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<short> LoadAlignedVector128NonTemporal(short* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<ushort> LoadAlignedVector128NonTemporal(ushort* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<int> LoadAlignedVector128NonTemporal(int* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<uint> LoadAlignedVector128NonTemporal(uint* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<long> LoadAlignedVector128NonTemporal(long* address) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<ulong> LoadAlignedVector128NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128i _mm_max_epi8 (__m128i a, __m128i b)
-        ///   PMAXSB xmm, xmm/m128
+        ///    PMAXSB xmm1,               xmm2/m128
+        ///   VPMAXSB xmm1,         xmm2, xmm3/m128
+        ///   VPMAXSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Max(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_max_epu16 (__m128i a, __m128i b)
-        ///   PMAXUW xmm, xmm/m128
+        ///    PMAXUW xmm1,               xmm2/m128
+        ///   VPMAXUW xmm1,         xmm2, xmm3/m128
+        ///   VPMAXUW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Max(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_max_epi32 (__m128i a, __m128i b)
-        ///   PMAXSD xmm, xmm/m128
+        ///    PMAXSD xmm1,               xmm2/m128
+        ///   VPMAXSD xmm1,         xmm2, xmm3/m128
+        ///   VPMAXSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Max(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_max_epu32 (__m128i a, __m128i b)
-        ///   PMAXUD xmm, xmm/m128
+        ///    PMAXUD xmm1,               xmm2/m128
+        ///   VPMAXUD xmm1,         xmm2, xmm3/m128
+        ///   VPMAXUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Max(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_min_epi8 (__m128i a, __m128i b)
-        ///   PMINSB xmm, xmm/m128
+        ///    PMINSB xmm1,               xmm2/m128
+        ///   VPMINSB xmm1,         xmm2, xmm3/m128
+        ///   VPMINSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Min(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_min_epu16 (__m128i a, __m128i b)
-        ///   PMINUW xmm, xmm/m128
+        ///    PMINUW xmm1,               xmm2/m128
+        ///   VPMINUW xmm1,         xmm2, xmm3/m128
+        ///   VPMINUW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Min(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_min_epi32 (__m128i a, __m128i b)
-        ///   PMINSD xmm, xmm/m128
+        ///    PMINSD xmm1,               xmm2/m128
+        ///   VPMINSD xmm1,         xmm2, xmm3/m128
+        ///   VPMINSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Min(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_min_epu32 (__m128i a, __m128i b)
-        ///   PMINUD xmm, xmm/m128
+        ///    PMINUD xmm1,               xmm2/m128
+        ///   VPMINUD xmm1,         xmm2, xmm3/m128
+        ///   VPMINUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Min(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_minpos_epu16 (__m128i a)
-        ///   PHMINPOSUW xmm, xmm/m128
+        ///    PHMINPOSUW xmm1, xmm2/m128
+        ///   VPHMINPOSUW xmm1, xmm2/m128
          /// </summary>
          public static Vector128<ushort> MinHorizontal(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm8)
-        ///   MPSADBW xmm, xmm/m128, imm8
+        ///    MPSADBW xmm1,       xmm2/m128, imm8
+        ///   VMPSADBW xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ushort> MultipleSumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mul_epi32 (__m128i a, __m128i b)
-        ///   PMULDQ xmm, xmm/m128
+        ///    PMULDQ xmm1,               xmm2/m128
+        ///   VPMULDQ xmm1,         xmm2, xmm3/m128
+        ///   VPMULDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Multiply(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
-        ///   PMULLD xmm, xmm/m128
+        ///    PMULLD xmm1,               xmm2/m128
+        ///   VPMULLD xmm1,         xmm2, xmm3/m128
+        ///   VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> MultiplyLow(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
-        ///   PMULLD xmm, xmm/m128
+        ///    PMULLD xmm1,               xmm2/m128
+        ///   VPMULLD xmm1,         xmm2, xmm3/m128
+        ///   VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> MultiplyLow(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_packus_epi32 (__m128i a, __m128i b)
-        ///   PACKUSDW xmm, xmm/m128
+        ///    PACKUSDW xmm1,               xmm2/m128
+        ///   VPACKUSDW xmm1,         xmm2, xmm3/m128
+        ///   VPACKUSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<ushort> PackUnsignedSaturate(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128 _mm_round_ps (__m128 a, int rounding)
-        ///   ROUNDPS xmm, xmm/m128, imm8(8)
-        /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC
-        /// </summary>
-        public static Vector128<float> RoundToNearestInteger(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(9)
-        /// </summary>
-        public static Vector128<float> RoundToNegativeInfinity(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(10)
-        /// </summary>
-        public static Vector128<float> RoundToPositiveInfinity(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(11)
-        /// </summary>
-        public static Vector128<float> RoundToZero(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_CUR_DIRECTION; ROUNDPS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(4)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<float> RoundCurrentDirection(Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_round_pd (__m128d a, int rounding)
-        ///   ROUNDPD xmm, xmm/m128, imm8(8)
-        /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC
-        /// </summary>
-        public static Vector128<double> RoundToNearestInteger(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(9)
-        /// </summary>
-        public static Vector128<double> RoundToNegativeInfinity(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(10)
-        /// </summary>
-        public static Vector128<double> RoundToPositiveInfinity(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(11)
-        /// </summary>
-        public static Vector128<double> RoundToZero(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-        /// <summary>
-        /// _MM_FROUND_CUR_DIRECTION; ROUNDPD xmm, xmm/m128, imm8(4)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(4)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<double> RoundCurrentDirection(Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSD xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(4)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(8)
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(4)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(4)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(4)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(11)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(8)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(8)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToZeroScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
-
+        public static Vector128<float> RoundToNearestInteger(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSD xmm, xmm/m128, imm8(4)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(8)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToNearestInteger(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(8)
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(11)
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToZeroScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToNegativeInfinity(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(8)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToNegativeInfinity(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
          /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(10)
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(11)
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToZeroScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+        /// <summary>
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
+        /// </summary>
+        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToPositiveInfinity(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(8)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToPositiveInfinity(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(10)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(11)
-        /// </summary>
-        public static Vector128<float> RoundToZeroScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<sbyte> LoadAlignedVector128NonTemporal(sbyte* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<byte> LoadAlignedVector128NonTemporal(byte* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(11)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<short> LoadAlignedVector128NonTemporal(short* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToZero(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(11)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<ushort> LoadAlignedVector128NonTemporal(ushort* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToZero(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<int> LoadAlignedVector128NonTemporal(int* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToZeroScalar(Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<uint> LoadAlignedVector128NonTemporal(uint* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<float> RoundToZeroScalar(Vector128<float> upper, Vector128<float> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<long> LoadAlignedVector128NonTemporal(long* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToZeroScalar(Vector128<double> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<ulong> LoadAlignedVector128NonTemporal(ulong* address) { throw new PlatformNotSupportedException(); }
+        public static Vector128<double> RoundToZeroScalar(Vector128<double> upper, Vector128<double> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<ulong> left, Vector128<ulong> right) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs

index 46e8b7aeb946985bd662991c46a6e9c725efe8a9..b69727e61c76bc0af21a66a78d150e878638cf9f 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs
@@ -26,26 +26,30 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// __int64 _mm_extract_epi64 (__m128i a, const int imm8)
-            ///   PEXTRQ reg/m64, xmm, imm8
+            ///    PEXTRQ r/m64, xmm1, imm8
+            ///   VPEXTRQ r/m64, xmm1, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static long Extract(Vector128<long> value, [ConstantExpected] byte index) => Extract(value, index);
              /// <summary>
              /// __int64 _mm_extract_epi64 (__m128i a, const int imm8)
-            ///   PEXTRQ reg/m64, xmm, imm8
+            ///    PEXTRQ r/m64, xmm1, imm8
+            ///   VPEXTRQ r/m64, xmm1, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong Extract(Vector128<ulong> value, [ConstantExpected] byte index) => Extract(value, index);
  
              /// <summary>
              /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
-            ///   PINSRQ xmm, reg/m64, imm8
+            ///    PINSRQ xmm1,       r/m64, imm8
+            ///   VPINSRQ xmm1, xmm2, r/m64, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static Vector128<long> Insert(Vector128<long> value, long data, [ConstantExpected] byte index) => Insert(value, data, index);
              /// <summary>
              /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
-            ///   PINSRQ xmm, reg/m64, imm8
+            ///    PINSRQ xmm1,       r/m64, imm8
+            ///   VPINSRQ xmm1, xmm2, r/m64, imm8
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static Vector128<ulong> Insert(Vector128<ulong> value, ulong data, [ConstantExpected] byte index) => Insert(value, data, index);
@@ -53,744 +57,944 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
-        ///   PBLENDW xmm, xmm/m128 imm8
+        ///    PBLENDW xmm1,       xmm2/m128, imm8
+        ///   VPBLENDW xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<short> Blend(Vector128<short> left, Vector128<short> right, [ConstantExpected] byte control) => Blend(left, right, control);
-
          /// <summary>
          /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
-        ///   PBLENDW xmm, xmm/m128 imm8
+        ///    PBLENDW xmm1,       xmm2/m128, imm8
+        ///   VPBLENDW xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ushort> Blend(Vector128<ushort> left, Vector128<ushort> right, [ConstantExpected] byte control) => Blend(left, right, control);
-
          /// <summary>
          /// __m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8)
-        ///   BLENDPS xmm, xmm/m128, imm8
+        ///    BLENDPS xmm1,       xmm2/m128, imm8
+        ///   VBLENDPS xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<float> Blend(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => Blend(left, right, control);
-
          /// <summary>
          /// __m128d _mm_blend_pd (__m128d a, __m128d b, const int imm8)
-        ///   BLENDPD xmm, xmm/m128, imm8
+        ///    BLENDPD xmm1,       xmm2/m128, imm8
+        ///   VBLENDPD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<double> Blend(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => Blend(left, right, control);
  
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<sbyte> BlendVariable(Vector128<sbyte> left, Vector128<sbyte> right, Vector128<sbyte> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<byte> BlendVariable(Vector128<byte> left, Vector128<byte> right, Vector128<byte> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<short> BlendVariable(Vector128<short> left, Vector128<short> right, Vector128<short> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<ushort> BlendVariable(Vector128<ushort> left, Vector128<ushort> right, Vector128<ushort> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<int> BlendVariable(Vector128<int> left, Vector128<int> right, Vector128<int> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<uint> BlendVariable(Vector128<uint> left, Vector128<uint> right, Vector128<uint> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<long> BlendVariable(Vector128<long> left, Vector128<long> right, Vector128<long> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
-        ///   PBLENDVB xmm, xmm/m128, xmm
+        ///    PBLENDVB xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VPBLENDVB xmm1, xmm2, xmm3/m128, xmm4
          /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
          /// </summary>
          public static Vector128<ulong> BlendVariable(Vector128<ulong> left, Vector128<ulong> right, Vector128<ulong> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128 _mm_blendv_ps (__m128 a, __m128 b, __m128 mask)
-        ///   BLENDVPS xmm, xmm/m128, xmm0
+        ///    BLENDVPS xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VBLENDVPS xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<float> BlendVariable(Vector128<float> left, Vector128<float> right, Vector128<float> mask) => BlendVariable(left, right, mask);
          /// <summary>
          /// __m128d _mm_blendv_pd (__m128d a, __m128d b, __m128d mask)
-        ///   BLENDVPD xmm, xmm/m128, xmm0
+        ///    BLENDVPD xmm1,       xmm2/m128, &lt;XMM0&gt;
+        ///   VBLENDVPD xmm1, xmm2, xmm3/m128, xmm4
          /// </summary>
          public static Vector128<double> BlendVariable(Vector128<double> left, Vector128<double> right, Vector128<double> mask) => BlendVariable(left, right, mask);
  
          /// <summary>
          /// __m128 _mm_ceil_ps (__m128 a)
-        ///   ROUNDPS xmm, xmm/m128, imm8(10)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(10)
          /// </summary>
          public static Vector128<float> Ceiling(Vector128<float> value) => Ceiling(value);
          /// <summary>
          /// __m128d _mm_ceil_pd (__m128d a)
-        ///   ROUNDPD xmm, xmm/m128, imm8(10)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(10)
          /// </summary>
          public static Vector128<double> Ceiling(Vector128<double> value) => Ceiling(value);
  
          /// <summary>
-        /// __m128d _mm_ceil_sd (__m128d a)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128 _mm_ceil_ss (__m128 a)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> CeilingScalar(Vector128<double> value) => CeilingScalar(value);
+        public static Vector128<float> CeilingScalar(Vector128<float> value) => CeilingScalar(value);
          /// <summary>
-        /// __m128 _mm_ceil_ss (__m128 a)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128 _mm_ceil_ss (__m128 a, __m128 b)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10)
+        /// </summary>
+        public static Vector128<float> CeilingScalar(Vector128<float> upper, Vector128<float> value) => CeilingScalar(upper, value);
+        /// <summary>
+        /// __m128d _mm_ceil_sd (__m128d a)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> CeilingScalar(Vector128<float> value) => CeilingScalar(value);
-
+        public static Vector128<double> CeilingScalar(Vector128<double> value) => CeilingScalar(value);
          /// <summary>
          /// __m128d _mm_ceil_sd (__m128d a, __m128d b)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10)
          /// </summary>
          public static Vector128<double> CeilingScalar(Vector128<double> upper, Vector128<double> value) => CeilingScalar(upper, value);
-        /// <summary>
-        /// __m128 _mm_ceil_ss (__m128 a, __m128 b)
-        ///   ROUNDSS xmm, xmm/m128, imm8(10)
-        /// </summary>
-        public static Vector128<float> CeilingScalar(Vector128<float> upper, Vector128<float> value) => CeilingScalar(upper, value);
  
          /// <summary>
          /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
-        ///   PCMPEQQ xmm, xmm/m128
+        ///    PCMPEQQ xmm1,       xmm2/m128
+        ///   VPCMPEQQ xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> CompareEqual(Vector128<long> left, Vector128<long> right) => CompareEqual(left, right);
          /// <summary>
          /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
-        ///   PCMPEQQ xmm, xmm/m128
+        ///    PCMPEQQ xmm1,       xmm2/m128
+        ///   VPCMPEQQ xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ulong> CompareEqual(Vector128<ulong> left, Vector128<ulong> right) => CompareEqual(left, right);
  
          /// <summary>
          /// __m128i _mm_cvtepi8_epi16 (__m128i a)
-        ///   PMOVSXBW xmm, xmm
+        ///    PMOVSXBW xmm1,         xmm2/m64
+        ///   VPMOVSXBW xmm1,         xmm2/m64
+        ///   VPMOVSXBW xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<short> ConvertToVector128Int16(Vector128<sbyte> value) => ConvertToVector128Int16(value);
          /// <summary>
          /// __m128i _mm_cvtepu8_epi16 (__m128i a)
-        ///   PMOVZXBW xmm, xmm
+        ///    PMOVZXBW xmm1,         xmm2/m64
+        ///   VPMOVZXBW xmm1,         xmm2/m64
+        ///   VPMOVZXBW xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<short> ConvertToVector128Int16(Vector128<byte> value) => ConvertToVector128Int16(value);
          /// <summary>
          /// __m128i _mm_cvtepi8_epi32 (__m128i a)
-        ///   PMOVSXBD xmm, xmm
+        ///    PMOVSXBD xmm1,         xmm2/m32
+        ///   VPMOVSXBD xmm1,         xmm2/m32
+        ///   VPMOVSXBD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<sbyte> value) => ConvertToVector128Int32(value);
          /// <summary>
          /// __m128i _mm_cvtepu8_epi32 (__m128i a)
-        ///   PMOVZXBD xmm, xmm
+        ///    PMOVZXBD xmm1,         xmm2/m32
+        ///   VPMOVZXBD xmm1,         xmm2/m32
+        ///   VPMOVZXBD xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<byte> value) => ConvertToVector128Int32(value);
          /// <summary>
          /// __m128i _mm_cvtepi16_epi32 (__m128i a)
-        ///   PMOVSXWD xmm, xmm
+        ///    PMOVSXWD xmm1,         xmm2/m64
+        ///   VPMOVSXWD xmm1,         xmm2/m64
+        ///   VPMOVSXWD xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<short> value) => ConvertToVector128Int32(value);
          /// <summary>
          /// __m128i _mm_cvtepu16_epi32 (__m128i a)
-        ///   PMOVZXWD xmm, xmm
+        ///    PMOVZXWD xmm1,         xmm2/m64
+        ///   VPMOVZXWD xmm1,         xmm2/m64
+        ///   VPMOVZXWD xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<int> ConvertToVector128Int32(Vector128<ushort> value) => ConvertToVector128Int32(value);
          /// <summary>
          /// __m128i _mm_cvtepi8_epi64 (__m128i a)
-        ///   PMOVSXBQ xmm, xmm
+        ///    PMOVSXBQ xmm1,         xmm2/m16
+        ///   VPMOVSXBQ xmm1,         xmm2/m16
+        ///   VPMOVSXBQ xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<sbyte> value) => ConvertToVector128Int64(value);
          /// <summary>
          /// __m128i _mm_cvtepu8_epi64 (__m128i a)
-        ///   PMOVZXBQ xmm, xmm
+        ///    PMOVZXBQ xmm1,         xmm2/m16
+        ///   VPMOVZXBQ xmm1,         xmm2/m16
+        ///   VPMOVZXBQ xmm1 {k1}{z}, xmm2/m16
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<byte> value) => ConvertToVector128Int64(value);
          /// <summary>
          /// __m128i _mm_cvtepi16_epi64 (__m128i a)
-        ///   PMOVSXWQ xmm, xmm
+        ///    PMOVSXWQ xmm1,         xmm2/m32
+        ///   VPMOVSXWQ xmm1,         xmm2/m32
+        ///   VPMOVSXWQ xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<short> value) => ConvertToVector128Int64(value);
          /// <summary>
          /// __m128i _mm_cvtepu16_epi64 (__m128i a)
-        ///   PMOVZXWQ xmm, xmm
+        ///    PMOVZXWQ xmm1,         xmm2/m32
+        ///   VPMOVZXWQ xmm1,         xmm2/m32
+        ///   VPMOVZXWQ xmm1 {k1}{z}, xmm2/m32
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<ushort> value) => ConvertToVector128Int64(value);
          /// <summary>
          /// __m128i _mm_cvtepi32_epi64 (__m128i a)
-        ///   PMOVSXDQ xmm, xmm
+        ///    PMOVSXDQ xmm1,         xmm2/m64
+        ///   VPMOVSXDQ xmm1,         xmm2/m64
+        ///   VPMOVSXDQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<int> value) => ConvertToVector128Int64(value);
          /// <summary>
          /// __m128i _mm_cvtepu32_epi64 (__m128i a)
-        ///   PMOVZXDQ xmm, xmm
+        ///    PMOVZXDQ xmm1,         xmm2/m64
+        ///   VPMOVZXDQ xmm1,         xmm2/m64
+        ///   VPMOVZXDQ xmm1 {k1}{z}, xmm2/m64
          /// </summary>
          public static Vector128<long> ConvertToVector128Int64(Vector128<uint> value) => ConvertToVector128Int64(value);
  
          /// <summary>
-        ///   PMOVSXBW xmm, m64
+        ///    PMOVSXBW xmm1,         m64
+        ///   VPMOVSXBW xmm1,         m64
+        ///   VPMOVSXBW xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<short> ConvertToVector128Int16(sbyte* address) => ConvertToVector128Int16(address);
          /// <summary>
-        ///   PMOVZXBW xmm, m64
+        ///    PMOVZXBW xmm1,         m64
+        ///   VPMOVZXBW xmm1,         m64
+        ///   VPMOVZXBW xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<short> ConvertToVector128Int16(byte* address) => ConvertToVector128Int16(address);
          /// <summary>
-        ///   PMOVSXBD xmm, m32
+        ///    PMOVSXBD xmm1,         m32
+        ///   VPMOVSXBD xmm1,         m32
+        ///   VPMOVSXBD xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(sbyte* address) => ConvertToVector128Int32(address);
          /// <summary>
-        ///   PMOVZXBD xmm, m32
+        ///    PMOVZXBD xmm1,         m32
+        ///   VPMOVZXBD xmm1,         m32
+        ///   VPMOVZXBD xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(byte* address) => ConvertToVector128Int32(address);
          /// <summary>
-        ///   PMOVSXWD xmm, m64
+        ///    PMOVSXWD xmm1,         m64
+        ///   VPMOVSXWD xmm1,         m64
+        ///   VPMOVSXWD xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(short* address) => ConvertToVector128Int32(address);
          /// <summary>
-        ///   PMOVZXWD xmm, m64
+        ///    PMOVZXWD xmm1,         m64
+        ///   VPMOVZXWD xmm1,         m64
+        ///   VPMOVZXWD xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<int> ConvertToVector128Int32(ushort* address) => ConvertToVector128Int32(address);
          /// <summary>
-        ///   PMOVSXBQ xmm, m16
+        ///    PMOVSXBQ xmm1,         m16
+        ///   VPMOVSXBQ xmm1,         m16
+        ///   VPMOVSXBQ xmm1 {k1}{z}, m16
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(sbyte* address) => ConvertToVector128Int64(address);
          /// <summary>
-        ///   PMOVZXBQ xmm, m16
+        ///    PMOVZXBQ xmm1,         m16
+        ///   VPMOVZXBQ xmm1,         m16
+        ///   VPMOVZXBQ xmm1 {k1}{z}, m16
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(byte* address) => ConvertToVector128Int64(address);
          /// <summary>
-        ///   PMOVSXWQ xmm, m32
+        ///    PMOVSXWQ xmm1,         m32
+        ///   VPMOVSXWQ xmm1,         m32
+        ///   VPMOVSXWQ xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(short* address) => ConvertToVector128Int64(address);
          /// <summary>
-        ///   PMOVZXWQ xmm, m32
+        ///    PMOVZXWQ xmm1,         m32
+        ///   VPMOVZXWQ xmm1,         m32
+        ///   VPMOVZXWQ xmm1 {k1}{z}, m32
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(ushort* address) => ConvertToVector128Int64(address);
          /// <summary>
-        ///   PMOVSXDQ xmm, m64
+        ///    PMOVSXDQ xmm1,         m64
+        ///   VPMOVSXDQ xmm1,         m64
+        ///   VPMOVSXDQ xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(int* address) => ConvertToVector128Int64(address);
          /// <summary>
-        ///   PMOVZXDQ xmm, m64
+        ///    PMOVZXDQ xmm1,         m64
+        ///   VPMOVZXDQ xmm1,         m64
+        ///   VPMOVZXDQ xmm1 {k1}{z}, m64
          /// The native signature does not exist. We provide this additional overload for completeness.
          /// </summary>
          public static unsafe Vector128<long> ConvertToVector128Int64(uint* address) => ConvertToVector128Int64(address);
  
          /// <summary>
          /// __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8)
-        ///   DPPS xmm, xmm/m128, imm8
+        ///    DPPS xmm1,       xmm2/m128, imm8
+        ///   VDPPS xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<float> DotProduct(Vector128<float> left, Vector128<float> right, [ConstantExpected] byte control) => DotProduct(left, right, control);
          /// <summary>
          /// __m128d _mm_dp_pd (__m128d a, __m128d b, const int imm8)
-        ///   DPPD xmm, xmm/m128, imm8
+        ///    DPPD xmm1,       xmm2/m128, imm8
+        ///   VDPPD xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, [ConstantExpected] byte control) => DotProduct(left, right, control);
  
          /// <summary>
          /// int _mm_extract_epi8 (__m128i a, const int imm8)
-        ///   PEXTRB reg/m8, xmm, imm8
+        ///    PEXTRB r/m8, xmm1, imm8
+        ///   VPEXTRB r/m8, xmm1, imm8
          /// </summary>
          public static byte Extract(Vector128<byte> value, [ConstantExpected] byte index) => Extract(value, index);
          /// <summary>
          /// int _mm_extract_epi32 (__m128i a, const int imm8)
-        ///   PEXTRD reg/m32, xmm, imm8
+        ///    PEXTRD r/m32, xmm1, imm8
+        ///   VPEXTRD r/m32, xmm1, imm8
          /// </summary>
          public static int Extract(Vector128<int> value, [ConstantExpected] byte index) => Extract(value, index);
          /// <summary>
          /// int _mm_extract_epi32 (__m128i a, const int imm8)
-        ///   PEXTRD reg/m32, xmm, imm8
+        ///    PEXTRD r/m32, xmm1, imm8
+        ///   VPEXTRD r/m32, xmm1, imm8
          /// </summary>
          public static uint Extract(Vector128<uint> value, [ConstantExpected] byte index) => Extract(value, index);
          /// <summary>
          /// int _mm_extract_ps (__m128 a, const int imm8)
-        ///   EXTRACTPS xmm, xmm/m32, imm8
+        ///    EXTRACTPS r/m32, xmm1, imm8
+        ///   VEXTRACTPS r/m32, xmm1, imm8
          /// </summary>
          public static float Extract(Vector128<float> value, [ConstantExpected] byte index) => Extract(value, index);
  
          /// <summary>
          /// __m128 _mm_floor_ps (__m128 a)
-        ///   ROUNDPS xmm, xmm/m128, imm8(9)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(9)
          /// </summary>
          public static Vector128<float> Floor(Vector128<float> value) => Floor(value);
          /// <summary>
          /// __m128d _mm_floor_pd (__m128d a)
-        ///   ROUNDPD xmm, xmm/m128, imm8(9)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(9)
          /// </summary>
          public static Vector128<double> Floor(Vector128<double> value) => Floor(value);
  
          /// <summary>
-        /// __m128d _mm_floor_sd (__m128d a)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_floor_ss (__m128 a)
+        ///    ROUNDSS xmm1,       xmm2/m32, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m32, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> FloorScalar(Vector128<double> value) => FloorScalar(value);
+        public static Vector128<float> FloorScalar(Vector128<float> value) => FloorScalar(value);
          /// <summary>
-        /// __m128 _mm_floor_ss (__m128 a)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_floor_ss (__m128 a, __m128 b)
+        ///    ROUNDSS xmm1,       xmm2/m32, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m32, imm8(9)
+        /// </summary>
+        public static Vector128<float> FloorScalar(Vector128<float> upper, Vector128<float> value) => FloorScalar(upper, value);
+        /// <summary>
+        /// __m128d _mm_floor_sd (__m128d a)
+        ///    ROUNDSD xmm1,       xmm2/m64, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m64, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> FloorScalar(Vector128<float> value) => FloorScalar(value);
-
+        public static Vector128<double> FloorScalar(Vector128<double> value) => FloorScalar(value);
          /// <summary>
          /// __m128d _mm_floor_sd (__m128d a, __m128d b)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        ///    ROUNDSD xmm1,       xmm2/m64, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m64, imm8(9)
          /// </summary>
          public static Vector128<double> FloorScalar(Vector128<double> upper, Vector128<double> value) => FloorScalar(upper, value);
-        /// <summary>
-        /// __m128 _mm_floor_ss (__m128 a, __m128 b)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
-        /// </summary>
-        public static Vector128<float> FloorScalar(Vector128<float> upper, Vector128<float> value) => FloorScalar(upper, value);
  
          /// <summary>
          /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
-        ///   PINSRB xmm, reg/m8, imm8
+        ///    PINSRB xmm1,       r/m8, imm8
+        ///   VPINSRB xmm1, xmm2, r/m8, imm8
          /// </summary>
          public static Vector128<sbyte> Insert(Vector128<sbyte> value, sbyte data, [ConstantExpected] byte index) => Insert(value, data, index);
          /// <summary>
          /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
-        ///   PINSRB xmm, reg/m8, imm8
+        ///    PINSRB xmm1,       r/m8, imm8
+        ///   VPINSRB xmm1, xmm2, r/m8, imm8
          /// </summary>
          public static Vector128<byte> Insert(Vector128<byte> value, byte data, [ConstantExpected] byte index) => Insert(value, data, index);
          /// <summary>
          /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
-        ///   PINSRD xmm, reg/m32, imm8
+        ///    PINSRD xmm1,       r/m32, imm8
+        ///   VPINSRD xmm1, xmm2, r/m32, imm8
          /// </summary>
          public static Vector128<int> Insert(Vector128<int> value, int data, [ConstantExpected] byte index) => Insert(value, data, index);
          /// <summary>
          /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
-        ///   PINSRD xmm, reg/m32, imm8
+        ///    PINSRD xmm1,       r/m32, imm8
+        ///   VPINSRD xmm1, xmm2, r/m32, imm8
          /// </summary>
          public static Vector128<uint> Insert(Vector128<uint> value, uint data, [ConstantExpected] byte index) => Insert(value, data, index);
          /// <summary>
          /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
-        ///   INSERTPS xmm, xmm/m32, imm8
+        ///    INSERTPS xmm1,       xmm2/m32, imm8
+        ///   VINSERTPS xmm1, xmm2, xmm3/m32, imm8
          /// </summary>
          public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, [ConstantExpected] byte index) => Insert(value, data, index);
  
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<sbyte> LoadAlignedVector128NonTemporal(sbyte* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<byte> LoadAlignedVector128NonTemporal(byte* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<short> LoadAlignedVector128NonTemporal(short* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<ushort> LoadAlignedVector128NonTemporal(ushort* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<int> LoadAlignedVector128NonTemporal(int* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<uint> LoadAlignedVector128NonTemporal(uint* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<long> LoadAlignedVector128NonTemporal(long* address) => LoadAlignedVector128NonTemporal(address);
+        /// <summary>
+        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
+        ///    MOVNTDQA xmm1, m128
+        ///   VMOVNTDQA xmm1, m128
+        /// </summary>
+        public static unsafe Vector128<ulong> LoadAlignedVector128NonTemporal(ulong* address) => LoadAlignedVector128NonTemporal(address);
+
          /// <summary>
          /// __m128i _mm_max_epi8 (__m128i a, __m128i b)
-        ///   PMAXSB xmm, xmm/m128
+        ///    PMAXSB xmm1,               xmm2/m128
+        ///   VPMAXSB xmm1,         xmm2, xmm3/m128
+        ///   VPMAXSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Max(Vector128<sbyte> left, Vector128<sbyte> right) => Max(left, right);
          /// <summary>
          /// __m128i _mm_max_epu16 (__m128i a, __m128i b)
-        ///   PMAXUW xmm, xmm/m128
+        ///    PMAXUW xmm1,               xmm2/m128
+        ///   VPMAXUW xmm1,         xmm2, xmm3/m128
+        ///   VPMAXUW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Max(Vector128<ushort> left, Vector128<ushort> right) => Max(left, right);
          /// <summary>
          /// __m128i _mm_max_epi32 (__m128i a, __m128i b)
-        ///   PMAXSD xmm, xmm/m128
+        ///    PMAXSD xmm1,               xmm2/m128
+        ///   VPMAXSD xmm1,         xmm2, xmm3/m128
+        ///   VPMAXSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Max(Vector128<int> left, Vector128<int> right) => Max(left, right);
          /// <summary>
          /// __m128i _mm_max_epu32 (__m128i a, __m128i b)
-        ///   PMAXUD xmm, xmm/m128
+        ///    PMAXUD xmm1,               xmm2/m128
+        ///   VPMAXUD xmm1,         xmm2, xmm3/m128
+        ///   VPMAXUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Max(Vector128<uint> left, Vector128<uint> right) => Max(left, right);
  
          /// <summary>
          /// __m128i _mm_min_epi8 (__m128i a, __m128i b)
-        ///   PMINSB xmm, xmm/m128
+        ///    PMINSB xmm1,               xmm2/m128
+        ///   VPMINSB xmm1,         xmm2, xmm3/m128
+        ///   VPMINSB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Min(Vector128<sbyte> left, Vector128<sbyte> right) => Min(left, right);
          /// <summary>
          /// __m128i _mm_min_epu16 (__m128i a, __m128i b)
-        ///   PMINUW xmm, xmm/m128
+        ///    PMINUW xmm1,               xmm2/m128
+        ///   VPMINUW xmm1,         xmm2, xmm3/m128
+        ///   VPMINUW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<ushort> Min(Vector128<ushort> left, Vector128<ushort> right) => Min(left, right);
          /// <summary>
          /// __m128i _mm_min_epi32 (__m128i a, __m128i b)
-        ///   PMINSD xmm, xmm/m128
+        ///    PMINSD xmm1,               xmm2/m128
+        ///   VPMINSD xmm1,         xmm2, xmm3/m128
+        ///   VPMINSD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> Min(Vector128<int> left, Vector128<int> right) => Min(left, right);
          /// <summary>
          /// __m128i _mm_min_epu32 (__m128i a, __m128i b)
-        ///   PMINUD xmm, xmm/m128
+        ///    PMINUD xmm1,               xmm2/m128
+        ///   VPMINUD xmm1,         xmm2, xmm3/m128
+        ///   VPMINUD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Min(Vector128<uint> left, Vector128<uint> right) => Min(left, right);
  
          /// <summary>
          /// __m128i _mm_minpos_epu16 (__m128i a)
-        ///   PHMINPOSUW xmm, xmm/m128
+        ///    PHMINPOSUW xmm1, xmm2/m128
+        ///   VPHMINPOSUW xmm1, xmm2/m128
          /// </summary>
          public static Vector128<ushort> MinHorizontal(Vector128<ushort> value) => MinHorizontal(value);
  
          /// <summary>
          /// __m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm8)
-        ///   MPSADBW xmm, xmm/m128, imm8
+        ///    MPSADBW xmm1,       xmm2/m128, imm8
+        ///   VMPSADBW xmm1, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ushort> MultipleSumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right, [ConstantExpected] byte mask) => MultipleSumAbsoluteDifferences(left, right, mask);
  
          /// <summary>
          /// __m128i _mm_mul_epi32 (__m128i a, __m128i b)
-        ///   PMULDQ xmm, xmm/m128
+        ///    PMULDQ xmm1,               xmm2/m128
+        ///   VPMULDQ xmm1,         xmm2, xmm3/m128
+        ///   VPMULDQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
          /// </summary>
          public static Vector128<long> Multiply(Vector128<int> left, Vector128<int> right) => Multiply(left, right);
  
          /// <summary>
          /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
-        ///   PMULLD xmm, xmm/m128
+        ///    PMULLD xmm1,               xmm2/m128
+        ///   VPMULLD xmm1,         xmm2, xmm3/m128
+        ///   VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<int> MultiplyLow(Vector128<int> left, Vector128<int> right) => MultiplyLow(left, right);
          /// <summary>
          /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
-        ///   PMULLD xmm, xmm/m128
+        ///    PMULLD xmm1,               xmm2/m128
+        ///   VPMULLD xmm1,         xmm2, xmm3/m128
+        ///   VPMULLD xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<uint> MultiplyLow(Vector128<uint> left, Vector128<uint> right) => MultiplyLow(left, right);
  
          /// <summary>
          /// __m128i _mm_packus_epi32 (__m128i a, __m128i b)
-        ///   PACKUSDW xmm, xmm/m128
+        ///    PACKUSDW xmm1,               xmm2/m128
+        ///   VPACKUSDW xmm1,         xmm2, xmm3/m128
+        ///   VPACKUSDW xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
          /// </summary>
          public static Vector128<ushort> PackUnsignedSaturate(Vector128<int> left, Vector128<int> right) => PackUnsignedSaturate(left, right);
  
          /// <summary>
-        /// __m128 _mm_round_ps (__m128 a, int rounding)
-        ///   ROUNDPS xmm, xmm/m128, imm8(8)
-        /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC
-        /// </summary>
-        public static Vector128<float> RoundToNearestInteger(Vector128<float> value) => RoundToNearestInteger(value);
-        /// <summary>
-        /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(9)
-        /// </summary>
-        public static Vector128<float> RoundToNegativeInfinity(Vector128<float> value) => RoundToNegativeInfinity(value);
-        /// <summary>
-        /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(10)
-        /// </summary>
-        public static Vector128<float> RoundToPositiveInfinity(Vector128<float> value) => RoundToPositiveInfinity(value);
-        /// <summary>
-        /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(11)
-        /// </summary>
-        public static Vector128<float> RoundToZero(Vector128<float> value) => RoundToZero(value);
-        /// <summary>
-        /// _MM_FROUND_CUR_DIRECTION; ROUNDPS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(4)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<float> RoundCurrentDirection(Vector128<float> value) => RoundCurrentDirection(value);
-
-        /// <summary>
-        /// __m128d _mm_round_pd (__m128d a, int rounding)
-        ///   ROUNDPD xmm, xmm/m128, imm8(8)
-        /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC
-        /// </summary>
-        public static Vector128<double> RoundToNearestInteger(Vector128<double> value) => RoundToNearestInteger(value);
-        /// <summary>
-        /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(9)
-        /// </summary>
-        public static Vector128<double> RoundToNegativeInfinity(Vector128<double> value) => RoundToNegativeInfinity(value);
-        /// <summary>
-        /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(10)
-        /// </summary>
-        public static Vector128<double> RoundToPositiveInfinity(Vector128<double> value) => RoundToPositiveInfinity(value);
          /// <summary>
-        /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(11)
-        /// </summary>
-        public static Vector128<double> RoundToZero(Vector128<double> value) => RoundToZero(value);
-        /// <summary>
-        /// _MM_FROUND_CUR_DIRECTION; ROUNDPD xmm, xmm/m128, imm8(4)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(4)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(4)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<double> RoundCurrentDirection(Vector128<double> value) => RoundCurrentDirection(value);
  
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSD xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSS xmm1,       xmm2/m32, imm8(4)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m32, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> value) => RoundCurrentDirectionScalar(value);
+        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> value) => RoundCurrentDirectionScalar(value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(8)
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSS xmm1,       xmm2/m32, imm8(4)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m32, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> value) => RoundToNearestIntegerScalar(value);
+        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> upper, Vector128<float> value) => RoundCurrentDirectionScalar(upper, value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSD xmm1,       xmm2/m64, imm8(4)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m64, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> value) => RoundToNegativeInfinityScalar(value);
+        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> value) => RoundCurrentDirectionScalar(value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION)
+        ///    ROUNDSD xmm1,       xmm2/m64, imm8(4)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m64, imm8(4)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> value) => RoundToPositiveInfinityScalar(value);
+        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> upper, Vector128<double> value) => RoundCurrentDirectionScalar(upper, value);
+
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(11)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(8)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(8)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToZeroScalar(Vector128<double> value) => RoundToZeroScalar(value);
-
+        public static Vector128<float> RoundToNearestInteger(Vector128<float> value) => RoundToNearestInteger(value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSD xmm, xmm/m128, imm8(4)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(8)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> upper, Vector128<double> value) => RoundCurrentDirectionScalar(upper, value);
+        public static Vector128<double> RoundToNearestInteger(Vector128<double> value) => RoundToNearestInteger(value);
+
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(8)
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> upper, Vector128<double> value) => RoundToNearestIntegerScalar(upper, value);
+        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> value) => RoundToNearestIntegerScalar(value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> upper, Vector128<double> value) => RoundToNegativeInfinityScalar(upper, value);
+        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> upper, Vector128<float> value) => RoundToNearestIntegerScalar(upper, value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(10)
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> upper, Vector128<double> value) => RoundToPositiveInfinityScalar(upper, value);
+        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> value) => RoundToNearestIntegerScalar(value);
          /// <summary>
-        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
-        ///   ROUNDSD xmm, xmm/m128, imm8(11)
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(8)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(8)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<double> RoundToZeroScalar(Vector128<double> upper, Vector128<double> value) => RoundToZeroScalar(upper, value);
+        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> upper, Vector128<double> value) => RoundToNearestIntegerScalar(upper, value);
  
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> value) => RoundCurrentDirectionScalar(value);
+        public static Vector128<float> RoundToNegativeInfinity(Vector128<float> value) => RoundToNegativeInfinity(value);
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(8)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(9)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> value) => RoundToNearestIntegerScalar(value);
+        public static Vector128<double> RoundToNegativeInfinity(Vector128<double> value) => RoundToNegativeInfinity(value);
+
          /// <summary>
          /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> value) => RoundToNegativeInfinityScalar(value);
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(10)
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> value) => RoundToPositiveInfinityScalar(value);
+        public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> upper, Vector128<float> value) => RoundToNegativeInfinityScalar(upper, value);
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(11)
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9)
          /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToZeroScalar(Vector128<float> value) => RoundToZeroScalar(value);
+        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> value) => RoundToNegativeInfinityScalar(value);
+        /// <summary>
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(9)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(9)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
+        /// </summary>
+        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> upper, Vector128<double> value) => RoundToNegativeInfinityScalar(upper, value);
  
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION)
-        ///   ROUNDSS xmm, xmm/m128, imm8(4)
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> upper, Vector128<float> value) => RoundCurrentDirectionScalar(upper, value);
+        public static Vector128<float> RoundToPositiveInfinity(Vector128<float> value) => RoundToPositiveInfinity(value);
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(8)
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(10)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> upper, Vector128<float> value) => RoundToNearestIntegerScalar(upper, value);
+        public static Vector128<double> RoundToPositiveInfinity(Vector128<double> value) => RoundToPositiveInfinity(value);
+
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(9)
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> upper, Vector128<float> value) => RoundToNegativeInfinityScalar(upper, value);
+        public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> value) => RoundToPositiveInfinityScalar(value);
          /// <summary>
          /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(10)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
          public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> upper, Vector128<float> value) => RoundToPositiveInfinityScalar(upper, value);
          /// <summary>
-        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
-        ///   ROUNDSS xmm, xmm/m128, imm8(11)
-        /// </summary>
-        public static Vector128<float> RoundToZeroScalar(Vector128<float> upper, Vector128<float> value) => RoundToZeroScalar(upper, value);
-
-        /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<sbyte> LoadAlignedVector128NonTemporal(sbyte* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> value) => RoundToPositiveInfinityScalar(value);
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(10)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(10)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<byte> LoadAlignedVector128NonTemporal(byte* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> upper, Vector128<double> value) => RoundToPositiveInfinityScalar(upper, value);
+
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128 _mm_round_ps (__m128 a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
+        ///    ROUNDPS xmm1, xmm2/m128, imm8(11)
+        ///   VROUNDPS xmm1, xmm2/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<short> LoadAlignedVector128NonTemporal(short* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<float> RoundToZero(Vector128<float> value) => RoundToZero(value);
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_pd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
+        ///    ROUNDPD xmm1, xmm2/m128, imm8(11)
+        ///   VROUNDPD xmm1, xmm2/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<ushort> LoadAlignedVector128NonTemporal(ushort* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<double> RoundToZero(Vector128<double> value) => RoundToZero(value);
+
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<int> LoadAlignedVector128NonTemporal(int* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<float> RoundToZeroScalar(Vector128<float> value) => RoundToZeroScalar(value);
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSS xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSS xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<uint> LoadAlignedVector128NonTemporal(uint* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<float> RoundToZeroScalar(Vector128<float> upper, Vector128<float> value) => RoundToZeroScalar(upper, value);
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<long> LoadAlignedVector128NonTemporal(long* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<double> RoundToZeroScalar(Vector128<double> value) => RoundToZeroScalar(value);
          /// <summary>
-        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
-        ///   MOVNTDQA xmm, m128
+        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+        ///    ROUNDSD xmm1,       xmm2/m128, imm8(11)
+        ///   VROUNDSD xmm1, xmm2, xmm3/m128, imm8(11)
+        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
          /// </summary>
-        public static unsafe Vector128<ulong> LoadAlignedVector128NonTemporal(ulong* address) => LoadAlignedVector128NonTemporal(address);
+        public static Vector128<double> RoundToZeroScalar(Vector128<double> upper, Vector128<double> value) => RoundToZeroScalar(upper, value);
  
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<sbyte> left, Vector128<sbyte> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<byte> left, Vector128<byte> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<short> left, Vector128<short> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<ushort> left, Vector128<ushort> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<int> left, Vector128<int> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<uint> left, Vector128<uint> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<long> left, Vector128<long> right) => TestC(left, right);
          /// <summary>
          /// int _mm_testc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; CF=1
+        ///   VPTEST xmm1, xmm2/m128    ; CF=1
          /// </summary>
          public static bool TestC(Vector128<ulong> left, Vector128<ulong> right) => TestC(left, right);
  
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<sbyte> left, Vector128<sbyte> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<byte> left, Vector128<byte> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<short> left, Vector128<short> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<ushort> left, Vector128<ushort> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<int> left, Vector128<int> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<uint> left, Vector128<uint> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<long> left, Vector128<long> right) => TestNotZAndNotC(left, right);
          /// <summary>
          /// int _mm_testnzc_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=0 &amp;&amp; CF=0
          /// </summary>
          public static bool TestNotZAndNotC(Vector128<ulong> left, Vector128<ulong> right) => TestNotZAndNotC(left, right);
  
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<sbyte> left, Vector128<sbyte> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<byte> left, Vector128<byte> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<short> left, Vector128<short> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<ushort> left, Vector128<ushort> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<int> left, Vector128<int> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<uint> left, Vector128<uint> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<long> left, Vector128<long> right) => TestZ(left, right);
          /// <summary>
          /// int _mm_testz_si128 (__m128i a, __m128i b)
-        ///   PTEST xmm, xmm/m128
+        ///    PTEST xmm1, xmm2/m128    ; ZF=1
+        ///   VPTEST xmm1, xmm2/m128    ; ZF=1
          /// </summary>
          public static bool TestZ(Vector128<ulong> left, Vector128<ulong> right) => TestZ(left, right);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs

index e06b3545be9bf6b54a83cf2b4da745e235a937db..a92a80e1ae1c0dc673067ef0b36ae350a9f60928 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs
@@ -25,7 +25,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _mm_crc32_u64 (unsigned __int64 crc, unsigned __int64 v)
-            ///   CRC32 reg, reg/m64
+            ///   CRC32 r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong Crc32(ulong crc, ulong data) { throw new PlatformNotSupportedException(); }
@@ -33,23 +33,24 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_cmpgt_epi64 (__m128i a, __m128i b)
-        ///   PCMPGTQ xmm, xmm/m128
+        ///    PCMPGTQ xmm1,       xmm2/m128
+        ///   VPCMPGTQ xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> CompareGreaterThan(Vector128<long> left, Vector128<long> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// unsigned int _mm_crc32_u8 (unsigned int crc, unsigned char v)
-        ///   CRC32 reg, reg/m8
+        ///   CRC32 r32, r/m8
          /// </summary>
          public static uint Crc32(uint crc, byte data) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// unsigned int _mm_crc32_u16 (unsigned int crc, unsigned short v)
-        ///   CRC32 reg, reg/m16
+        ///   CRC32 r32, r/m16
          /// </summary>
          public static uint Crc32(uint crc, ushort data) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// unsigned int _mm_crc32_u32 (unsigned int crc, unsigned int v)
-        ///   CRC32 reg, reg/m32
+        ///   CRC32 r32, r/m32
          /// </summary>
          public static uint Crc32(uint crc, uint data) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs

index 83ec7c0a536d59eab81eca509b2f78699ff46357..7eb1c84f5913e0027dea2354d56bce129241d55a 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs
@@ -25,7 +25,7 @@ namespace System.Runtime.Intrinsics.X86
  
              /// <summary>
              /// unsigned __int64 _mm_crc32_u64 (unsigned __int64 crc, unsigned __int64 v)
-            ///   CRC32 reg, reg/m64
+            ///   CRC32 r64, r/m64
              /// This intrinsic is only available on 64-bit processes
              /// </summary>
              public static ulong Crc32(ulong crc, ulong data) => Crc32(crc, data);
@@ -33,23 +33,24 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_cmpgt_epi64 (__m128i a, __m128i b)
-        ///   PCMPGTQ xmm, xmm/m128
+        ///    PCMPGTQ xmm1,       xmm2/m128
+        ///   VPCMPGTQ xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<long> CompareGreaterThan(Vector128<long> left, Vector128<long> right) => CompareGreaterThan(left, right);
  
          /// <summary>
          /// unsigned int _mm_crc32_u8 (unsigned int crc, unsigned char v)
-        ///   CRC32 reg, reg/m8
+        ///   CRC32 r32, r/m8
          /// </summary>
          public static uint Crc32(uint crc, byte data) => Crc32(crc, data);
          /// <summary>
          /// unsigned int _mm_crc32_u16 (unsigned int crc, unsigned short v)
-        ///   CRC32 reg, reg/m16
+        ///   CRC32 r32, r/m16
          /// </summary>
          public static uint Crc32(uint crc, ushort data) => Crc32(crc, data);
          /// <summary>
          /// unsigned int _mm_crc32_u32 (unsigned int crc, unsigned int v)
-        ///   CRC32 reg, reg/m32
+        ///   CRC32 r32, r/m32
          /// </summary>
          public static uint Crc32(uint crc, uint data) => Crc32(crc, data);
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs

index 8ea7840fd6f0f71a13c59b5981a45b5aac752b43..d5a1abc545a16a262dfe28b38b0b5901a748744d 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs
@@ -27,145 +27,175 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_abs_epi8 (__m128i a)
-        ///   PABSB xmm, xmm/m128
+        ///    PABSB xmm1,         xmm2/m128
+        ///   VPABSB xmm1,         xmm2/m128
+        ///   VPABSB xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<byte> Abs(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_abs_epi16 (__m128i a)
-        ///   PABSW xmm, xmm/m128
+        ///    PABSW xmm1,         xmm2/m128
+        ///   VPABSW xmm1,         xmm2/m128
+        ///   VPABSW xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<ushort> Abs(Vector128<short> value) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_abs_epi32 (__m128i a)
-        ///   PABSD xmm, xmm/m128
+        ///    PABSD xmm1,         xmm2/m128
+        ///   VPABSD xmm1,         xmm2/m128
+        ///   VPABSD xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Abs(Vector128<int> value) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<sbyte> AlignRight(Vector128<sbyte> left, Vector128<sbyte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
-        /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<short> AlignRight(Vector128<short> left, Vector128<short> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ushort> AlignRight(Vector128<ushort> left, Vector128<ushort> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<int> AlignRight(Vector128<int> left, Vector128<int> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<uint> AlignRight(Vector128<uint> left, Vector128<uint> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<long> AlignRight(Vector128<long> left, Vector128<long> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<ulong> AlignRight(Vector128<ulong> left, Vector128<ulong> right, [ConstantExpected] byte mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_hadd_epi16 (__m128i a, __m128i b)
-        ///   PHADDW xmm, xmm/m128
+        ///    PHADDW xmm1,       xmm2/m128
+        ///   VPHADDW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalAdd(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_hadd_epi32 (__m128i a, __m128i b)
-        ///   PHADDD xmm, xmm/m128
+        ///    PHADDD xmm1,       xmm2/m128
+        ///   VPHADDD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> HorizontalAdd(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_hadds_epi16 (__m128i a, __m128i b)
-        ///   PHADDSW xmm, xmm/m128
+        ///    PHADDSW xmm1,       xmm2/m128
+        ///   VPHADDSW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalAddSaturate(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_hsub_epi16 (__m128i a, __m128i b)
-        ///   PHSUBW xmm, xmm/m128
+        ///    PHSUBW xmm1,       xmm2/m128
+        ///   VPHSUBW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalSubtract(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_hsub_epi32 (__m128i a, __m128i b)
-        ///   PHSUBD xmm, xmm/m128
+        ///    PHSUBD xmm1,       xmm2/m128
+        ///   VPHSUBD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> HorizontalSubtract(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_hsubs_epi16 (__m128i a, __m128i b)
-        ///   PHSUBSW xmm, xmm/m128
+        ///    PHSUBSW xmm1,       xmm2/m128
+        ///   VPHSUBSW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalSubtractSaturate(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_maddubs_epi16 (__m128i a, __m128i b)
-        ///   PMADDUBSW xmm, xmm/m128
+        ///    PMADDUBSW xmm1,               xmm2/m128
+        ///   VPMADDUBSW xmm1,         xmm2, xmm3/m128
+        ///   VPMADDUBSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyAddAdjacent(Vector128<byte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_mulhrs_epi16 (__m128i a, __m128i b)
-        ///   PMULHRSW xmm, xmm/m128
+        ///    PMULHRSW xmm1,               xmm2/m128
+        ///   VPMULHRSW xmm1,         xmm2, xmm3/m128
+        ///   VPMULHRSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyHighRoundScale(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b)
-        ///   PSHUFB xmm, xmm/m128
+        ///    PSHUFB xmm1,               xmm2/m128
+        ///   VPSHUFB xmm1,         xmm2, xmm3/m128
+        ///   VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Shuffle(Vector128<sbyte> value, Vector128<sbyte> mask) { throw new PlatformNotSupportedException(); }
-
          /// <summary>
          /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b)
-        ///   PSHUFB xmm, xmm/m128
+        ///    PSHUFB xmm1,               xmm2/m128
+        ///   VPSHUFB xmm1,         xmm2, xmm3/m128
+        ///   VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Shuffle(Vector128<byte> value, Vector128<byte> mask) { throw new PlatformNotSupportedException(); }
  
          /// <summary>
          /// __m128i _mm_sign_epi8 (__m128i a, __m128i b)
-        ///   PSIGNB xmm, xmm/m128
+        ///    PSIGNB xmm1,       xmm2/m128
+        ///   VPSIGNB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Sign(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sign_epi16 (__m128i a, __m128i b)
-        ///   PSIGNW xmm, xmm/m128
+        ///    PSIGNW xmm1,       xmm2/m128
+        ///   VPSIGNW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Sign(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
          /// <summary>
          /// __m128i _mm_sign_epi32 (__m128i a, __m128i b)
-        ///   PSIGND xmm, xmm/m128
+        ///    PSIGND xmm1,       xmm2/m128
+        ///   VPSIGND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> Sign(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
      }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs

index 5ea6532cc33890ea855ed63b3cf4eea240b0b6ef..30acb5e323328b1fc5058bf4a467c13694656c0c 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs
@@ -27,146 +27,176 @@ namespace System.Runtime.Intrinsics.X86
  
          /// <summary>
          /// __m128i _mm_abs_epi8 (__m128i a)
-        ///   PABSB xmm, xmm/m128
+        ///    PABSB xmm1,         xmm2/m128
+        ///   VPABSB xmm1,         xmm2/m128
+        ///   VPABSB xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<byte> Abs(Vector128<sbyte> value) => Abs(value);
          /// <summary>
          /// __m128i _mm_abs_epi16 (__m128i a)
-        ///   PABSW xmm, xmm/m128
+        ///    PABSW xmm1,         xmm2/m128
+        ///   VPABSW xmm1,         xmm2/m128
+        ///   VPABSW xmm1 {k1}{z}, xmm2/m128
          /// </summary>
          public static Vector128<ushort> Abs(Vector128<short> value) => Abs(value);
          /// <summary>
          /// __m128i _mm_abs_epi32 (__m128i a)
-        ///   PABSD xmm, xmm/m128
+        ///    PABSD xmm1,         xmm2/m128
+        ///   VPABSD xmm1,         xmm2/m128
+        ///   VPABSD xmm1 {k1}{z}, xmm2/m128/m32bcst
          /// </summary>
          public static Vector128<uint> Abs(Vector128<int> value) => Abs(value);
  
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<sbyte> AlignRight(Vector128<sbyte> left, Vector128<sbyte> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
-        /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// </summary>
          public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<short> AlignRight(Vector128<short> left, Vector128<short> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ushort> AlignRight(Vector128<ushort> left, Vector128<ushort> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<int> AlignRight(Vector128<int> left, Vector128<int> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<uint> AlignRight(Vector128<uint> left, Vector128<uint> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<long> AlignRight(Vector128<long> left, Vector128<long> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
-
          /// <summary>
          /// __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
-        ///   PALIGNR xmm, xmm/m128, imm8
+        ///    PALIGNR xmm1,               xmm2/m128, imm8
+        ///   VPALIGNR xmm1,         xmm2, xmm3/m128, imm8
+        ///   VPALIGNR xmm1 {k1}{z}, xmm2, xmm3/m128, imm8
          /// This intrinsic generates PALIGNR that operates over bytes rather than elements of the vectors.
          /// </summary>
          public static Vector128<ulong> AlignRight(Vector128<ulong> left, Vector128<ulong> right, [ConstantExpected] byte mask) => AlignRight(left, right, mask);
  
          /// <summary>
          /// __m128i _mm_hadd_epi16 (__m128i a, __m128i b)
-        ///   PHADDW xmm, xmm/m128
+        ///    PHADDW xmm1,       xmm2/m128
+        ///   VPHADDW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalAdd(Vector128<short> left, Vector128<short> right) => HorizontalAdd(left, right);
          /// <summary>
          /// __m128i _mm_hadd_epi32 (__m128i a, __m128i b)
-        ///   PHADDD xmm, xmm/m128
+        ///    PHADDD xmm1,       xmm2/m128
+        ///   VPHADDD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> HorizontalAdd(Vector128<int> left, Vector128<int> right) => HorizontalAdd(left, right);
  
          /// <summary>
          /// __m128i _mm_hadds_epi16 (__m128i a, __m128i b)
-        ///   PHADDSW xmm, xmm/m128
+        ///    PHADDSW xmm1,       xmm2/m128
+        ///   VPHADDSW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalAddSaturate(Vector128<short> left, Vector128<short> right) => HorizontalAddSaturate(left, right);
  
          /// <summary>
          /// __m128i _mm_hsub_epi16 (__m128i a, __m128i b)
-        ///   PHSUBW xmm, xmm/m128
+        ///    PHSUBW xmm1,       xmm2/m128
+        ///   VPHSUBW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalSubtract(Vector128<short> left, Vector128<short> right) => HorizontalSubtract(left, right);
          /// <summary>
          /// __m128i _mm_hsub_epi32 (__m128i a, __m128i b)
-        ///   PHSUBD xmm, xmm/m128
+        ///    PHSUBD xmm1,       xmm2/m128
+        ///   VPHSUBD xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> HorizontalSubtract(Vector128<int> left, Vector128<int> right) => HorizontalSubtract(left, right);
  
          /// <summary>
          /// __m128i _mm_hsubs_epi16 (__m128i a, __m128i b)
-        ///   PHSUBSW xmm, xmm/m128
+        ///    PHSUBSW xmm1,       xmm2/m128
+        ///   VPHSUBSW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> HorizontalSubtractSaturate(Vector128<short> left, Vector128<short> right) => HorizontalSubtractSaturate(left, right);
  
          /// <summary>
          /// __m128i _mm_maddubs_epi16 (__m128i a, __m128i b)
-        ///   PMADDUBSW xmm, xmm/m128
+        ///    PMADDUBSW xmm1,               xmm2/m128
+        ///   VPMADDUBSW xmm1,         xmm2, xmm3/m128
+        ///   VPMADDUBSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyAddAdjacent(Vector128<byte> left, Vector128<sbyte> right) => MultiplyAddAdjacent(left, right);
  
          /// <summary>
          /// __m128i _mm_mulhrs_epi16 (__m128i a, __m128i b)
-        ///   PMULHRSW xmm, xmm/m128
+        ///    PMULHRSW xmm1,               xmm2/m128
+        ///   VPMULHRSW xmm1,         xmm2, xmm3/m128
+        ///   VPMULHRSW xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> MultiplyHighRoundScale(Vector128<short> left, Vector128<short> right) => MultiplyHighRoundScale(left, right);
  
          /// <summary>
          /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b)
-        ///   PSHUFB xmm, xmm/m128
+        ///    PSHUFB xmm1,               xmm2/m128
+        ///   VPSHUFB xmm1,         xmm2, xmm3/m128
+        ///   VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Shuffle(Vector128<sbyte> value, Vector128<sbyte> mask) => Shuffle(value, mask);
-
          /// <summary>
          /// __m128i _mm_shuffle_epi8 (__m128i a, __m128i b)
-        ///   PSHUFB xmm, xmm/m128
+        ///    PSHUFB xmm1,               xmm2/m128
+        ///   VPSHUFB xmm1,         xmm2, xmm3/m128
+        ///   VPSHUFB xmm1 {k1}{z}, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<byte> Shuffle(Vector128<byte> value, Vector128<byte> mask) => Shuffle(value, mask);
  
          /// <summary>
          /// __m128i _mm_sign_epi8 (__m128i a, __m128i b)
-        ///   PSIGNB xmm, xmm/m128
+        ///    PSIGNB xmm1,       xmm2/m128
+        ///   VPSIGNB xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<sbyte> Sign(Vector128<sbyte> left, Vector128<sbyte> right) => Sign(left, right);
          /// <summary>
          /// __m128i _mm_sign_epi16 (__m128i a, __m128i b)
-        ///   PSIGNW xmm, xmm/m128
+        ///    PSIGNW xmm1,       xmm2/m128
+        ///   VPSIGNW xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<short> Sign(Vector128<short> left, Vector128<short> right) => Sign(left, right);
          /// <summary>
          /// __m128i _mm_sign_epi32 (__m128i a, __m128i b)
-        ///   PSIGND xmm, xmm/m128
+        ///    PSIGND xmm1,       xmm2/m128
+        ///   VPSIGND xmm1, xmm2, xmm3/m128
          /// </summary>
          public static Vector128<int> Sign(Vector128<int> left, Vector128<int> right) => Sign(left, right);
      }
author	Tanner Gooding <tagoo@outlook.com>
	Wed, 19 Apr 2023 03:23:49 +0000 (20:23 -0700)
committer	GitHub <noreply@github.com>
	Wed, 19 Apr 2023 03:23:49 +0000 (20:23 -0700)
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Aes.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx2.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi1.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Bmi2.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Fma.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Lzcnt.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Pclmulqdq.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Popcnt.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse2.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse3.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse41.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Sse42.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.PlatformNotSupported.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Ssse3.cs		patch \| blob \| history