// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Runtime.Intrinsics;
namespace System.Runtime.Intrinsics.X86
{
    /// <summary>
    /// This class provides access to Intel SSE4.1 hardware instructions via intrinsics
    /// </summary>
    public abstract class Sse41 : Ssse3
    {
        // NOTE(review): every method body in this class is deliberately self-recursive
        // (`Foo(...) => Foo(...)`). This is the placeholder pattern for hardware
        // intrinsics — presumably the JIT replaces each call site with the documented
        // instruction when supported; confirm against the intrinsics infrastructure.
        // Do not "fix" the recursion.
        public new static bool IsSupported { get => IsSupported; }

        /// <summary>
        /// Provides access to the SSE4.1 instructions that are only usable from 64-bit processes
        /// </summary>
        public new abstract class X64 : Sse2.X64
        {
            public new static bool IsSupported { get => IsSupported; }

            /// <summary>
            /// __int64 _mm_extract_epi64 (__m128i a, const int imm8)
            /// PEXTRQ reg/m64, xmm, imm8
            /// This intrinsic is only available on 64-bit processes
            /// </summary>
            public static long Extract(Vector128<long> value, byte index) => Extract(value, index);

            /// <summary>
            /// __int64 _mm_extract_epi64 (__m128i a, const int imm8)
            /// PEXTRQ reg/m64, xmm, imm8
            /// This intrinsic is only available on 64-bit processes
            /// </summary>
            public static ulong Extract(Vector128<ulong> value, byte index) => Extract(value, index);

            /// <summary>
            /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
            /// PINSRQ xmm, reg/m64, imm8
            /// This intrinsic is only available on 64-bit processes
            /// </summary>
            public static Vector128<long> Insert(Vector128<long> value, long data, byte index) => Insert(value, data, index);

            /// <summary>
            /// __m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
            /// PINSRQ xmm, reg/m64, imm8
            /// This intrinsic is only available on 64-bit processes
            /// </summary>
            public static Vector128<ulong> Insert(Vector128<ulong> value, ulong data, byte index) => Insert(value, data, index);
        }

        /// <summary>
        /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
        /// PBLENDW xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<short> Blend(Vector128<short> left, Vector128<short> right, byte control) => Blend(left, right, control);

        /// <summary>
        /// __m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
        /// PBLENDW xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<ushort> Blend(Vector128<ushort> left, Vector128<ushort> right, byte control) => Blend(left, right, control);

        /// <summary>
        /// __m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8)
        /// BLENDPS xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<float> Blend(Vector128<float> left, Vector128<float> right, byte control) => Blend(left, right, control);

        /// <summary>
        /// __m128d _mm_blend_pd (__m128d a, __m128d b, const int imm8)
        /// BLENDPD xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<double> Blend(Vector128<double> left, Vector128<double> right, byte control) => Blend(left, right, control);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// </summary>
        public static Vector128<sbyte> BlendVariable(Vector128<sbyte> left, Vector128<sbyte> right, Vector128<sbyte> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// </summary>
        public static Vector128<byte> BlendVariable(Vector128<byte> left, Vector128<byte> right, Vector128<byte> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
        /// </summary>
        public static Vector128<short> BlendVariable(Vector128<short> left, Vector128<short> right, Vector128<short> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
        /// </summary>
        public static Vector128<ushort> BlendVariable(Vector128<ushort> left, Vector128<ushort> right, Vector128<ushort> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
        /// </summary>
        public static Vector128<int> BlendVariable(Vector128<int> left, Vector128<int> right, Vector128<int> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
        /// </summary>
        public static Vector128<uint> BlendVariable(Vector128<uint> left, Vector128<uint> right, Vector128<uint> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
        /// </summary>
        public static Vector128<long> BlendVariable(Vector128<long> left, Vector128<long> right, Vector128<long> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
        /// PBLENDVB xmm, xmm/m128, xmm
        /// This intrinsic generates PBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
        /// </summary>
        public static Vector128<ulong> BlendVariable(Vector128<ulong> left, Vector128<ulong> right, Vector128<ulong> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128 _mm_blendv_ps (__m128 a, __m128 b, __m128 mask)
        /// BLENDVPS xmm, xmm/m128, xmm0
        /// </summary>
        public static Vector128<float> BlendVariable(Vector128<float> left, Vector128<float> right, Vector128<float> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128d _mm_blendv_pd (__m128d a, __m128d b, __m128d mask)
        /// BLENDVPD xmm, xmm/m128, xmm0
        /// </summary>
        public static Vector128<double> BlendVariable(Vector128<double> left, Vector128<double> right, Vector128<double> mask) => BlendVariable(left, right, mask);

        /// <summary>
        /// __m128 _mm_ceil_ps (__m128 a)
        /// ROUNDPS xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<float> Ceiling(Vector128<float> value) => Ceiling(value);

        /// <summary>
        /// __m128d _mm_ceil_pd (__m128d a)
        /// ROUNDPD xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<double> Ceiling(Vector128<double> value) => Ceiling(value);

        /// <summary>
        /// __m128d _mm_ceil_sd (__m128d a)
        /// ROUNDSD xmm, xmm/m128, imm8(10)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> CeilingScalar(Vector128<double> value) => CeilingScalar(value);

        /// <summary>
        /// __m128 _mm_ceil_ss (__m128 a)
        /// ROUNDSS xmm, xmm/m128, imm8(10)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> CeilingScalar(Vector128<float> value) => CeilingScalar(value);

        /// <summary>
        /// __m128d _mm_ceil_sd (__m128d a, __m128d b)
        /// ROUNDSD xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<double> CeilingScalar(Vector128<double> upper, Vector128<double> value) => CeilingScalar(upper, value);

        /// <summary>
        /// __m128 _mm_ceil_ss (__m128 a, __m128 b)
        /// ROUNDSS xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<float> CeilingScalar(Vector128<float> upper, Vector128<float> value) => CeilingScalar(upper, value);

        /// <summary>
        /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
        /// PCMPEQQ xmm, xmm/m128
        /// </summary>
        public static Vector128<long> CompareEqual(Vector128<long> left, Vector128<long> right) => CompareEqual(left, right);

        /// <summary>
        /// __m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
        /// PCMPEQQ xmm, xmm/m128
        /// </summary>
        public static Vector128<ulong> CompareEqual(Vector128<ulong> left, Vector128<ulong> right) => CompareEqual(left, right);

        /// <summary>
        /// __m128i _mm_cvtepi8_epi16 (__m128i a)
        /// PMOVSXBW xmm, xmm/m64
        /// </summary>
        public static Vector128<short> ConvertToVector128Int16(Vector128<sbyte> value) => ConvertToVector128Int16(value);

        /// <summary>
        /// __m128i _mm_cvtepu8_epi16 (__m128i a)
        /// PMOVZXBW xmm, xmm/m64
        /// </summary>
        public static Vector128<short> ConvertToVector128Int16(Vector128<byte> value) => ConvertToVector128Int16(value);

        /// <summary>
        /// __m128i _mm_cvtepi8_epi32 (__m128i a)
        /// PMOVSXBD xmm, xmm/m32
        /// </summary>
        public static Vector128<int> ConvertToVector128Int32(Vector128<sbyte> value) => ConvertToVector128Int32(value);

        /// <summary>
        /// __m128i _mm_cvtepu8_epi32 (__m128i a)
        /// PMOVZXBD xmm, xmm/m32
        /// </summary>
        public static Vector128<int> ConvertToVector128Int32(Vector128<byte> value) => ConvertToVector128Int32(value);

        /// <summary>
        /// __m128i _mm_cvtepi16_epi32 (__m128i a)
        /// PMOVSXWD xmm, xmm/m64
        /// </summary>
        public static Vector128<int> ConvertToVector128Int32(Vector128<short> value) => ConvertToVector128Int32(value);

        /// <summary>
        /// __m128i _mm_cvtepu16_epi32 (__m128i a)
        /// PMOVZXWD xmm, xmm/m64
        /// </summary>
        public static Vector128<int> ConvertToVector128Int32(Vector128<ushort> value) => ConvertToVector128Int32(value);

        /// <summary>
        /// __m128i _mm_cvtepi8_epi64 (__m128i a)
        /// PMOVSXBQ xmm, xmm/m16
        /// </summary>
        public static Vector128<long> ConvertToVector128Int64(Vector128<sbyte> value) => ConvertToVector128Int64(value);

        /// <summary>
        /// __m128i _mm_cvtepu8_epi64 (__m128i a)
        /// PMOVZXBQ xmm, xmm/m16
        /// </summary>
        public static Vector128<long> ConvertToVector128Int64(Vector128<byte> value) => ConvertToVector128Int64(value);

        /// <summary>
        /// __m128i _mm_cvtepi16_epi64 (__m128i a)
        /// PMOVSXWQ xmm, xmm/m32
        /// </summary>
        public static Vector128<long> ConvertToVector128Int64(Vector128<short> value) => ConvertToVector128Int64(value);

        /// <summary>
        /// __m128i _mm_cvtepu16_epi64 (__m128i a)
        /// PMOVZXWQ xmm, xmm/m32
        /// </summary>
        public static Vector128<long> ConvertToVector128Int64(Vector128<ushort> value) => ConvertToVector128Int64(value);

        /// <summary>
        /// __m128i _mm_cvtepi32_epi64 (__m128i a)
        /// PMOVSXDQ xmm, xmm/m64
        /// </summary>
        public static Vector128<long> ConvertToVector128Int64(Vector128<int> value) => ConvertToVector128Int64(value);

        /// <summary>
        /// __m128i _mm_cvtepu32_epi64 (__m128i a)
        /// PMOVZXDQ xmm, xmm/m64
        /// </summary>
        public static Vector128<long> ConvertToVector128Int64(Vector128<uint> value) => ConvertToVector128Int64(value);

        /// <summary>
        /// __m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8)
        /// DPPS xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<float> DotProduct(Vector128<float> left, Vector128<float> right, byte control) => DotProduct(left, right, control);

        /// <summary>
        /// __m128d _mm_dp_pd (__m128d a, __m128d b, const int imm8)
        /// DPPD xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<double> DotProduct(Vector128<double> left, Vector128<double> right, byte control) => DotProduct(left, right, control);

        /// <summary>
        /// int _mm_extract_epi8 (__m128i a, const int imm8)
        /// PEXTRB reg/m8, xmm, imm8
        /// </summary>
        public static byte Extract(Vector128<byte> value, byte index) => Extract(value, index);

        /// <summary>
        /// int _mm_extract_epi32 (__m128i a, const int imm8)
        /// PEXTRD reg/m32, xmm, imm8
        /// </summary>
        public static int Extract(Vector128<int> value, byte index) => Extract(value, index);

        /// <summary>
        /// int _mm_extract_epi32 (__m128i a, const int imm8)
        /// PEXTRD reg/m32, xmm, imm8
        /// </summary>
        public static uint Extract(Vector128<uint> value, byte index) => Extract(value, index);

        /// <summary>
        /// int _mm_extract_ps (__m128 a, const int imm8)
        /// EXTRACTPS xmm, xmm/m32, imm8
        /// </summary>
        public static float Extract(Vector128<float> value, byte index) => Extract(value, index);

        /// <summary>
        /// __m128 _mm_floor_ps (__m128 a)
        /// ROUNDPS xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<float> Floor(Vector128<float> value) => Floor(value);

        /// <summary>
        /// __m128d _mm_floor_pd (__m128d a)
        /// ROUNDPD xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<double> Floor(Vector128<double> value) => Floor(value);

        /// <summary>
        /// __m128d _mm_floor_sd (__m128d a)
        /// ROUNDSD xmm, xmm/m128, imm8(9)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> FloorScalar(Vector128<double> value) => FloorScalar(value);

        /// <summary>
        /// __m128 _mm_floor_ss (__m128 a)
        /// ROUNDSS xmm, xmm/m128, imm8(9)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> FloorScalar(Vector128<float> value) => FloorScalar(value);

        /// <summary>
        /// __m128d _mm_floor_sd (__m128d a, __m128d b)
        /// ROUNDSD xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<double> FloorScalar(Vector128<double> upper, Vector128<double> value) => FloorScalar(upper, value);

        /// <summary>
        /// __m128 _mm_floor_ss (__m128 a, __m128 b)
        /// ROUNDSS xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<float> FloorScalar(Vector128<float> upper, Vector128<float> value) => FloorScalar(upper, value);

        /// <summary>
        /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
        /// PINSRB xmm, reg/m8, imm8
        /// </summary>
        public static Vector128<sbyte> Insert(Vector128<sbyte> value, sbyte data, byte index) => Insert(value, data, index);

        /// <summary>
        /// __m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
        /// PINSRB xmm, reg/m8, imm8
        /// </summary>
        public static Vector128<byte> Insert(Vector128<byte> value, byte data, byte index) => Insert(value, data, index);

        /// <summary>
        /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
        /// PINSRD xmm, reg/m32, imm8
        /// </summary>
        public static Vector128<int> Insert(Vector128<int> value, int data, byte index) => Insert(value, data, index);

        /// <summary>
        /// __m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
        /// PINSRD xmm, reg/m32, imm8
        /// </summary>
        public static Vector128<uint> Insert(Vector128<uint> value, uint data, byte index) => Insert(value, data, index);

        /// <summary>
        /// __m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
        /// INSERTPS xmm, xmm/m32, imm8
        /// </summary>
        public static Vector128<float> Insert(Vector128<float> value, Vector128<float> data, byte index) => Insert(value, data, index);

        /// <summary>
        /// __m128i _mm_max_epi8 (__m128i a, __m128i b)
        /// PMAXSB xmm, xmm/m128
        /// </summary>
        public static Vector128<sbyte> Max(Vector128<sbyte> left, Vector128<sbyte> right) => Max(left, right);

        /// <summary>
        /// __m128i _mm_max_epu16 (__m128i a, __m128i b)
        /// PMAXUW xmm, xmm/m128
        /// </summary>
        public static Vector128<ushort> Max(Vector128<ushort> left, Vector128<ushort> right) => Max(left, right);

        /// <summary>
        /// __m128i _mm_max_epi32 (__m128i a, __m128i b)
        /// PMAXSD xmm, xmm/m128
        /// </summary>
        public static Vector128<int> Max(Vector128<int> left, Vector128<int> right) => Max(left, right);

        /// <summary>
        /// __m128i _mm_max_epu32 (__m128i a, __m128i b)
        /// PMAXUD xmm, xmm/m128
        /// </summary>
        public static Vector128<uint> Max(Vector128<uint> left, Vector128<uint> right) => Max(left, right);

        /// <summary>
        /// __m128i _mm_min_epi8 (__m128i a, __m128i b)
        /// PMINSB xmm, xmm/m128
        /// </summary>
        public static Vector128<sbyte> Min(Vector128<sbyte> left, Vector128<sbyte> right) => Min(left, right);

        /// <summary>
        /// __m128i _mm_min_epu16 (__m128i a, __m128i b)
        /// PMINUW xmm, xmm/m128
        /// </summary>
        public static Vector128<ushort> Min(Vector128<ushort> left, Vector128<ushort> right) => Min(left, right);

        /// <summary>
        /// __m128i _mm_min_epi32 (__m128i a, __m128i b)
        /// PMINSD xmm, xmm/m128
        /// </summary>
        public static Vector128<int> Min(Vector128<int> left, Vector128<int> right) => Min(left, right);

        /// <summary>
        /// __m128i _mm_min_epu32 (__m128i a, __m128i b)
        /// PMINUD xmm, xmm/m128
        /// </summary>
        public static Vector128<uint> Min(Vector128<uint> left, Vector128<uint> right) => Min(left, right);

        /// <summary>
        /// __m128i _mm_minpos_epu16 (__m128i a)
        /// PHMINPOSUW xmm, xmm/m128
        /// </summary>
        public static Vector128<ushort> MinHorizontal(Vector128<ushort> value) => MinHorizontal(value);

        /// <summary>
        /// __m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm8)
        /// MPSADBW xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<ushort> MultipleSumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right, byte mask) => MultipleSumAbsoluteDifferences(left, right, mask);

        /// <summary>
        /// __m128i _mm_mul_epi32 (__m128i a, __m128i b)
        /// PMULDQ xmm, xmm/m128
        /// </summary>
        public static Vector128<long> Multiply(Vector128<int> left, Vector128<int> right) => Multiply(left, right);

        /// <summary>
        /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
        /// PMULLD xmm, xmm/m128
        /// </summary>
        public static Vector128<int> MultiplyLow(Vector128<int> left, Vector128<int> right) => MultiplyLow(left, right);

        /// <summary>
        /// __m128i _mm_mullo_epi32 (__m128i a, __m128i b)
        /// PMULLD xmm, xmm/m128
        /// </summary>
        public static Vector128<uint> MultiplyLow(Vector128<uint> left, Vector128<uint> right) => MultiplyLow(left, right);

        /// <summary>
        /// __m128i _mm_packus_epi32 (__m128i a, __m128i b)
        /// PACKUSDW xmm, xmm/m128
        /// </summary>
        public static Vector128<ushort> PackUnsignedSaturate(Vector128<int> left, Vector128<int> right) => PackUnsignedSaturate(left, right);

        /// <summary>
        /// __m128 _mm_round_ps (__m128 a, int rounding)
        /// ROUNDPS xmm, xmm/m128, imm8(8)
        /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC
        /// </summary>
        public static Vector128<float> RoundToNearestInteger(Vector128<float> value) => RoundToNearestInteger(value);

        /// <summary>
        /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<float> RoundToNegativeInfinity(Vector128<float> value) => RoundToNegativeInfinity(value);

        /// <summary>
        /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<float> RoundToPositiveInfinity(Vector128<float> value) => RoundToPositiveInfinity(value);

        /// <summary>
        /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPS xmm, xmm/m128, imm8(11)
        /// </summary>
        public static Vector128<float> RoundToZero(Vector128<float> value) => RoundToZero(value);

        /// <summary>
        /// _MM_FROUND_CUR_DIRECTION; ROUNDPS xmm, xmm/m128, imm8(4)
        /// </summary>
        public static Vector128<float> RoundCurrentDirection(Vector128<float> value) => RoundCurrentDirection(value);

        /// <summary>
        /// __m128d _mm_round_pd (__m128d a, int rounding)
        /// ROUNDPD xmm, xmm/m128, imm8(8)
        /// _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC
        /// </summary>
        public static Vector128<double> RoundToNearestInteger(Vector128<double> value) => RoundToNearestInteger(value);

        /// <summary>
        /// _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<double> RoundToNegativeInfinity(Vector128<double> value) => RoundToNegativeInfinity(value);

        /// <summary>
        /// _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<double> RoundToPositiveInfinity(Vector128<double> value) => RoundToPositiveInfinity(value);

        /// <summary>
        /// _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC; ROUNDPD xmm, xmm/m128, imm8(11)
        /// </summary>
        public static Vector128<double> RoundToZero(Vector128<double> value) => RoundToZero(value);

        /// <summary>
        /// _MM_FROUND_CUR_DIRECTION; ROUNDPD xmm, xmm/m128, imm8(4)
        /// </summary>
        public static Vector128<double> RoundCurrentDirection(Vector128<double> value) => RoundCurrentDirection(value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_CUR_DIRECTION)
        /// ROUNDSD xmm, xmm/m128, imm8(4)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> value) => RoundCurrentDirectionScalar(value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(8)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> value) => RoundToNearestIntegerScalar(value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(9)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> value) => RoundToNegativeInfinityScalar(value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(10)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> value) => RoundToPositiveInfinityScalar(value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(11)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<double> RoundToZeroScalar(Vector128<double> value) => RoundToZeroScalar(value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_CUR_DIRECTION)
        /// ROUNDSD xmm, xmm/m128, imm8(4)
        /// </summary>
        public static Vector128<double> RoundCurrentDirectionScalar(Vector128<double> upper, Vector128<double> value) => RoundCurrentDirectionScalar(upper, value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(8)
        /// </summary>
        public static Vector128<double> RoundToNearestIntegerScalar(Vector128<double> upper, Vector128<double> value) => RoundToNearestIntegerScalar(upper, value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<double> RoundToNegativeInfinityScalar(Vector128<double> upper, Vector128<double> value) => RoundToNegativeInfinityScalar(upper, value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<double> RoundToPositiveInfinityScalar(Vector128<double> upper, Vector128<double> value) => RoundToPositiveInfinityScalar(upper, value);

        /// <summary>
        /// __m128d _mm_round_sd (__m128d a, __m128d b, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC)
        /// ROUNDSD xmm, xmm/m128, imm8(11)
        /// </summary>
        public static Vector128<double> RoundToZeroScalar(Vector128<double> upper, Vector128<double> value) => RoundToZeroScalar(upper, value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_CUR_DIRECTION)
        /// ROUNDSS xmm, xmm/m128, imm8(4)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> value) => RoundCurrentDirectionScalar(value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(8)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> value) => RoundToNearestIntegerScalar(value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(9)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> value) => RoundToNegativeInfinityScalar(value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(10)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> value) => RoundToPositiveInfinityScalar(value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(11)
        /// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
        /// </summary>
        public static Vector128<float> RoundToZeroScalar(Vector128<float> value) => RoundToZeroScalar(value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_CUR_DIRECTION)
        /// ROUNDSS xmm, xmm/m128, imm8(4)
        /// </summary>
        public static Vector128<float> RoundCurrentDirectionScalar(Vector128<float> upper, Vector128<float> value) => RoundCurrentDirectionScalar(upper, value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(8)
        /// </summary>
        public static Vector128<float> RoundToNearestIntegerScalar(Vector128<float> upper, Vector128<float> value) => RoundToNearestIntegerScalar(upper, value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(9)
        /// </summary>
        public static Vector128<float> RoundToNegativeInfinityScalar(Vector128<float> upper, Vector128<float> value) => RoundToNegativeInfinityScalar(upper, value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(10)
        /// </summary>
        public static Vector128<float> RoundToPositiveInfinityScalar(Vector128<float> upper, Vector128<float> value) => RoundToPositiveInfinityScalar(upper, value);

        /// <summary>
        /// __m128 _mm_round_ss (__m128 a, __m128 b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
        /// ROUNDSS xmm, xmm/m128, imm8(11)
        /// </summary>
        public static Vector128<float> RoundToZeroScalar(Vector128<float> upper, Vector128<float> value) => RoundToZeroScalar(upper, value);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<sbyte> LoadAlignedVector128NonTemporal(sbyte* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<byte> LoadAlignedVector128NonTemporal(byte* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<short> LoadAlignedVector128NonTemporal(short* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<ushort> LoadAlignedVector128NonTemporal(ushort* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<int> LoadAlignedVector128NonTemporal(int* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<uint> LoadAlignedVector128NonTemporal(uint* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<long> LoadAlignedVector128NonTemporal(long* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// __m128i _mm_stream_load_si128 (const __m128i* mem_addr)
        /// MOVNTDQA xmm, m128
        /// </summary>
        public static unsafe Vector128<ulong> LoadAlignedVector128NonTemporal(ulong* address) => LoadAlignedVector128NonTemporal(address);

        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<sbyte> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<byte> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<short> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<ushort> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<int> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<uint> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<long> value) => TestAllOnes(value);
        /// <summary>
        /// int _mm_test_all_ones (__m128i a)
        /// PCMPEQD xmm, xmm/m128
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllOnes(Vector128<ulong> value) => TestAllOnes(value);

        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<sbyte> left, Vector128<sbyte> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<byte> left, Vector128<byte> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<short> left, Vector128<short> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<ushort> left, Vector128<ushort> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<int> left, Vector128<int> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<uint> left, Vector128<uint> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<long> left, Vector128<long> right) => TestAllZeros(left, right);
        /// <summary>
        /// int _mm_test_all_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestAllZeros(Vector128<ulong> left, Vector128<ulong> right) => TestAllZeros(left, right);

        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<sbyte> left, Vector128<sbyte> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<byte> left, Vector128<byte> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<short> left, Vector128<short> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<ushort> left, Vector128<ushort> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<int> left, Vector128<int> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<uint> left, Vector128<uint> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<long> left, Vector128<long> right) => TestC(left, right);
        /// <summary>
        /// int _mm_testc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestC(Vector128<ulong> left, Vector128<ulong> right) => TestC(left, right);

        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<sbyte> left, Vector128<sbyte> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<byte> left, Vector128<byte> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<short> left, Vector128<short> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<ushort> left, Vector128<ushort> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<int> left, Vector128<int> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<uint> left, Vector128<uint> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<long> left, Vector128<long> right) => TestMixOnesZeros(left, right);
        /// <summary>
        /// int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestMixOnesZeros(Vector128<ulong> left, Vector128<ulong> right) => TestMixOnesZeros(left, right);

        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<sbyte> left, Vector128<sbyte> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<byte> left, Vector128<byte> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<short> left, Vector128<short> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<ushort> left, Vector128<ushort> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<int> left, Vector128<int> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<uint> left, Vector128<uint> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<long> left, Vector128<long> right) => TestNotZAndNotC(left, right);
        /// <summary>
        /// int _mm_testnzc_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestNotZAndNotC(Vector128<ulong> left, Vector128<ulong> right) => TestNotZAndNotC(left, right);

        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<sbyte> left, Vector128<sbyte> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<byte> left, Vector128<byte> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<short> left, Vector128<short> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<ushort> left, Vector128<ushort> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<int> left, Vector128<int> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<uint> left, Vector128<uint> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<long> left, Vector128<long> right) => TestZ(left, right);
        /// <summary>
        /// int _mm_testz_si128 (__m128i a, __m128i b)
        /// PTEST xmm, xmm/m128
        /// </summary>
        public static bool TestZ(Vector128<ulong> left, Vector128<ulong> right) => TestZ(left, right);
    }
}