1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 using System.Runtime.CompilerServices;
7 using System.Runtime.Intrinsics;
9 namespace System.Runtime.Intrinsics.X86
12 /// This class provides access to Intel SSE2 hardware instructions via intrinsics
15 public abstract class Sse2 : Sse
19 public new static bool IsSupported { get => IsSupported; }
22 /// __m128i _mm_add_epi8 (__m128i a, __m128i b)
23 /// PADDB xmm, xmm/m128
25 public static Vector128<byte> Add(Vector128<byte> left, Vector128<byte> right) => Add(left, right);
27 /// __m128i _mm_add_epi8 (__m128i a, __m128i b)
28 /// PADDB xmm, xmm/m128
30 public static Vector128<sbyte> Add(Vector128<sbyte> left, Vector128<sbyte> right) => Add(left, right);
32 /// __m128i _mm_add_epi16 (__m128i a, __m128i b)
33 /// PADDW xmm, xmm/m128
35 public static Vector128<short> Add(Vector128<short> left, Vector128<short> right) => Add(left, right);
37 /// __m128i _mm_add_epi16 (__m128i a, __m128i b)
38 /// PADDW xmm, xmm/m128
40 public static Vector128<ushort> Add(Vector128<ushort> left, Vector128<ushort> right) => Add(left, right);
42 /// __m128i _mm_add_epi32 (__m128i a, __m128i b)
43 /// PADDD xmm, xmm/m128
45 public static Vector128<int> Add(Vector128<int> left, Vector128<int> right) => Add(left, right);
47 /// __m128i _mm_add_epi32 (__m128i a, __m128i b)
48 /// PADDD xmm, xmm/m128
50 public static Vector128<uint> Add(Vector128<uint> left, Vector128<uint> right) => Add(left, right);
52 /// __m128i _mm_add_epi64 (__m128i a, __m128i b)
53 /// PADDQ xmm, xmm/m128
55 public static Vector128<long> Add(Vector128<long> left, Vector128<long> right) => Add(left, right);
57 /// __m128i _mm_add_epi64 (__m128i a, __m128i b)
58 /// PADDQ xmm, xmm/m128
60 public static Vector128<ulong> Add(Vector128<ulong> left, Vector128<ulong> right) => Add(left, right);
62 /// __m128d _mm_add_pd (__m128d a, __m128d b)
63 /// ADDPD xmm, xmm/m128
65 public static Vector128<double> Add(Vector128<double> left, Vector128<double> right) => Add(left, right);
68 /// __m128d _mm_add_sd (__m128d a, __m128d b)
69 /// ADDSD xmm, xmm/m64
71 public static Vector128<double> AddScalar(Vector128<double> left, Vector128<double> right) => AddScalar(left, right);
74 /// __m128i _mm_adds_epi8 (__m128i a, __m128i b)
75 /// PADDSB xmm, xmm/m128
77 public static Vector128<sbyte> AddSaturate(Vector128<sbyte> left, Vector128<sbyte> right) => AddSaturate(left, right);
79 /// __m128i _mm_adds_epu8 (__m128i a, __m128i b)
80 /// PADDUSB xmm, xmm/m128
82 public static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte> right) => AddSaturate(left, right);
84 /// __m128i _mm_adds_epi16 (__m128i a, __m128i b)
85 /// PADDSW xmm, xmm/m128
87 public static Vector128<short> AddSaturate(Vector128<short> left, Vector128<short> right) => AddSaturate(left, right);
89 /// __m128i _mm_adds_epu16 (__m128i a, __m128i b)
90 /// PADDUSW xmm, xmm/m128
92 public static Vector128<ushort> AddSaturate(Vector128<ushort> left, Vector128<ushort> right) => AddSaturate(left, right);
95 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
96 /// PAND xmm, xmm/m128
98 public static Vector128<byte> And(Vector128<byte> left, Vector128<byte> right) => And(left, right);
100 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
101 /// PAND xmm, xmm/m128
103 public static Vector128<sbyte> And(Vector128<sbyte> left, Vector128<sbyte> right) => And(left, right);
105 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
106 /// PAND xmm, xmm/m128
108 public static Vector128<short> And(Vector128<short> left, Vector128<short> right) => And(left, right);
110 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
111 /// PAND xmm, xmm/m128
113 public static Vector128<ushort> And(Vector128<ushort> left, Vector128<ushort> right) => And(left, right);
115 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
116 /// PAND xmm, xmm/m128
118 public static Vector128<int> And(Vector128<int> left, Vector128<int> right) => And(left, right);
120 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
121 /// PAND xmm, xmm/m128
123 public static Vector128<uint> And(Vector128<uint> left, Vector128<uint> right) => And(left, right);
125 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
126 /// PAND xmm, xmm/m128
128 public static Vector128<long> And(Vector128<long> left, Vector128<long> right) => And(left, right);
130 /// __m128i _mm_and_si128 (__m128i a, __m128i b)
131 /// PAND xmm, xmm/m128
133 public static Vector128<ulong> And(Vector128<ulong> left, Vector128<ulong> right) => And(left, right);
135 /// __m128d _mm_and_pd (__m128d a, __m128d b)
136 /// ANDPD xmm, xmm/m128
138 public static Vector128<double> And(Vector128<double> left, Vector128<double> right) => And(left, right);
141 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
142 /// PANDN xmm, xmm/m128
144 public static Vector128<byte> AndNot(Vector128<byte> left, Vector128<byte> right) => AndNot(left, right);
146 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
147 /// PANDN xmm, xmm/m128
149 public static Vector128<sbyte> AndNot(Vector128<sbyte> left, Vector128<sbyte> right) => AndNot(left, right);
151 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
152 /// PANDN xmm, xmm/m128
154 public static Vector128<short> AndNot(Vector128<short> left, Vector128<short> right) => AndNot(left, right);
156 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
157 /// PANDN xmm, xmm/m128
159 public static Vector128<ushort> AndNot(Vector128<ushort> left, Vector128<ushort> right) => AndNot(left, right);
161 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
162 /// PANDN xmm, xmm/m128
164 public static Vector128<int> AndNot(Vector128<int> left, Vector128<int> right) => AndNot(left, right);
166 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
167 /// PANDN xmm, xmm/m128
169 public static Vector128<uint> AndNot(Vector128<uint> left, Vector128<uint> right) => AndNot(left, right);
171 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
172 /// PANDN xmm, xmm/m128
174 public static Vector128<long> AndNot(Vector128<long> left, Vector128<long> right) => AndNot(left, right);
176 /// __m128i _mm_andnot_si128 (__m128i a, __m128i b)
177 /// PANDN xmm, xmm/m128
179 public static Vector128<ulong> AndNot(Vector128<ulong> left, Vector128<ulong> right) => AndNot(left, right);
181 /// __m128d _mm_andnot_pd (__m128d a, __m128d b)
182 /// ADDNPD xmm, xmm/m128
184 public static Vector128<double> AndNot(Vector128<double> left, Vector128<double> right) => AndNot(left, right);
187 /// __m128i _mm_avg_epu8 (__m128i a, __m128i b)
188 /// PAVGB xmm, xmm/m128
190 public static Vector128<byte> Average(Vector128<byte> left, Vector128<byte> right) => Average(left, right);
192 /// __m128i _mm_avg_epu16 (__m128i a, __m128i b)
193 /// PAVGW xmm, xmm/m128
195 public static Vector128<ushort> Average(Vector128<ushort> left, Vector128<ushort> right) => Average(left, right);
198 /// __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b)
199 /// PCMPEQB xmm, xmm/m128
201 public static Vector128<sbyte> CompareEqual(Vector128<sbyte> left, Vector128<sbyte> right) => CompareEqual(left, right);
203 /// __m128i _mm_cmpeq_epi8 (__m128i a, __m128i b)
204 /// PCMPEQB xmm, xmm/m128
206 public static Vector128<byte> CompareEqual(Vector128<byte> left, Vector128<byte> right) => CompareEqual(left, right);
208 /// __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b)
209 /// PCMPEQW xmm, xmm/m128
211 public static Vector128<short> CompareEqual(Vector128<short> left, Vector128<short> right) => CompareEqual(left, right);
213 /// __m128i _mm_cmpeq_epi16 (__m128i a, __m128i b)
214 /// PCMPEQW xmm, xmm/m128
216 public static Vector128<ushort> CompareEqual(Vector128<ushort> left, Vector128<ushort> right) => CompareEqual(left, right);
218 /// __m128i _mm_cmpeq_epi32 (__m128i a, __m128i b)
219 /// PCMPEQD xmm, xmm/m128
221 public static Vector128<int> CompareEqual(Vector128<int> left, Vector128<int> right) => CompareEqual(left, right);
223 /// __m128i _mm_cmpeq_epi32 (__m128i a, __m128i b)
224 /// PCMPEQD xmm, xmm/m128
226 public static Vector128<uint> CompareEqual(Vector128<uint> left, Vector128<uint> right) => CompareEqual(left, right);
228 /// __m128d _mm_cmpeq_pd (__m128d a, __m128d b)
229 /// CMPPD xmm, xmm/m128, imm8(0)
231 public static Vector128<double> CompareEqual(Vector128<double> left, Vector128<double> right) => CompareEqual(left, right);
234 /// int _mm_comieq_sd (__m128d a, __m128d b)
235 /// COMISS xmm, xmm/m64
237 public static bool CompareEqualOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareEqualOrderedScalar(left, right);
240 /// int _mm_ucomieq_sd (__m128d a, __m128d b)
241 /// UCOMISS xmm, xmm/m64
243 public static bool CompareEqualUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareEqualUnorderedScalar(left, right);
246 /// __m128d _mm_cmpeq_sd (__m128d a, __m128d b)
247 /// CMPSD xmm, xmm/m64, imm8(0)
249 public static Vector128<double> CompareEqualScalar(Vector128<double> left, Vector128<double> right) => CompareEqualScalar(left, right);
252 /// __m128i _mm_cmpgt_epi8 (__m128i a, __m128i b)
253 /// PCMPGTB xmm, xmm/m128
255 public static Vector128<sbyte> CompareGreaterThan(Vector128<sbyte> left, Vector128<sbyte> right) => CompareGreaterThan(left, right);
257 /// __m128i _mm_cmpgt_epi16 (__m128i a, __m128i b)
258 /// PCMPGTW xmm, xmm/m128
260 public static Vector128<short> CompareGreaterThan(Vector128<short> left, Vector128<short> right) => CompareGreaterThan(left, right);
262 /// __m128i _mm_cmpgt_epi32 (__m128i a, __m128i b)
263 /// PCMPGTD xmm, xmm/m128
265 public static Vector128<int> CompareGreaterThan(Vector128<int> left, Vector128<int> right) => CompareGreaterThan(left, right);
267 /// __m128d _mm_cmpgt_pd (__m128d a, __m128d b)
268 /// CMPPD xmm, xmm/m128, imm8(6)
270 public static Vector128<double> CompareGreaterThan(Vector128<double> left, Vector128<double> right) => CompareGreaterThan(left, right);
273 /// int _mm_comigt_sd (__m128d a, __m128d b)
274 /// COMISS xmm, xmm/m64
276 public static bool CompareGreaterThanOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareGreaterThanOrderedScalar(left, right);
279 /// int _mm_ucomigt_sd (__m128d a, __m128d b)
280 /// UCOMISS xmm, xmm/m64
282 public static bool CompareGreaterThanUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareGreaterThanUnorderedScalar(left, right);
285 /// __m128d _mm_cmpgt_sd (__m128d a, __m128d b)
286 /// CMPSD xmm, xmm/m64, imm8(6)
288 public static Vector128<double> CompareGreaterThanScalar(Vector128<double> left, Vector128<double> right) => CompareGreaterThanScalar(left, right);
291 /// __m128d _mm_cmpge_pd (__m128d a, __m128d b)
292 /// CMPPD xmm, xmm/m128, imm8(5)
294 public static Vector128<double> CompareGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareGreaterThanOrEqual(left, right);
297 /// int _mm_comige_sd (__m128d a, __m128d b)
298 /// COMISS xmm, xmm/m64
300 public static bool CompareGreaterThanOrEqualOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareGreaterThanOrEqualOrderedScalar(left, right);
303 /// int _mm_ucomige_sd (__m128d a, __m128d b)
304 /// UCOMISS xmm, xmm/m64
306 public static bool CompareGreaterThanOrEqualUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareGreaterThanOrEqualUnorderedScalar(left, right);
309 /// __m128d _mm_cmpge_sd (__m128d a, __m128d b)
310 /// CMPSD xmm, xmm/m64, imm8(5)
312 public static Vector128<double> CompareGreaterThanOrEqualScalar(Vector128<double> left, Vector128<double> right) => CompareGreaterThanOrEqualScalar(left, right);
315 /// __m128i _mm_cmplt_epi8 (__m128i a, __m128i b)
316 /// PCMPGTB xmm, xmm/m128
318 public static Vector128<sbyte> CompareLessThan(Vector128<sbyte> left, Vector128<sbyte> right) => CompareLessThan(left, right);
320 /// __m128i _mm_cmplt_epi16 (__m128i a, __m128i b)
321 /// PCMPGTW xmm, xmm/m128
323 public static Vector128<short> CompareLessThan(Vector128<short> left, Vector128<short> right) => CompareLessThan(left, right);
325 /// __m128i _mm_cmplt_epi32 (__m128i a, __m128i b)
326 /// PCMPGTD xmm, xmm/m128
328 public static Vector128<int> CompareLessThan(Vector128<int> left, Vector128<int> right) => CompareLessThan(left, right);
330 /// __m128d _mm_cmplt_pd (__m128d a, __m128d b)
331 /// CMPPD xmm, xmm/m128, imm8(1)
333 public static Vector128<double> CompareLessThan(Vector128<double> left, Vector128<double> right) => CompareLessThan(left, right);
336 /// int _mm_comilt_sd (__m128d a, __m128d b)
337 /// COMISS xmm, xmm/m64
339 public static bool CompareLessThanOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareLessThanOrderedScalar(left, right);
342 /// int _mm_ucomilt_sd (__m128d a, __m128d b)
343 /// UCOMISS xmm, xmm/m64
345 public static bool CompareLessThanUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareLessThanUnorderedScalar(left, right);
348 /// __m128d _mm_cmplt_sd (__m128d a, __m128d b)
349 /// CMPSD xmm, xmm/m64, imm8(1)
351 public static Vector128<double> CompareLessThanScalar(Vector128<double> left, Vector128<double> right) => CompareLessThanScalar(left, right);
354 /// __m128d _mm_cmple_pd (__m128d a, __m128d b)
355 /// CMPPD xmm, xmm/m128, imm8(2)
357 public static Vector128<double> CompareLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareLessThanOrEqual(left, right);
360 /// int _mm_comile_sd (__m128d a, __m128d b)
361 /// COMISS xmm, xmm/m64
363 public static bool CompareLessThanOrEqualOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareLessThanOrEqualOrderedScalar(left, right);
366 /// int _mm_ucomile_sd (__m128d a, __m128d b)
367 /// UCOMISS xmm, xmm/m64
369 public static bool CompareLessThanOrEqualUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareLessThanOrEqualUnorderedScalar(left, right);
372 /// __m128d _mm_cmple_sd (__m128d a, __m128d b)
373 /// CMPSD xmm, xmm/m64, imm8(2)
375 public static Vector128<double> CompareLessThanOrEqualScalar(Vector128<double> left, Vector128<double> right) => CompareLessThanOrEqualScalar(left, right);
378 /// __m128d _mm_cmpneq_pd (__m128d a, __m128d b)
379 /// CMPPD xmm, xmm/m128, imm8(4)
381 public static Vector128<double> CompareNotEqual(Vector128<double> left, Vector128<double> right) => CompareNotEqual(left, right);
384 /// int _mm_comineq_sd (__m128d a, __m128d b)
385 /// COMISS xmm, xmm/m64
387 public static bool CompareNotEqualOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareNotEqualOrderedScalar(left, right);
390 /// int _mm_ucomineq_sd (__m128d a, __m128d b)
391 /// UCOMISS xmm, xmm/m64
393 public static bool CompareNotEqualUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareNotEqualUnorderedScalar(left, right);
396 /// __m128d _mm_cmpneq_sd (__m128d a, __m128d b)
397 /// CMPSD xmm, xmm/m64, imm8(4)
399 public static Vector128<double> CompareNotEqualScalar(Vector128<double> left, Vector128<double> right) => CompareNotEqualScalar(left, right);
402 /// __m128d _mm_cmpngt_pd (__m128d a, __m128d b)
403 /// CMPPD xmm, xmm/m128, imm8(2)
405 public static Vector128<double> CompareNotGreaterThan(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThan(left, right);
408 /// __m128d _mm_cmpngt_sd (__m128d a, __m128d b)
409 /// CMPSD xmm, xmm/m64, imm8(2)
411 public static Vector128<double> CompareNotGreaterThanScalar(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThanScalar(left, right);
414 /// __m128d _mm_cmpnge_pd (__m128d a, __m128d b)
415 /// CMPPD xmm, xmm/m128, imm8(1)
417 public static Vector128<double> CompareNotGreaterThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThanOrEqual(left, right);
420 /// __m128d _mm_cmpnge_sd (__m128d a, __m128d b)
421 /// CMPSD xmm, xmm/m64, imm8(1)
423 public static Vector128<double> CompareNotGreaterThanOrEqualScalar(Vector128<double> left, Vector128<double> right) => CompareNotGreaterThanOrEqualScalar(left, right);
426 /// __m128d _mm_cmpnlt_pd (__m128d a, __m128d b)
427 /// CMPPD xmm, xmm/m128, imm8(5)
429 public static Vector128<double> CompareNotLessThan(Vector128<double> left, Vector128<double> right) => CompareNotLessThan(left, right);
432 /// __m128d _mm_cmpnlt_sd (__m128d a, __m128d b)
433 /// CMPSD xmm, xmm/m64, imm8(5)
435 public static Vector128<double> CompareNotLessThanScalar(Vector128<double> left, Vector128<double> right) => CompareNotLessThanScalar(left, right);
438 /// __m128d _mm_cmpnle_pd (__m128d a, __m128d b)
439 /// CMPPD xmm, xmm/m128, imm8(6)
441 public static Vector128<double> CompareNotLessThanOrEqual(Vector128<double> left, Vector128<double> right) => CompareNotLessThanOrEqual(left, right);
444 /// __m128d _mm_cmpnle_sd (__m128d a, __m128d b)
445 /// CMPSD xmm, xmm/m64, imm8(6)
447 public static Vector128<double> CompareNotLessThanOrEqualScalar(Vector128<double> left, Vector128<double> right) => CompareNotLessThanOrEqualScalar(left, right);
450 /// __m128d _mm_cmpord_pd (__m128d a, __m128d b)
451 /// CMPPD xmm, xmm/m128, imm8(7)
453 public static Vector128<double> CompareOrdered(Vector128<double> left, Vector128<double> right) => CompareOrdered(left, right);
456 /// __m128d _mm_cmpord_sd (__m128d a, __m128d b)
457 /// CMPSD xmm, xmm/m64, imm8(7)
459 public static Vector128<double> CompareOrderedScalar(Vector128<double> left, Vector128<double> right) => CompareOrderedScalar(left, right);
462 /// __m128d _mm_cmpunord_pd (__m128d a, __m128d b)
463 /// CMPPD xmm, xmm/m128, imm8(3)
465 public static Vector128<double> CompareUnordered(Vector128<double> left, Vector128<double> right) => CompareUnordered(left, right);
468 /// __m128d _mm_cmpunord_sd (__m128d a, __m128d b)
469 /// CMPSD xmm, xmm/m64, imm8(3)
471 public static Vector128<double> CompareUnorderedScalar(Vector128<double> left, Vector128<double> right) => CompareUnorderedScalar(left, right);
474 /// __m128i _mm_cvtps_epi32 (__m128 a)
475 /// CVTPS2DQ xmm, xmm/m128
477 public static Vector128<int> ConvertToVector128Int32(Vector128<float> value) => ConvertToVector128Int32(value);
479 /// __m128i _mm_cvtpd_epi32 (__m128d a)
480 /// CVTPD2DQ xmm, xmm/m128
482 public static Vector128<int> ConvertToVector128Int32(Vector128<double> value) => ConvertToVector128Int32(value);
484 /// __m128 _mm_cvtepi32_ps (__m128i a)
485 /// CVTDQ2PS xmm, xmm/m128
487 public static Vector128<float> ConvertToVector128Single(Vector128<int> value) => ConvertToVector128Single(value);
489 /// __m128 _mm_cvtpd_ps (__m128d a)
490 /// CVTPD2PS xmm, xmm/m128
492 public static Vector128<float> ConvertToVector128Single(Vector128<double> value) => ConvertToVector128Single(value);
494 /// __m128d _mm_cvtepi32_pd (__m128i a)
495 /// CVTDQ2PD xmm, xmm/m128
497 public static Vector128<double> ConvertToVector128Double(Vector128<int> value) => ConvertToVector128Double(value);
499 /// __m128d _mm_cvtps_pd (__m128 a)
500 /// CVTPS2PD xmm, xmm/m128
502 public static Vector128<double> ConvertToVector128Double(Vector128<float> value) => ConvertToVector128Double(value);
505 /// double _mm_cvtsd_f64(__m128d a)
508 public static double ConvertToDouble(Vector128<double> value) => ConvertToDouble(value);
510 /// int _mm_cvtsd_si32 (__m128d a)
511 /// CVTSD2SI r32, xmm/m64
513 public static int ConvertToInt32(Vector128<double> value) => ConvertToInt32(value);
515 /// int _mm_cvtsi128_si32 (__m128i a)
516 /// MOVD reg/m32, xmm
518 public static int ConvertToInt32(Vector128<int> value) => ConvertToInt32(value);
520 /// __int64 _mm_cvtsd_si64 (__m128d a)
521 /// CVTSD2SI r64, xmm/m64
523 public static long ConvertToInt64(Vector128<double> value) => ConvertToInt64(value);
525 /// __int64 _mm_cvtsi128_si64 (__m128i a)
526 /// MOVQ reg/m64, xmm
528 public static long ConvertToInt64(Vector128<long> value) => ConvertToInt64(value);
530 /// int _mm_cvtsi128_si32 (__m128i a)
531 /// MOVD reg/m32, xmm
533 public static uint ConvertToUInt32(Vector128<uint> value) => ConvertToUInt32(value);
535 /// __int64 _mm_cvtsi128_si64 (__m128i a)
536 /// MOVQ reg/m64, xmm
538 public static ulong ConvertToUInt64(Vector128<ulong> value) => ConvertToUInt64(value);
541 /// __m128d _mm_cvtsi32_sd (__m128d a, int b)
542 /// CVTSI2SD xmm, reg/m64
544 public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, int value) => ConvertScalarToVector128Double(upper, value);
546 /// __m128d _mm_cvtsi64_sd (__m128d a, int b)
547 /// CVTSI2SD xmm, reg/m64
549 public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, long value) => ConvertScalarToVector128Double(upper, value);
551 /// __m128d _mm_cvtss_sd (__m128d a, __m128 b)
552 /// CVTSS2SD xmm, xmm/m32
554 public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<float> value) => ConvertScalarToVector128Double(upper, value);
556 /// __m128i _mm_cvtsi32_si128 (int a)
557 /// MOVD xmm, reg/m32
559 public static Vector128<int> ConvertScalarToVector128Int32(int value) => ConvertScalarToVector128Int32(value);
561 /// __m128i _mm_cvtsi64_si128 (__int64 a)
562 /// MOVQ xmm, reg/m64
564 public static Vector128<long> ConvertScalarToVector128Int64(long value) => ConvertScalarToVector128Int64(value);
566 /// __m128 _mm_cvtsd_ss (__m128 a, __m128d b)
567 /// CVTSD2SS xmm, xmm/m64
569 public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<double> value) => ConvertScalarToVector128Single(upper, value);
571 /// __m128i _mm_cvtsi32_si128 (int a)
572 /// MOVD xmm, reg/m32
574 public static Vector128<uint> ConvertScalarToVector128UInt32(uint value) => ConvertScalarToVector128UInt32(value);
576 /// __m128i _mm_cvtsi64_si128 (__int64 a)
577 /// MOVQ xmm, reg/m64
579 public static Vector128<ulong> ConvertScalarToVector128UInt64(ulong value) => ConvertScalarToVector128UInt64(value);
582 /// __m128i _mm_cvttps_epi32 (__m128 a)
583 /// CVTTPS2DQ xmm, xmm/m128
585 public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<float> value) => ConvertToVector128Int32WithTruncation(value);
587 /// __m128i _mm_cvttpd_epi32 (__m128d a)
588 /// CVTTPD2DQ xmm, xmm/m128
590 public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<double> value) => ConvertToVector128Int32WithTruncation(value);
593 /// int _mm_cvttsd_si32 (__m128d a)
594 /// CVTTSD2SI reg, xmm/m64
596 public static int ConvertToInt32WithTruncation(Vector128<double> value) => ConvertToInt32WithTruncation(value);
598 /// __int64 _mm_cvttsd_si64 (__m128d a)
599 /// CVTTSD2SI reg, xmm/m64
601 public static long ConvertToInt64WithTruncation(Vector128<double> value) => ConvertToInt64WithTruncation(value);
604 /// __m128d _mm_div_pd (__m128d a, __m128d b)
605 /// DIVPD xmm, xmm/m128
607 public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) => Divide(left, right);
610 /// __m128d _mm_div_sd (__m128d a, __m128d b)
611 /// DIVSD xmm, xmm/m64
613 public static Vector128<double> DivideScalar(Vector128<double> left, Vector128<double> right) => DivideScalar(left, right);
616 /// int _mm_extract_epi16 (__m128i a, int immediate)
617 /// PEXTRW reg, xmm, imm8
619 public static ushort Extract(Vector128<ushort> value, byte index) => Extract(value, index);
622 /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate)
623 /// PINSRW xmm, reg/m16, imm8
625 public static Vector128<short> Insert(Vector128<short> value, short data, byte index) => Insert(value, data, index);
627 /// __m128i _mm_insert_epi16 (__m128i a, int i, int immediate)
628 /// PINSRW xmm, reg/m16, imm8
630 public static Vector128<ushort> Insert(Vector128<ushort> value, ushort data, byte index) => Insert(value, data, index);
633 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
636 public static unsafe Vector128<sbyte> LoadVector128(sbyte* address) => LoadVector128(address);
638 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
641 public static unsafe Vector128<byte> LoadVector128(byte* address) => LoadVector128(address);
643 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
646 public static unsafe Vector128<short> LoadVector128(short* address) => LoadVector128(address);
648 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
651 public static unsafe Vector128<ushort> LoadVector128(ushort* address) => LoadVector128(address);
653 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
656 public static unsafe Vector128<int> LoadVector128(int* address) => LoadVector128(address);
658 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
661 public static unsafe Vector128<uint> LoadVector128(uint* address) => LoadVector128(address);
663 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
666 public static unsafe Vector128<long> LoadVector128(long* address) => LoadVector128(address);
668 /// __m128i _mm_loadu_si128 (__m128i const* mem_address)
671 public static unsafe Vector128<ulong> LoadVector128(ulong* address) => LoadVector128(address);
673 /// __m128d _mm_loadu_pd (double const* mem_address)
676 public static unsafe Vector128<double> LoadVector128(double* address) => LoadVector128(address);
679 /// __m128d _mm_load_sd (double const* mem_address)
682 public static unsafe Vector128<double> LoadScalarVector128(double* address) => LoadScalarVector128(address);
685 /// __m128i _mm_load_si128 (__m128i const* mem_address)
688 public static unsafe Vector128<sbyte> LoadAlignedVector128(sbyte* address) => LoadAlignedVector128(address);
690 /// __m128i _mm_load_si128 (__m128i const* mem_address)
693 public static unsafe Vector128<byte> LoadAlignedVector128(byte* address) => LoadAlignedVector128(address);
695 /// __m128i _mm_load_si128 (__m128i const* mem_address)
698 public static unsafe Vector128<short> LoadAlignedVector128(short* address) => LoadAlignedVector128(address);
700 /// __m128i _mm_load_si128 (__m128i const* mem_address)
703 public static unsafe Vector128<ushort> LoadAlignedVector128(ushort* address) => LoadAlignedVector128(address);
705 /// __m128i _mm_load_si128 (__m128i const* mem_address)
708 public static unsafe Vector128<int> LoadAlignedVector128(int* address) => LoadAlignedVector128(address);
710 /// __m128i _mm_load_si128 (__m128i const* mem_address)
713 public static unsafe Vector128<uint> LoadAlignedVector128(uint* address) => LoadAlignedVector128(address);
715 /// __m128i _mm_load_si128 (__m128i const* mem_address)
718 public static unsafe Vector128<long> LoadAlignedVector128(long* address) => LoadAlignedVector128(address);
720 /// __m128i _mm_load_si128 (__m128i const* mem_address)
723 public static unsafe Vector128<ulong> LoadAlignedVector128(ulong* address) => LoadAlignedVector128(address);
725 /// __m128d _mm_load_pd (double const* mem_address)
728 public static unsafe Vector128<double> LoadAlignedVector128(double* address) => LoadAlignedVector128(address);
731 /// void _mm_lfence(void)
734 public static void LoadFence() => LoadFence();
737 /// __m128d _mm_loadh_pd (__m128d a, double const* mem_addr)
740 public static unsafe Vector128<double> LoadHigh(Vector128<double> lower, double* address) => LoadHigh(lower, address);
743 /// __m128d _mm_loadl_pd (__m128d a, double const* mem_addr)
746 public static unsafe Vector128<double> LoadLow(Vector128<double> upper, double* address) => LoadLow(upper, address);
749 /// __m128i _mm_loadl_epi32 (__m128i const* mem_addr)
750 /// MOVD xmm, reg/m32
751 /// The above native signature does not exist. We provide this additional overload for completeness.
753 public static unsafe Vector128<int> LoadScalarVector128(int* address) => LoadScalarVector128(address);
755 /// __m128i _mm_loadl_epi32 (__m128i const* mem_addr)
756 /// MOVD xmm, reg/m32
757 /// The above native signature does not exist. We provide this additional overload for completeness.
759 public static unsafe Vector128<uint> LoadScalarVector128(uint* address) => LoadScalarVector128(address);
761 /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr)
762 /// MOVQ xmm, reg/m64
764 public static unsafe Vector128<long> LoadScalarVector128(long* address) => LoadScalarVector128(address);
766 /// __m128i _mm_loadl_epi64 (__m128i const* mem_addr)
767 /// MOVQ xmm, reg/m64
769 public static unsafe Vector128<ulong> LoadScalarVector128(ulong* address) => LoadScalarVector128(address);
772 /// void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_address)
773 /// MASKMOVDQU xmm, xmm
775 public static unsafe void MaskMove(Vector128<sbyte> source, Vector128<sbyte> mask, sbyte* address) => MaskMove(source, mask, address);
777 /// void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_address)
778 /// MASKMOVDQU xmm, xmm
780 public static unsafe void MaskMove(Vector128<byte> source, Vector128<byte> mask, byte* address) => MaskMove(source, mask, address);
783 /// __m128i _mm_max_epu8 (__m128i a, __m128i b)
784 /// PMAXUB xmm, xmm/m128
786 public static Vector128<byte> Max(Vector128<byte> left, Vector128<byte> right) => Max(left, right);
788 /// __m128i _mm_max_epi16 (__m128i a, __m128i b)
789 /// PMAXSW xmm, xmm/m128
791 public static Vector128<short> Max(Vector128<short> left, Vector128<short> right) => Max(left, right);
793 /// __m128d _mm_max_pd (__m128d a, __m128d b)
794 /// MAXPD xmm, xmm/m128
796 public static Vector128<double> Max(Vector128<double> left, Vector128<double> right) => Max(left, right);
799 /// __m128d _mm_max_sd (__m128d a, __m128d b)
800 /// MAXSD xmm, xmm/m64
802 public static Vector128<double> MaxScalar(Vector128<double> left, Vector128<double> right) => MaxScalar(left, right);
805 /// void _mm_mfence(void)
808 public static void MemoryFence() => MemoryFence();
811 /// __m128i _mm_min_epu8 (__m128i a, __m128i b)
812 /// PMINUB xmm, xmm/m128
814 public static Vector128<byte> Min(Vector128<byte> left, Vector128<byte> right) => Min(left, right);
816 /// __m128i _mm_min_epi16 (__m128i a, __m128i b)
817 /// PMINSW xmm, xmm/m128
// NOTE(review): each method below is a self-recursive intrinsic placeholder; the JIT
// replaces recognized calls with the listed hardware instruction. Do not "fix" the
// recursion — calling these without hardware acceleration is guarded by IsSupported.
/// <summary>
/// __m128i _mm_min_epi16 (__m128i a, __m128i b)
/// PMINSW xmm, xmm/m128
/// </summary>
public static Vector128<short> Min(Vector128<short> left, Vector128<short> right) => Min(left, right);
/// <summary>
/// __m128d _mm_min_pd (__m128d a, __m128d b)
/// MINPD xmm, xmm/m128
/// </summary>
public static Vector128<double> Min(Vector128<double> left, Vector128<double> right) => Min(left, right);
/// <summary>
/// __m128d _mm_min_sd (__m128d a, __m128d b)
/// MINSD xmm, xmm/m64
/// </summary>
public static Vector128<double> MinScalar(Vector128<double> left, Vector128<double> right) => MinScalar(left, right);
/// <summary>
/// __m128d _mm_move_sd (__m128d a, __m128d b)
/// MOVSD xmm, xmm
/// </summary>
public static Vector128<double> MoveScalar(Vector128<double> upper, Vector128<double> value) => MoveScalar(upper, value);
/// <summary>
/// int _mm_movemask_epi8 (__m128i a)
/// PMOVMSKB reg, xmm
/// </summary>
public static int MoveMask(Vector128<sbyte> value) => MoveMask(value);
/// <summary>
/// int _mm_movemask_epi8 (__m128i a)
/// PMOVMSKB reg, xmm
/// </summary>
public static int MoveMask(Vector128<byte> value) => MoveMask(value);
/// <summary>
/// int _mm_movemask_pd (__m128d a)
/// MOVMSKPD reg, xmm
/// </summary>
public static int MoveMask(Vector128<double> value) => MoveMask(value);
/// <summary>
/// __m128i _mm_move_epi64 (__m128i a)
/// MOVQ xmm, xmm
/// </summary>
public static Vector128<long> MoveScalar(Vector128<long> value) => MoveScalar(value);
/// <summary>
/// __m128i _mm_move_epi64 (__m128i a)
/// MOVQ xmm, xmm
/// </summary>
public static Vector128<ulong> MoveScalar(Vector128<ulong> value) => MoveScalar(value);
/// <summary>
/// __m128i _mm_mul_epu32 (__m128i a, __m128i b)
/// PMULUDQ xmm, xmm/m128
/// </summary>
public static Vector128<ulong> Multiply(Vector128<uint> left, Vector128<uint> right) => Multiply(left, right);
/// <summary>
/// __m128d _mm_mul_pd (__m128d a, __m128d b)
/// MULPD xmm, xmm/m128
/// </summary>
public static Vector128<double> Multiply(Vector128<double> left, Vector128<double> right) => Multiply(left, right);
/// <summary>
/// __m128d _mm_mul_sd (__m128d a, __m128d b)
/// MULSD xmm, xmm/m64
/// </summary>
public static Vector128<double> MultiplyScalar(Vector128<double> left, Vector128<double> right) => MultiplyScalar(left, right);
/// <summary>
/// __m128i _mm_mulhi_epi16 (__m128i a, __m128i b)
/// PMULHW xmm, xmm/m128
/// </summary>
public static Vector128<short> MultiplyHigh(Vector128<short> left, Vector128<short> right) => MultiplyHigh(left, right);
/// <summary>
/// __m128i _mm_mulhi_epu16 (__m128i a, __m128i b)
/// PMULHUW xmm, xmm/m128
/// </summary>
public static Vector128<ushort> MultiplyHigh(Vector128<ushort> left, Vector128<ushort> right) => MultiplyHigh(left, right);
/// <summary>
/// __m128i _mm_madd_epi16 (__m128i a, __m128i b)
/// PMADDWD xmm, xmm/m128
/// </summary>
public static Vector128<int> MultiplyAddAdjacent(Vector128<short> left, Vector128<short> right) => MultiplyAddAdjacent(left, right);
/// <summary>
/// __m128i _mm_mullo_epi16 (__m128i a, __m128i b)
/// PMULLW xmm, xmm/m128
/// </summary>
public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right) => MultiplyLow(left, right);
/// <summary>
/// __m128i _mm_mullo_epi16 (__m128i a, __m128i b)
/// PMULLW xmm, xmm/m128
/// </summary>
public static Vector128<ushort> MultiplyLow(Vector128<ushort> left, Vector128<ushort> right) => MultiplyLow(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<byte> Or(Vector128<byte> left, Vector128<byte> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<sbyte> Or(Vector128<sbyte> left, Vector128<sbyte> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<short> Or(Vector128<short> left, Vector128<short> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<ushort> Or(Vector128<ushort> left, Vector128<ushort> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<int> Or(Vector128<int> left, Vector128<int> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<uint> Or(Vector128<uint> left, Vector128<uint> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<long> Or(Vector128<long> left, Vector128<long> right) => Or(left, right);
/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
/// POR xmm, xmm/m128
/// </summary>
public static Vector128<ulong> Or(Vector128<ulong> left, Vector128<ulong> right) => Or(left, right);
/// <summary>
/// __m128d _mm_or_pd (__m128d a, __m128d b)
/// ORPD xmm, xmm/m128
/// </summary>
public static Vector128<double> Or(Vector128<double> left, Vector128<double> right) => Or(left, right);
/// <summary>
/// __m128i _mm_packs_epi16 (__m128i a, __m128i b)
/// PACKSSWB xmm, xmm/m128
/// </summary>
public static Vector128<sbyte> PackSignedSaturate(Vector128<short> left, Vector128<short> right) => PackSignedSaturate(left, right);
/// <summary>
/// __m128i _mm_packs_epi32 (__m128i a, __m128i b)
/// PACKSSDW xmm, xmm/m128
/// </summary>
public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128<int> right) => PackSignedSaturate(left, right);
/// <summary>
/// __m128i _mm_packus_epi16 (__m128i a, __m128i b)
/// PACKUSWB xmm, xmm/m128
/// </summary>
public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right) => PackUnsignedSaturate(left, right);
/// <summary>
/// __m128i _mm_set1_epi8 (char a)
/// Broadcasts <paramref name="value"/> into every byte lane of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> SetAllVector128(byte value)
{
    // Place the scalar into lane 0 of an otherwise-zero vector.
    Vector128<byte> asBytes = Sse.StaticCast<uint, byte>(ConvertScalarToVector128UInt32(value));
    // Widen the replication step by step:
    // bytes  -> { -- ... -- vl vl }
    Vector128<ushort> asWords = Sse.StaticCast<byte, ushort>(UnpackLow(asBytes, asBytes));
    // words  -> { -- ... -- vl vl vl vl }
    Vector128<uint> asDwords = Sse.StaticCast<ushort, uint>(UnpackLow(asWords, asWords));
    // Finally replicate dword 0 across all four dwords: { vl x16 }.
    return Sse.StaticCast<uint, byte>(Shuffle(asDwords, 0));
}
/// <summary>
/// __m128i _mm_set1_epi8 (char a)
/// Broadcasts <paramref name="value"/> into every signed-byte lane of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> SetAllVector128(sbyte value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Place the scalar into lane 0 of an otherwise-zero vector.
    Vector128<sbyte> asBytes = Sse.StaticCast<int, sbyte>(ConvertScalarToVector128Int32(value));
    // Widen the replication: bytes -> { -- ... -- vl vl }
    Vector128<short> asWords = Sse.StaticCast<sbyte, short>(UnpackLow(asBytes, asBytes));
    // words -> { -- ... -- vl vl vl vl }
    Vector128<int> asDwords = Sse.StaticCast<short, int>(UnpackLow(asWords, asWords));
    // Replicate dword 0 across all four dwords: { vl x16 }.
    return Sse.StaticCast<int, sbyte>(Shuffle(asDwords, 0));
}
/// <summary>
/// __m128i _mm_set1_epi16 (short a)
/// Broadcasts <paramref name="value"/> into every 16-bit lane of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> SetAllVector128(short value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, zeroes elsewhere.
    Vector128<short> asWords = Sse.StaticCast<int, short>(ConvertScalarToVector128Int32(value));
    // Pair it up: { -- -- -- -- -- -- vl vl }
    Vector128<int> asDwords = Sse.StaticCast<short, int>(UnpackLow(asWords, asWords));
    // Replicate dword 0 everywhere: { vl x8 }.
    return Sse.StaticCast<int, short>(Shuffle(asDwords, 0));
}
/// <summary>
/// __m128i _mm_set1_epi16 (short a)
/// Broadcasts <paramref name="value"/> into every unsigned 16-bit lane of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> SetAllVector128(ushort value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, zeroes elsewhere.
    Vector128<ushort> asWords = Sse.StaticCast<uint, ushort>(ConvertScalarToVector128UInt32(value));
    // Pair it up: { -- -- -- -- -- -- vl vl }
    Vector128<uint> asDwords = Sse.StaticCast<ushort, uint>(UnpackLow(asWords, asWords));
    // Replicate dword 0 everywhere: { vl x8 }.
    return Sse.StaticCast<uint, ushort>(Shuffle(asDwords, 0));
}
/// <summary>
/// __m128i _mm_set1_epi32 (int a)
/// Broadcasts <paramref name="value"/> into every 32-bit lane of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> SetAllVector128(int value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, then replicate that lane across all four: { vl vl vl vl }.
    return Shuffle(ConvertScalarToVector128Int32(value), 0);
}
/// <summary>
/// __m128i _mm_set1_epi32 (int a)
/// Broadcasts <paramref name="value"/> into every unsigned 32-bit lane of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<uint> SetAllVector128(uint value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, then replicate that lane across all four: { vl vl vl vl }.
    return Shuffle(ConvertScalarToVector128UInt32(value), 0);
}
/// <summary>
/// __m128i _mm_set1_epi64x (long long a)
/// Broadcasts <paramref name="value"/> into both 64-bit lanes of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> SetAllVector128(long value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, then interleave with itself: { vl vl }.
    Vector128<long> scalar = ConvertScalarToVector128Int64(value);
    return UnpackLow(scalar, scalar);
}
/// <summary>
/// __m128i _mm_set1_epi64x (long long a)
/// Broadcasts <paramref name="value"/> into both unsigned 64-bit lanes of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ulong> SetAllVector128(ulong value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, then interleave with itself: { vl vl }.
    Vector128<ulong> scalar = ConvertScalarToVector128UInt64(value);
    return UnpackLow(scalar, scalar);
}
/// <summary>
/// __m128d _mm_set1_pd (double a)
/// Broadcasts <paramref name="value"/> into both double lanes of the result.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> SetAllVector128(double value)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Scalar into lane 0, then interleave with itself: { vl vl }.
    Vector128<double> scalar = SetScalarVector128(value);
    return UnpackLow(scalar, scalar);
}
/// <summary>
/// __m128d _mm_set_sd (double a)
/// Returns a vector with <paramref name="value"/> in the low lane and zero in the high lane.
/// NOTE(review): self-recursive intrinsic placeholder — the JIT replaces the call.
/// </summary>
public static Vector128<double> SetScalarVector128(double value) => SetScalarVector128(value);
/// <summary>
/// ___m128i _mm_set_epi8 (char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0)
/// Builds a vector from 16 explicit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<sbyte> SetVector128(sbyte e15, sbyte e14, sbyte e13, sbyte e12, sbyte e11, sbyte e10, sbyte e9, sbyte e8, sbyte e7, sbyte e6, sbyte e5, sbyte e4, sbyte e3, sbyte e2, sbyte e1, sbyte e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    sbyte* buffer = stackalloc sbyte[16];
    // Store the elements in memory order (e0 first) so the subsequent load
    // places e0 in the least-significant lane, matching _mm_set_epi8.
    // (Restored: these stores were missing, leaving the buffer uninitialized.)
    buffer[0] = e0;
    buffer[1] = e1;
    buffer[2] = e2;
    buffer[3] = e3;
    buffer[4] = e4;
    buffer[5] = e5;
    buffer[6] = e6;
    buffer[7] = e7;
    buffer[8] = e8;
    buffer[9] = e9;
    buffer[10] = e10;
    buffer[11] = e11;
    buffer[12] = e12;
    buffer[13] = e13;
    buffer[14] = e14;
    buffer[15] = e15;
    // Load 16-byte buffer into Vector
    return LoadVector128(buffer);
}
/// <summary>
/// ___m128i _mm_set_epi8 (char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0)
/// Builds a vector from 16 explicit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<byte> SetVector128(byte e15, byte e14, byte e13, byte e12, byte e11, byte e10, byte e9, byte e8, byte e7, byte e6, byte e5, byte e4, byte e3, byte e2, byte e1, byte e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    byte* buffer = stackalloc byte[16];
    // Store the elements in memory order (e0 first) so the subsequent load
    // places e0 in the least-significant lane, matching _mm_set_epi8.
    // (Restored: these stores were missing, leaving the buffer uninitialized.)
    buffer[0] = e0;
    buffer[1] = e1;
    buffer[2] = e2;
    buffer[3] = e3;
    buffer[4] = e4;
    buffer[5] = e5;
    buffer[6] = e6;
    buffer[7] = e7;
    buffer[8] = e8;
    buffer[9] = e9;
    buffer[10] = e10;
    buffer[11] = e11;
    buffer[12] = e12;
    buffer[13] = e13;
    buffer[14] = e14;
    buffer[15] = e15;
    // Load 16-byte buffer into Vector
    return LoadVector128(buffer);
}
/// <summary>
/// __m128i _mm_set_epi16 (short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
/// Builds a vector from 8 explicit 16-bit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> SetVector128(short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Start with e0 in lane 0 (remaining lanes zero), then insert e1..e7 into lanes 1..7.
    Vector128<short> result = Sse.StaticCast<int, short>(ConvertScalarToVector128Int32(e0));
    result = Insert(result, e1, 1);
    result = Insert(result, e2, 2);
    result = Insert(result, e3, 3);
    result = Insert(result, e4, 4);
    result = Insert(result, e5, 5);
    result = Insert(result, e6, 6);
    result = Insert(result, e7, 7);
    return result;
}
/// <summary>
/// __m128i _mm_set_epi16 (short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
/// Builds a vector from 8 explicit unsigned 16-bit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> SetVector128(ushort e7, ushort e6, ushort e5, ushort e4, ushort e3, ushort e2, ushort e1, ushort e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Start with e0 in lane 0 (remaining lanes zero), then insert e1..e7 into lanes 1..7.
    Vector128<ushort> result = Sse.StaticCast<uint, ushort>(ConvertScalarToVector128UInt32(e0));
    result = Insert(result, e1, 1);
    result = Insert(result, e2, 2);
    result = Insert(result, e3, 3);
    result = Insert(result, e4, 4);
    result = Insert(result, e5, 5);
    result = Insert(result, e6, 6);
    result = Insert(result, e7, 7);
    return result;
}
/// <summary>
/// __m128i _mm_set_epi32 (int e3, int e2, int e1, int e0)
/// Builds a vector from 4 explicit 32-bit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> SetVector128(int e3, int e2, int e1, int e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Interleave scalars pairwise, then merge the pairs:
    // evens = { e0 e2 -- -- }
    Vector128<int> evens = UnpackLow(ConvertScalarToVector128Int32(e0), ConvertScalarToVector128Int32(e2));
    // odds  = { e1 e3 -- -- }
    Vector128<int> odds = UnpackLow(ConvertScalarToVector128Int32(e1), ConvertScalarToVector128Int32(e3));
    // Interleaving evens with odds yields { e0 e1 e2 e3 }.
    return UnpackLow(evens, odds);
}
/// <summary>
/// __m128i _mm_set_epi32 (int e3, int e2, int e1, int e0)
/// Builds a vector from 4 explicit unsigned 32-bit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<uint> SetVector128(uint e3, uint e2, uint e1, uint e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Interleave scalars pairwise, then merge the pairs:
    // evens = { e0 e2 -- -- }
    Vector128<uint> evens = UnpackLow(ConvertScalarToVector128UInt32(e0), ConvertScalarToVector128UInt32(e2));
    // odds  = { e1 e3 -- -- }
    Vector128<uint> odds = UnpackLow(ConvertScalarToVector128UInt32(e1), ConvertScalarToVector128UInt32(e3));
    // Interleaving evens with odds yields { e0 e1 e2 e3 }.
    return UnpackLow(evens, odds);
}
/// <summary>
/// __m128i _mm_set_epi64x (__int64 e1, __int64 e0)
/// Builds a vector from 2 explicit 64-bit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> SetVector128(long e1, long e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Put each scalar in the low lane of its own vector, then interleave: { e0 e1 }.
    return UnpackLow(ConvertScalarToVector128Int64(e0), ConvertScalarToVector128Int64(e1));
}
/// <summary>
/// __m128i _mm_set_epi64x (__int64 e1, __int64 e0)
/// Builds a vector from 2 explicit unsigned 64-bit elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ulong> SetVector128(ulong e1, ulong e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Put each scalar in the low lane of its own vector, then interleave: { e0 e1 }.
    return UnpackLow(ConvertScalarToVector128UInt64(e0), ConvertScalarToVector128UInt64(e1));
}
/// <summary>
/// __m128d _mm_set_pd (double e1, double e0)
/// Builds a vector from 2 explicit double elements; e0 is the least-significant lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<double> SetVector128(double e1, double e0)
{
    // TODO-CQ Optimize algorithm choice based on benchmarks
    // Put each scalar in the low lane of its own vector, then interleave: { e0 e1 }.
    return UnpackLow(SetScalarVector128(e0), SetScalarVector128(e1));
}
/// <summary>
/// __m128i _mm_setzero_si128 ()
/// __m128d _mm_setzero_pd (void)
/// Returns an all-zero 128-bit vector of the requested element type.
/// NOTE(review): self-recursive intrinsic placeholder — the JIT replaces the call.
/// </summary>
public static Vector128<T> SetZeroVector128<T>() where T : struct
{
    return SetZeroVector128<T>();
}
/// <summary>
/// __m128i _mm_sad_epu8 (__m128i a, __m128i b)
/// PSADBW xmm, xmm/m128
/// </summary>
public static Vector128<ushort> SumAbsoluteDifferences(Vector128<byte> left, Vector128<byte> right) => SumAbsoluteDifferences(left, right);
/// <summary>
/// __m128i _mm_shuffle_epi32 (__m128i a, int immediate)
/// PSHUFD xmm, xmm/m128, imm8
/// </summary>
public static Vector128<int> Shuffle(Vector128<int> value, byte control) => Shuffle(value, control);
/// <summary>
/// __m128i _mm_shuffle_epi32 (__m128i a, int immediate)
/// PSHUFD xmm, xmm/m128, imm8
/// </summary>
public static Vector128<uint> Shuffle(Vector128<uint> value, byte control) => Shuffle(value, control);
/// <summary>
/// __m128d _mm_shuffle_pd (__m128d a, __m128d b, int immediate)
/// SHUFPD xmm, xmm/m128, imm8
/// </summary>
public static Vector128<double> Shuffle(Vector128<double> left, Vector128<double> right, byte control) => Shuffle(left, right, control);
/// <summary>
/// __m128i _mm_shufflehi_epi16 (__m128i a, int immediate)
/// PSHUFHW xmm, xmm/m128, imm8
/// </summary>
public static Vector128<short> ShuffleHigh(Vector128<short> value, byte control) => ShuffleHigh(value, control);
/// <summary>
/// __m128i _mm_shufflehi_epi16 (__m128i a, int control)
/// PSHUFHW xmm, xmm/m128, imm8
/// </summary>
public static Vector128<ushort> ShuffleHigh(Vector128<ushort> value, byte control) => ShuffleHigh(value, control);
/// <summary>
/// __m128i _mm_shufflelo_epi16 (__m128i a, int control)
/// PSHUFLW xmm, xmm/m128, imm8
/// </summary>
public static Vector128<short> ShuffleLow(Vector128<short> value, byte control) => ShuffleLow(value, control);
/// <summary>
/// __m128i _mm_shufflelo_epi16 (__m128i a, int control)
/// PSHUFLW xmm, xmm/m128, imm8
/// </summary>
public static Vector128<ushort> ShuffleLow(Vector128<ushort> value, byte control) => ShuffleLow(value, control);
/// <summary>
/// __m128i _mm_sll_epi16 (__m128i a, __m128i count)
/// PSLLW xmm, xmm/m128
/// </summary>
public static Vector128<short> ShiftLeftLogical(Vector128<short> value, Vector128<short> count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_sll_epi16 (__m128i a, __m128i count)
/// PSLLW xmm, xmm/m128
/// </summary>
public static Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, Vector128<ushort> count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_sll_epi32 (__m128i a, __m128i count)
/// PSLLD xmm, xmm/m128
/// </summary>
public static Vector128<int> ShiftLeftLogical(Vector128<int> value, Vector128<int> count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_sll_epi32 (__m128i a, __m128i count)
/// PSLLD xmm, xmm/m128
/// </summary>
public static Vector128<uint> ShiftLeftLogical(Vector128<uint> value, Vector128<uint> count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_sll_epi64 (__m128i a, __m128i count)
/// PSLLQ xmm, xmm/m128
/// </summary>
public static Vector128<long> ShiftLeftLogical(Vector128<long> value, Vector128<long> count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_sll_epi64 (__m128i a, __m128i count)
/// PSLLQ xmm, xmm/m128
/// </summary>
public static Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, Vector128<ulong> count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_slli_epi16 (__m128i a, int immediate)
/// PSLLW xmm, imm8
/// </summary>
public static Vector128<short> ShiftLeftLogical(Vector128<short> value, byte count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_slli_epi16 (__m128i a, int immediate)
/// PSLLW xmm, imm8
/// </summary>
public static Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, byte count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_slli_epi32 (__m128i a, int immediate)
/// PSLLD xmm, imm8
/// </summary>
public static Vector128<int> ShiftLeftLogical(Vector128<int> value, byte count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_slli_epi32 (__m128i a, int immediate)
/// PSLLD xmm, imm8
/// </summary>
public static Vector128<uint> ShiftLeftLogical(Vector128<uint> value, byte count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_slli_epi64 (__m128i a, int immediate)
/// PSLLQ xmm, imm8
/// </summary>
public static Vector128<long> ShiftLeftLogical(Vector128<long> value, byte count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_slli_epi64 (__m128i a, int immediate)
/// PSLLQ xmm, imm8
/// </summary>
public static Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, byte count) => ShiftLeftLogical(value, count);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<sbyte> ShiftLeftLogical128BitLane(Vector128<sbyte> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<byte> ShiftLeftLogical128BitLane(Vector128<byte> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<short> ShiftLeftLogical128BitLane(Vector128<short> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<ushort> ShiftLeftLogical128BitLane(Vector128<ushort> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<int> ShiftLeftLogical128BitLane(Vector128<int> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<uint> ShiftLeftLogical128BitLane(Vector128<uint> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<long> ShiftLeftLogical128BitLane(Vector128<long> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bslli_si128 (__m128i a, int imm8)
/// PSLLDQ xmm, imm8
/// </summary>
public static Vector128<ulong> ShiftLeftLogical128BitLane(Vector128<ulong> value, byte numBytes) => ShiftLeftLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_sra_epi16 (__m128i a, __m128i count)
/// PSRAW xmm, xmm/m128
/// </summary>
public static Vector128<short> ShiftRightArithmetic(Vector128<short> value, Vector128<short> count) => ShiftRightArithmetic(value, count);
/// <summary>
/// __m128i _mm_sra_epi32 (__m128i a, __m128i count)
/// PSRAD xmm, xmm/m128
/// </summary>
public static Vector128<int> ShiftRightArithmetic(Vector128<int> value, Vector128<int> count) => ShiftRightArithmetic(value, count);
/// <summary>
/// __m128i _mm_srai_epi16 (__m128i a, int immediate)
/// PSRAW xmm, imm8
/// </summary>
public static Vector128<short> ShiftRightArithmetic(Vector128<short> value, byte count) => ShiftRightArithmetic(value, count);
/// <summary>
/// __m128i _mm_srai_epi32 (__m128i a, int immediate)
/// PSRAD xmm, imm8
/// </summary>
public static Vector128<int> ShiftRightArithmetic(Vector128<int> value, byte count) => ShiftRightArithmetic(value, count);
/// <summary>
/// __m128i _mm_srl_epi16 (__m128i a, __m128i count)
/// PSRLW xmm, xmm/m128
/// </summary>
public static Vector128<short> ShiftRightLogical(Vector128<short> value, Vector128<short> count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srl_epi16 (__m128i a, __m128i count)
/// PSRLW xmm, xmm/m128
/// </summary>
public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, Vector128<ushort> count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srl_epi32 (__m128i a, __m128i count)
/// PSRLD xmm, xmm/m128
/// </summary>
public static Vector128<int> ShiftRightLogical(Vector128<int> value, Vector128<int> count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srl_epi32 (__m128i a, __m128i count)
/// PSRLD xmm, xmm/m128
/// </summary>
public static Vector128<uint> ShiftRightLogical(Vector128<uint> value, Vector128<uint> count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srl_epi64 (__m128i a, __m128i count)
/// PSRLQ xmm, xmm/m128
/// </summary>
public static Vector128<long> ShiftRightLogical(Vector128<long> value, Vector128<long> count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srl_epi64 (__m128i a, __m128i count)
/// PSRLQ xmm, xmm/m128
/// </summary>
public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, Vector128<ulong> count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srli_epi16 (__m128i a, int immediate)
/// PSRLW xmm, imm8
/// </summary>
public static Vector128<short> ShiftRightLogical(Vector128<short> value, byte count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srli_epi16 (__m128i a, int immediate)
/// PSRLW xmm, imm8
/// </summary>
public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, byte count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srli_epi32 (__m128i a, int immediate)
/// PSRLD xmm, imm8
/// </summary>
public static Vector128<int> ShiftRightLogical(Vector128<int> value, byte count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srli_epi32 (__m128i a, int immediate)
/// PSRLD xmm, imm8
/// </summary>
public static Vector128<uint> ShiftRightLogical(Vector128<uint> value, byte count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srli_epi64 (__m128i a, int immediate)
/// PSRLQ xmm, imm8
/// </summary>
public static Vector128<long> ShiftRightLogical(Vector128<long> value, byte count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_srli_epi64 (__m128i a, int immediate)
/// PSRLQ xmm, imm8
/// </summary>
public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, byte count) => ShiftRightLogical(value, count);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<sbyte> ShiftRightLogical128BitLane(Vector128<sbyte> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<byte> ShiftRightLogical128BitLane(Vector128<byte> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<short> ShiftRightLogical128BitLane(Vector128<short> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<ushort> ShiftRightLogical128BitLane(Vector128<ushort> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<int> ShiftRightLogical128BitLane(Vector128<int> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<uint> ShiftRightLogical128BitLane(Vector128<uint> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<long> ShiftRightLogical128BitLane(Vector128<long> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128i _mm_bsrli_si128 (__m128i a, int imm8)
/// PSRLDQ xmm, imm8
/// </summary>
public static Vector128<ulong> ShiftRightLogical128BitLane(Vector128<ulong> value, byte numBytes) => ShiftRightLogical128BitLane(value, numBytes);
/// <summary>
/// __m128d _mm_sqrt_pd (__m128d a)
/// SQRTPD xmm, xmm/m128
/// </summary>
public static Vector128<double> Sqrt(Vector128<double> value) => Sqrt(value);
/// <summary>
/// __m128d _mm_sqrt_sd (__m128d a)
/// SQRTSD xmm, xmm/64
/// The above native signature does not exist. We provide this additional overload for the recommended use case of this intrinsic.
/// </summary>
public static Vector128<double> SqrtScalar(Vector128<double> value) => SqrtScalar(value);
/// <summary>
/// __m128d _mm_sqrt_sd (__m128d a, __m128d b)
/// SQRTSD xmm, xmm/64
/// </summary>
public static Vector128<double> SqrtScalar(Vector128<double> upper, Vector128<double> value) => SqrtScalar(upper, value);
/// <summary>
/// void _mm_store_sd (double* mem_addr, __m128d a)
/// MOVSD m64, xmm
/// </summary>
public static unsafe void StoreScalar(double* address, Vector128<double> source) => StoreScalar(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(sbyte* address, Vector128<sbyte> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(byte* address, Vector128<byte> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(short* address, Vector128<short> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(ushort* address, Vector128<ushort> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(int* address, Vector128<int> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(uint* address, Vector128<uint> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(long* address, Vector128<long> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQA m128, xmm
/// </summary>
public static unsafe void StoreAligned(ulong* address, Vector128<ulong> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_store_pd (double* mem_addr, __m128d a)
/// MOVAPD m128, xmm
/// </summary>
public static unsafe void StoreAligned(double* address, Vector128<double> source) => StoreAligned(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(sbyte* address, Vector128<sbyte> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(byte* address, Vector128<byte> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(short* address, Vector128<short> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(ushort* address, Vector128<ushort> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(int* address, Vector128<int> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(uint* address, Vector128<uint> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(long* address, Vector128<long> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
/// MOVNTDQ m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(ulong* address, Vector128<ulong> source) => StoreAlignedNonTemporal(address, source);
/// <summary>
/// void _mm_stream_pd (double* mem_addr, __m128d a)
/// MOVNTPD m128, xmm
/// </summary>
public static unsafe void StoreAlignedNonTemporal(double* address, Vector128<double> source) => StoreAlignedNonTemporal(address, source);
// Unaligned 128-bit stores (MOVDQU / MOVUPD): one overload per lane type.
// No alignment requirement on 'address'. The recursive bodies are intrinsic
// placeholders replaced by the JIT with the listed instruction.

/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(sbyte* address, Vector128<sbyte> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(byte* address, Vector128<byte> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(short* address, Vector128<short> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(ushort* address, Vector128<ushort> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(int* address, Vector128<int> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(uint* address, Vector128<uint> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(long* address, Vector128<long> source) => Store(address, source);
/// void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
/// MOVDQU m128, xmm
public static unsafe void Store(ulong* address, Vector128<ulong> source) => Store(address, source);
/// void _mm_storeu_pd (double* mem_addr, __m128d a)
/// MOVUPD m128, xmm
public static unsafe void Store(double* address, Vector128<double> source) => Store(address, source);
// Stores only the UPPER Double lane of 'source' to *address (a 64-bit store).
// Intrinsic placeholder body — replaced by the JIT with the instruction below.
/// void _mm_storeh_pd (double* mem_addr, __m128d a)
/// MOVHPD m64, xmm
public static unsafe void StoreHigh(double* address, Vector128<double> source) => StoreHigh(address, source);
// Stores only the LOWER 64-bit lane of 'source' to *address.
// Intrinsic placeholder bodies — replaced by the JIT with the listed instruction.
/// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
/// MOVQ m64, xmm
public static unsafe void StoreLow(long* address, Vector128<long> source) => StoreLow(address, source);
/// void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
/// MOVQ m64, xmm
public static unsafe void StoreLow(ulong* address, Vector128<ulong> source) => StoreLow(address, source);
/// void _mm_storel_pd (double* mem_addr, __m128d a)
/// MOVLPD m64, xmm
public static unsafe void StoreLow(double* address, Vector128<double> source) => StoreLow(address, source);
// Scalar non-temporal (cache-bypassing) stores of a single 32- or 64-bit value.
// NOTE(review): the 64-bit forms map to _mm_stream_si64, which is only
// encodable on x64 — presumably guarded by the nested Sse2.X64 class elsewhere
// in this file; confirm against the full source.
/// void _mm_stream_si32(int *p, int a)
/// MOVNTI m32, r32
public static unsafe void StoreNonTemporal(int* address, int value) => StoreNonTemporal(address, value);
/// void _mm_stream_si32(int *p, int a)
/// MOVNTI m32, r32
public static unsafe void StoreNonTemporal(uint* address, uint value) => StoreNonTemporal(address, value);
/// void _mm_stream_si64(__int64 *p, __int64 a)
/// MOVNTI m64, r64
public static unsafe void StoreNonTemporal(long* address, long value) => StoreNonTemporal(address, value);
/// void _mm_stream_si64(__int64 *p, __int64 a)
/// MOVNTI m64, r64
public static unsafe void StoreNonTemporal(ulong* address, ulong value) => StoreNonTemporal(address, value);
// Lane-wise subtraction (left - right) with wraparound on integer overflow.
// One overload per element type; recursive bodies are intrinsic placeholders
// that the JIT replaces with the listed PSUB*/SUBPD instruction.
/// __m128i _mm_sub_epi8 (__m128i a, __m128i b)
/// PSUBB xmm, xmm/m128
public static Vector128<byte> Subtract(Vector128<byte> left, Vector128<byte> right) => Subtract(left, right);
/// __m128i _mm_sub_epi8 (__m128i a, __m128i b)
/// PSUBB xmm, xmm/m128
public static Vector128<sbyte> Subtract(Vector128<sbyte> left, Vector128<sbyte> right) => Subtract(left, right);
/// __m128i _mm_sub_epi16 (__m128i a, __m128i b)
/// PSUBW xmm, xmm/m128
public static Vector128<short> Subtract(Vector128<short> left, Vector128<short> right) => Subtract(left, right);
/// __m128i _mm_sub_epi16 (__m128i a, __m128i b)
/// PSUBW xmm, xmm/m128
public static Vector128<ushort> Subtract(Vector128<ushort> left, Vector128<ushort> right) => Subtract(left, right);
/// __m128i _mm_sub_epi32 (__m128i a, __m128i b)
/// PSUBD xmm, xmm/m128
public static Vector128<int> Subtract(Vector128<int> left, Vector128<int> right) => Subtract(left, right);
/// __m128i _mm_sub_epi32 (__m128i a, __m128i b)
/// PSUBD xmm, xmm/m128
public static Vector128<uint> Subtract(Vector128<uint> left, Vector128<uint> right) => Subtract(left, right);
/// __m128i _mm_sub_epi64 (__m128i a, __m128i b)
/// PSUBQ xmm, xmm/m128
public static Vector128<long> Subtract(Vector128<long> left, Vector128<long> right) => Subtract(left, right);
/// __m128i _mm_sub_epi64 (__m128i a, __m128i b)
/// PSUBQ xmm, xmm/m128
public static Vector128<ulong> Subtract(Vector128<ulong> left, Vector128<ulong> right) => Subtract(left, right);
/// __m128d _mm_sub_pd (__m128d a, __m128d b)
/// SUBPD xmm, xmm/m128
public static Vector128<double> Subtract(Vector128<double> left, Vector128<double> right) => Subtract(left, right);
// Scalar subtraction: lower lane = left[0] - right[0]; upper lane is copied
// from 'left' (SUBSD semantics). Intrinsic placeholder body.
/// __m128d _mm_sub_sd (__m128d a, __m128d b)
/// SUBSD xmm, xmm/m64
public static Vector128<double> SubtractScalar(Vector128<double> left, Vector128<double> right) => SubtractScalar(left, right);
// Lane-wise saturating subtraction: results clamp to the element type's
// min/max instead of wrapping (signed: PSUBS*, unsigned: PSUBUS*).
// Recursive bodies are intrinsic placeholders replaced by the JIT.
/// __m128i _mm_subs_epi8 (__m128i a, __m128i b)
/// PSUBSB xmm, xmm/m128
public static Vector128<sbyte> SubtractSaturate(Vector128<sbyte> left, Vector128<sbyte> right) => SubtractSaturate(left, right);
/// __m128i _mm_subs_epi16 (__m128i a, __m128i b)
/// PSUBSW xmm, xmm/m128
public static Vector128<short> SubtractSaturate(Vector128<short> left, Vector128<short> right) => SubtractSaturate(left, right);
/// __m128i _mm_subs_epu8 (__m128i a, __m128i b)
/// PSUBUSB xmm, xmm/m128
public static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right) => SubtractSaturate(left, right);
/// __m128i _mm_subs_epu16 (__m128i a, __m128i b)
/// PSUBUSW xmm, xmm/m128
public static Vector128<ushort> SubtractSaturate(Vector128<ushort> left, Vector128<ushort> right) => SubtractSaturate(left, right);
// Interleaves the UPPER-half lanes of 'left' and 'right'
// (PUNPCKH* / UNPCKHPD semantics). Recursive bodies are intrinsic
// placeholders replaced by the JIT with the listed instruction.
/// __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b)
/// PUNPCKHBW xmm, xmm/m128
public static Vector128<byte> UnpackHigh(Vector128<byte> left, Vector128<byte> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b)
/// PUNPCKHBW xmm, xmm/m128
public static Vector128<sbyte> UnpackHigh(Vector128<sbyte> left, Vector128<sbyte> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b)
/// PUNPCKHWD xmm, xmm/m128
public static Vector128<short> UnpackHigh(Vector128<short> left, Vector128<short> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b)
/// PUNPCKHWD xmm, xmm/m128
public static Vector128<ushort> UnpackHigh(Vector128<ushort> left, Vector128<ushort> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b)
/// PUNPCKHDQ xmm, xmm/m128
public static Vector128<int> UnpackHigh(Vector128<int> left, Vector128<int> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b)
/// PUNPCKHDQ xmm, xmm/m128
public static Vector128<uint> UnpackHigh(Vector128<uint> left, Vector128<uint> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b)
/// PUNPCKHQDQ xmm, xmm/m128
public static Vector128<long> UnpackHigh(Vector128<long> left, Vector128<long> right) => UnpackHigh(left, right);
/// __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b)
/// PUNPCKHQDQ xmm, xmm/m128
public static Vector128<ulong> UnpackHigh(Vector128<ulong> left, Vector128<ulong> right) => UnpackHigh(left, right);
/// __m128d _mm_unpackhi_pd (__m128d a, __m128d b)
/// UNPCKHPD xmm, xmm/m128
public static Vector128<double> UnpackHigh(Vector128<double> left, Vector128<double> right) => UnpackHigh(left, right);
// Interleaves the LOWER-half lanes of 'left' and 'right'
// (PUNPCKL* / UNPCKLPD semantics). Recursive bodies are intrinsic
// placeholders replaced by the JIT with the listed instruction.
/// __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b)
/// PUNPCKLBW xmm, xmm/m128
public static Vector128<byte> UnpackLow(Vector128<byte> left, Vector128<byte> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b)
/// PUNPCKLBW xmm, xmm/m128
public static Vector128<sbyte> UnpackLow(Vector128<sbyte> left, Vector128<sbyte> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b)
/// PUNPCKLWD xmm, xmm/m128
public static Vector128<short> UnpackLow(Vector128<short> left, Vector128<short> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b)
/// PUNPCKLWD xmm, xmm/m128
public static Vector128<ushort> UnpackLow(Vector128<ushort> left, Vector128<ushort> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b)
/// PUNPCKLDQ xmm, xmm/m128
public static Vector128<int> UnpackLow(Vector128<int> left, Vector128<int> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b)
/// PUNPCKLDQ xmm, xmm/m128
public static Vector128<uint> UnpackLow(Vector128<uint> left, Vector128<uint> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b)
/// PUNPCKLQDQ xmm, xmm/m128
public static Vector128<long> UnpackLow(Vector128<long> left, Vector128<long> right) => UnpackLow(left, right);
/// __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b)
/// PUNPCKLQDQ xmm, xmm/m128
public static Vector128<ulong> UnpackLow(Vector128<ulong> left, Vector128<ulong> right) => UnpackLow(left, right);
/// __m128d _mm_unpacklo_pd (__m128d a, __m128d b)
/// UNPCKLPD xmm, xmm/m128
public static Vector128<double> UnpackLow(Vector128<double> left, Vector128<double> right) => UnpackLow(left, right);
// Bitwise XOR of the full 128-bit vectors; the element type only affects the
// managed signature, not the operation (all integer overloads map to PXOR).
// Recursive bodies are intrinsic placeholders replaced by the JIT.
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<byte> Xor(Vector128<byte> left, Vector128<byte> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<sbyte> Xor(Vector128<sbyte> left, Vector128<sbyte> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<short> Xor(Vector128<short> left, Vector128<short> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<ushort> Xor(Vector128<ushort> left, Vector128<ushort> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<int> Xor(Vector128<int> left, Vector128<int> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<uint> Xor(Vector128<uint> left, Vector128<uint> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<long> Xor(Vector128<long> left, Vector128<long> right) => Xor(left, right);
/// __m128i _mm_xor_si128 (__m128i a, __m128i b)
/// PXOR xmm, xmm/m128
public static Vector128<ulong> Xor(Vector128<ulong> left, Vector128<ulong> right) => Xor(left, right);
/// __m128d _mm_xor_pd (__m128d a, __m128d b)
/// XORPD xmm, xmm/m128
public static Vector128<double> Xor(Vector128<double> left, Vector128<double> right) => Xor(left, right);