// Compares a 16-byte block at p1 against a 16-byte block at p2 for equality.
// Builds a per-byte difference mask with _mm_movemask_epi8 and returns it as a
// uint32_t converted to BcmpReturnType; the mask is zero when no byte lane
// compared unequal.
static inline BcmpReturnType bcmp16(CPtr p1, CPtr p2) {
using T = char __attribute__((__vector_size__(16)));
// A mask indicating which bytes differ after loading 16 bytes from p1 and p2.
// The vector `!=` produces a vector-extension value, not an __m128i; the
// bit_cast form hands _mm_movemask_epi8 its declared operand type explicitly
// instead of relying on an implicit vector conversion.
- const int mask = _mm_movemask_epi8(load<T>(p1) != load<T>(p2));
+ const int mask =
+ _mm_movemask_epi8(cpp::bit_cast<__m128i>(load<T>(p1) != load<T>(p2)));
return static_cast<uint32_t>(mask);
}
// Bcmp<Size> is BcmpImpl instantiated with Size, the constant 16 and bcmp16.
// NOTE(review): 16 presumably is the block size in bytes handled by bcmp16 --
// confirm against the BcmpImpl declaration.
template <size_t Size> using Bcmp = BcmpImpl<Size, 16, bcmp16>;
static inline BcmpReturnType bcmp32(CPtr p1, CPtr p2) {
using T = char __attribute__((__vector_size__(32)));
// A mask indicating which bytes differ after loading 32 bytes from p1 and p2.
- const int mask = _mm256_movemask_epi8(load<T>(p1) != load<T>(p2));
+ const int mask =
+ _mm256_movemask_epi8(cpp::bit_cast<__m256i>(load<T>(p1) != load<T>(p2)));
// _mm256_movemask_epi8 returns an int but it is to be interpreted as a 32-bit
// mask.
return static_cast<uint32_t>(mask);
// Three-way comparison step over a 16-byte block at p1 and at p2.
// Returns char_diff_no_zero(p1, p2, mask) when the difference mask is
// non-zero, and MemcmpReturnType::ZERO() when no byte lane compared unequal.
static inline MemcmpReturnType memcmp16(CPtr p1, CPtr p2) {
using T = char __attribute__((__vector_size__(16)));
// A mask indicating which bytes differ after loading 16 bytes from p1 and p2.
// The vector `!=` result is bit-cast to __m128i, the operand type declared by
// _mm_movemask_epi8, rather than being converted implicitly.
- if (int mask = _mm_movemask_epi8(load<T>(p1) != load<T>(p2)))
+ if (int mask =
+ _mm_movemask_epi8(cpp::bit_cast<__m128i>(load<T>(p1) != load<T>(p2))))
// Only reached with a non-zero mask, i.e. at least one differing byte lane.
return char_diff_no_zero(p1, p2, mask);
return MemcmpReturnType::ZERO();
}
// Three-way comparison step over a 32-byte block at p1 and at p2.
// Returns char_diff_no_zero(p1, p2, mask) when the difference mask is
// non-zero, and MemcmpReturnType::ZERO() when no byte lane compared unequal.
static inline MemcmpReturnType memcmp32(CPtr p1, CPtr p2) {
using T = char __attribute__((__vector_size__(32)));
// A mask indicating which bytes differ after loading 32 bytes from p1 and p2.
// The vector `!=` result is bit-cast to __m256i, the operand type declared by
// _mm256_movemask_epi8, rather than being converted implicitly.
- if (int mask = _mm256_movemask_epi8(load<T>(p1) != load<T>(p2)))
+ if (int mask = _mm256_movemask_epi8(
+ cpp::bit_cast<__m256i>(load<T>(p1) != load<T>(p2))))
// Only reached with a non-zero mask, i.e. at least one differing byte lane.
return char_diff_no_zero(p1, p2, mask);
return MemcmpReturnType::ZERO();
}