// Fallback: when the compiler offers no feature-detection hook, report every
// queried feature as absent.
#define LLVM_LIBC_HAS_FEATURE(FEATURE) 0
#endif
// Portable "keep this loop rolled" annotation (added lines below):
//   - Clang: _Pragma("nounroll")
//   - GCC:   _Pragma("GCC unroll 0") (closest equivalent)
//   - other: expands to nothing (loop may be unrolled)
// This lets call sites drop the Clang-only `#pragma nounroll`.
+#if defined(LLVM_LIBC_COMPILER_CLANG)
+#define LLVM_LIBC_LOOP_NOUNROLL _Pragma("nounroll")
+#elif defined(LLVM_LIBC_COMPILER_GCC)
+#define LLVM_LIBC_LOOP_NOUNROLL _Pragma("GCC unroll 0")
+#else
+#define LLVM_LIBC_LOOP_NOUNROLL
+#endif
+
#endif // LLVM_LIBC_SUPPORT_COMPILER_FEATURES_H
// Size-optimized bcmp: one byte per iteration, loop deliberately kept rolled.
// The hunk swaps the Clang-only `#pragma nounroll` for the portable
// LLVM_LIBC_LOOP_NOUNROLL macro; the loop body is unchanged.
[[maybe_unused]] static inline BcmpReturnType
inline_bcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
  for (size_t offset = 0; offset < count; ++offset)
    if (auto value = generic::Bcmp<1>::block(p1 + offset, p2 + offset))
      return value;
// NOTE(review): hunk ends mid-function — the final "equal" return and the
// closing brace are outside this chunk.
namespace __llvm_libc {
// Size-optimized memcmp: byte-at-a-time, loop kept rolled via the portable
// macro (replacing the Clang-only pragma).
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
  for (size_t offset = 0; offset < count; ++offset)
    if (auto value = generic::Memcmp<1>::block(p1 + offset, p2 + offset))
      return value;
}
// NOTE(review): the two lines below belong to a different routine (an x86
// AVX-512BW memcmp tail call) — the intervening context was elided from this
// hunk; the `}` above may not close inline_memcmp_embedded_tiny.
  return x86::avx512bw::Memcmp<64>::loop_and_tail(p1, p2, count);
}
+
#endif // defined(LLVM_LIBC_ARCH_X86)
#if defined(LLVM_LIBC_ARCH_AARCH64)
// Size-optimized memcpy: one byte per iteration, loop kept rolled via the
// portable macro instead of the Clang-only pragma.
[[maybe_unused]] static inline void
inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
                            size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
  for (size_t offset = 0; offset < count; ++offset)
    builtin::Memcpy<1>::block(dst + offset, src + offset);
}
// NOTE(review): the lines below come from a separate, overlap-aware copy
// routine (memmove-style); its signature was elided from this hunk.
  if ((count == 0) || (dst == src))
    return;
  // dst precedes src: copying forward cannot clobber unread source bytes.
  if (dst < src) {
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
    for (size_t offset = 0; offset < count; ++offset)
      builtin::Memcpy<1>::block(dst + offset, src + offset);
  } else {
    // dst follows src: copy backward so overlapping bytes are read first.
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
    for (ptrdiff_t offset = count - 1; offset >= 0; --offset)
      builtin::Memcpy<1>::block(dst + offset, src + offset);
  }
// Size-optimized memset: writes one byte per iteration; the rolled loop keeps
// code size minimal (same pragma-to-macro swap as the other *_embedded_tiny
// routines in this patch).
[[maybe_unused]] inline static void
inline_memset_embedded_tiny(Ptr dst, uint8_t value, size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
  for (size_t offset = 0; offset < count; ++offset)
    generic::Memset<1, 1>::block(dst + offset, value);
}
// Fragment of a fixed-size copy helper (its template header is elided from
// this hunk). The diff replaces a hard compile-time failure with a plain
// byte-copy fallback for compilers lacking __builtin_memcpy_inline.
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  return __builtin_memcpy_inline(dst, src, SIZE);
#else
-  deferred_static_assert("Missing __builtin_memcpy_inline");
-  (void)dst;
-  (void)src;
+  // The codegen may be suboptimal.
+  for (size_t i = 0; i < Size; ++i)
+    dst[i] = src[i];
// NOTE(review): the builtin branch uses `SIZE` but the added fallback loop
// uses `Size` — one of these is likely a case mismatch against the (elided)
// template parameter. Confirm the spelling in the enclosing template.
#endif
}
#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
+#include "src/__support/compiler_features.h"
#include "src/__support/endian.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"
// NOTE(review): the two closing braces below terminate a declaration whose
// body was elided from this hunk.
}
};
+// GCC can only take literals as __vector_size__ argument so we have to use
+// template specialization.
// Maps a power-of-two byte count (1..64) to the matching
// __attribute__((__vector_size__(N))) byte-vector type. The primary template
// intentionally has no `type` member, so unsupported sizes fail to compile at
// the point of use.
+template <size_t Size> struct VectorValueType {};
+template <> struct VectorValueType<1> {
+  using type = uint8_t __attribute__((__vector_size__(1)));
+};
+template <> struct VectorValueType<2> {
+  using type = uint8_t __attribute__((__vector_size__(2)));
+};
+template <> struct VectorValueType<4> {
+  using type = uint8_t __attribute__((__vector_size__(4)));
+};
+template <> struct VectorValueType<8> {
+  using type = uint8_t __attribute__((__vector_size__(8)));
+};
+template <> struct VectorValueType<16> {
+  using type = uint8_t __attribute__((__vector_size__(16)));
+};
+template <> struct VectorValueType<32> {
+  using type = uint8_t __attribute__((__vector_size__(32)));
+};
+template <> struct VectorValueType<64> {
+  using type = uint8_t __attribute__((__vector_size__(64)));
+};
+
// Implements load, store and splat for vector types.
template <size_t Size> struct VectorType {
// The diff routes the vector type through VectorValueType so GCC (which only
// accepts literal __vector_size__ arguments) can compile it; Clang accepted
// the dependent `Size` directly.
-  using Type = uint8_t __attribute__((__vector_size__(Size)));
+  using Type = typename VectorValueType<Size>::type;
  static inline Type load(CPtr src) { return ::__llvm_libc::load<Type>(src); }
  static inline void store(Ptr dst, Type value) {
    ::__llvm_libc::store<Type>(dst, value);
// NOTE(review): context elided — the lines below are from a forward
// loop-and-tail helper in a different scope (they reference `count`, `T` and
// `block`, none of which are declared above).
    const size_t tail_offset = count - Size;
    const auto tail_value = T::load(src + tail_offset);
    size_t offset = 0;
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += Size;
// NOTE(review): context elided again — below is a backward (head-anchored)
// variant of the same loop pattern; its enclosing function is not visible.
    static_assert(Size > 1, "a loop of size 1 does not need tail");
    const auto head_value = T::load(src);
    ptrdiff_t offset = count - Size;
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= Size;
namespace sse2 {
// Compares 16 bytes; non-zero result iff any byte differs. The diff guards
// the SSE2 intrinsics so the file still compiles when __SSE2__ is off.
static inline BcmpReturnType bcmp16(CPtr p1, CPtr p2) {
+#if defined(__SSE2__)
  using T = char __attribute__((__vector_size__(16)));
  // A mask indicating which bytes differ after loading 16 bytes from p1 and p2.
  const int mask =
      _mm_movemask_epi8(cpp::bit_cast<__m128i>(load<T>(p1) != load<T>(p2)));
  return static_cast<uint32_t>(mask);
+#else
// NOTE(review): this fallback unconditionally reports "equal"; presumably the
// dispatch code never selects sse2::bcmp16 without SSE2 — confirm callers.
+  (void)p1;
+  (void)p2;
+  return BcmpReturnType::ZERO();
+#endif // defined(__SSE2__)
}
template <size_t Size> using Bcmp = BcmpImpl<Size, 16, bcmp16>;
} // namespace sse2
namespace avx2 {
// Compares 32 bytes; non-zero result iff any byte differs. Guarded so the
// AVX2 intrinsics are only compiled when __AVX2__ is available.
static inline BcmpReturnType bcmp32(CPtr p1, CPtr p2) {
+#if defined(__AVX2__)
  using T = char __attribute__((__vector_size__(32)));
  // A mask indicating which bytes differ after loading 32 bytes from p1 and p2.
  const int mask =
// NOTE(review): the initializer expression for `mask` (the
// _mm256_movemask_epi8 call) is missing from this hunk — only its explanatory
// comment survives below. As shown, `mask` has no initializer; verify the
// full expression is intact in the actual file.
// _mm256_movemask_epi8 returns an int but it is to be interpreted as a 32-bit
// mask.
  return static_cast<uint32_t>(mask);
+#else
// Fallback reports "equal" regardless of input (see bcmp16 note).
+  (void)p1;
+  (void)p2;
+  return BcmpReturnType::ZERO();
+#endif // defined(__AVX2__)
}
template <size_t Size> using Bcmp = BcmpImpl<Size, 32, bcmp32>;
} // namespace avx2
namespace avx512bw {
// Compares 64 bytes; non-zero result iff any byte differs. The diff (a) adds
// the __AVX512BW__ guard and (b) bit_casts the generic vector loads to
// __m512i before the intrinsic call — presumably because GCC does not accept
// the implicit vector-type conversion Clang allowed; confirm against GCC.
static inline BcmpReturnType bcmp64(CPtr p1, CPtr p2) {
+#if defined(__AVX512BW__)
  using T = char __attribute__((__vector_size__(64)));
  // A mask indicating which bytes differ after loading 64 bytes from p1 and p2.
-  const uint64_t mask = _mm512_cmpneq_epi8_mask(load<T>(p1), load<T>(p2));
+  const uint64_t mask = _mm512_cmpneq_epi8_mask(
+      cpp::bit_cast<__m512i>(load<T>(p1)), cpp::bit_cast<__m512i>(load<T>(p2)));
  const bool mask_is_set = mask != 0;
  return static_cast<uint32_t>(mask_is_set);
+#else
// Fallback reports "equal" regardless of input (see bcmp16 note).
+  (void)p1;
+  (void)p2;
+  return BcmpReturnType::ZERO();
+#endif // defined(__AVX512BW__)
}
template <size_t Size> using Bcmp = BcmpImpl<Size, 64, bcmp64>;
} // namespace avx512bw
namespace sse2 {
// Three-way compare of 16 bytes: on a difference, delegates to
// char_diff_no_zero with the differing-byte mask; otherwise zero ("equal").
// Guarded so SSE2 intrinsics only compile under __SSE2__.
static inline MemcmpReturnType memcmp16(CPtr p1, CPtr p2) {
+#if defined(__SSE2__)
  using T = char __attribute__((__vector_size__(16)));
  // A mask indicating which bytes differ after loading 16 bytes from p1 and p2.
  if (int mask =
          _mm_movemask_epi8(cpp::bit_cast<__m128i>(load<T>(p1) != load<T>(p2))))
    return char_diff_no_zero(p1, p2, mask);
  return MemcmpReturnType::ZERO();
+#else
// Fallback reports "equal" regardless of input (see bcmp16 note).
+  (void)p1;
+  (void)p2;
+  return MemcmpReturnType::ZERO();
+#endif // defined(__SSE2__)
}
template <size_t Size> using Memcmp = MemcmpImpl<Size, 16, memcmp16, bcmp16>;
} // namespace sse2
namespace avx2 {
// Three-way compare of 32 bytes; same structure as sse2::memcmp16 but with
// AVX2 loads, guarded by __AVX2__.
static inline MemcmpReturnType memcmp32(CPtr p1, CPtr p2) {
+#if defined(__AVX2__)
  using T = char __attribute__((__vector_size__(32)));
  // A mask indicating which bytes differ after loading 32 bytes from p1 and p2.
  if (int mask = _mm256_movemask_epi8(
          cpp::bit_cast<__m256i>(load<T>(p1) != load<T>(p2))))
    return char_diff_no_zero(p1, p2, mask);
  return MemcmpReturnType::ZERO();
+#else
// Fallback reports "equal" regardless of input (see bcmp16 note).
+  (void)p1;
+  (void)p2;
+  return MemcmpReturnType::ZERO();
+#endif // defined(__AVX2__)
}
template <size_t Size> using Memcmp = MemcmpImpl<Size, 32, memcmp32, bcmp32>;
} // namespace avx2
namespace avx512bw {
// Three-way compare of 64 bytes. As in bcmp64, the diff adds the
// __AVX512BW__ guard and explicit bit_casts to __m512i for the intrinsic's
// operands (presumably for GCC's stricter vector-type checking — confirm).
static inline MemcmpReturnType memcmp64(CPtr p1, CPtr p2) {
+#if defined(__AVX512BW__)
  using T = char __attribute__((__vector_size__(64)));
  // A mask indicating which bytes differ after loading 64 bytes from p1 and p2.
-  if (uint64_t mask = _mm512_cmpneq_epi8_mask(load<T>(p1), load<T>(p2)))
+  if (uint64_t mask =
+          _mm512_cmpneq_epi8_mask(cpp::bit_cast<__m512i>(load<T>(p1)),
+                                  cpp::bit_cast<__m512i>(load<T>(p2))))
    return char_diff_no_zero(p1, p2, mask);
  return MemcmpReturnType::ZERO();
+#else
// Fallback reports "equal" regardless of input (see bcmp16 note).
+  (void)p1;
+  (void)p2;
+  return MemcmpReturnType::ZERO();
+#endif // defined(__AVX512BW__)
}
template <size_t Size> using Memcmp = MemcmpImpl<Size, 64, memcmp64, bcmp64>;
} // namespace avx512bw