# Cpu features definition and flags
# ------------------------------------------------------------------------------
+# Initialize ALL_CPU_FEATURES as empty list.
+set(ALL_CPU_FEATURES "")
+
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- set(ALL_CPU_FEATURES SSE SSE2 AVX AVX2 AVX512F)
- list(SORT ALL_CPU_FEATURES)
+ set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F)
+ set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+ set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
endif()
+# Making sure ALL_CPU_FEATURES is sorted.
+list(SORT ALL_CPU_FEATURES)
+
# Function to check whether the target CPU supports the provided set of features.
# Usage:
# cpu_supports(
endif()
endfunction()
-# Function to compute the flags to pass down to the compiler.
-# Usage:
-# compute_flags(
-# <output variable>
-# MARCH <arch name or "native">
-# REQUIRE <list of mandatory features to enable>
-# REJECT <list of features to disable>
-# )
-function(compute_flags output_var)
- cmake_parse_arguments(
- "COMPUTE_FLAGS"
- "" # Optional arguments
- "MARCH" # Single value arguments
- "REQUIRE;REJECT" # Multi value arguments
- ${ARGN})
- # Check that features are not required and rejected at the same time.
- if(COMPUTE_FLAGS_REQUIRE AND COMPUTE_FLAGS_REJECT)
- _intersection(var ${COMPUTE_FLAGS_REQUIRE} ${COMPUTE_FLAGS_REJECT})
- if(var)
- message(FATAL_ERROR "Cpu Features REQUIRE and REJECT ${var}")
- endif()
- endif()
- # Generate the compiler flags in `current`.
- if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang|GNU")
- if(COMPUTE_FLAGS_MARCH)
- list(APPEND current "-march=${COMPUTE_FLAGS_MARCH}")
- endif()
- foreach(feature IN LISTS COMPUTE_FLAGS_REQUIRE)
- string(TOLOWER ${feature} lowercase_feature)
- list(APPEND current "-m${lowercase_feature}")
- endforeach()
- foreach(feature IN LISTS COMPUTE_FLAGS_REJECT)
- string(TOLOWER ${feature} lowercase_feature)
- list(APPEND current "-mno-${lowercase_feature}")
- endforeach()
- else()
- # In future, we can extend for other compilers.
- message(FATAL_ERROR "Unkown compiler ${CMAKE_CXX_COMPILER_ID}.")
- endif()
- # Export the list of flags.
- set(${output_var} "${current}" PARENT_SCOPE)
-endfunction()
-
# ------------------------------------------------------------------------------
# Internal helpers and utilities.
# ------------------------------------------------------------------------------
endfunction()
_generate_check_code()
-# Compiles and runs the code generated above with the specified requirements.
-# This is helpful to infer which features a particular target supports or if
-# a specific features implies other features (e.g. BMI2 implies SSE2 and SSE).
-function(_check_defined_cpu_feature output_var)
- cmake_parse_arguments(
- "CHECK_DEFINED"
- "" # Optional arguments
- "MARCH" # Single value arguments
- "REQUIRE;REJECT" # Multi value arguments
- ${ARGN})
- compute_flags(
- flags
- MARCH ${CHECK_DEFINED_MARCH}
- REQUIRE ${CHECK_DEFINED_REQUIRE}
- REJECT ${CHECK_DEFINED_REJECT})
+# Cache entry holding the list of CPU features (';'-separated names such as
+# those in ALL_CPU_FEATURES). It is typed STRING rather than PATH: a PATH entry
+# supplied as -DLIBC_CPU_FEATURES=<value> is interpreted as a relative path and
+# rewritten to an absolute one, which would corrupt the feature names.
+set(LIBC_CPU_FEATURES "" CACHE STRING "Host supported CPU features")
+
+if(CMAKE_CROSSCOMPILING)
+  # The host cannot be probed when cross-compiling, so LIBC_CPU_FEATURES is
+  # taken as given and must only contain names listed in ALL_CPU_FEATURES.
+  _intersection(cpu_features "${ALL_CPU_FEATURES}" "${LIBC_CPU_FEATURES}")
+  if(NOT "${cpu_features}" STREQUAL "${LIBC_CPU_FEATURES}")
+    # Report the requested features that are missing from the intersection,
+    # not the intersection itself (which holds the supported ones).
+    set(unsupported_cpu_features ${LIBC_CPU_FEATURES})
+    if(NOT "${cpu_features}" STREQUAL "")
+      list(REMOVE_ITEM unsupported_cpu_features ${cpu_features})
+    endif()
+    message(FATAL_ERROR "Unsupported CPU features: ${unsupported_cpu_features} (requested: ${LIBC_CPU_FEATURES}; known: ${ALL_CPU_FEATURES})")
+  endif()
+  set(LIBC_CPU_FEATURES "${cpu_features}")
+else()
+ # Populates the LIBC_CPU_FEATURES list from host.
try_run(
run_result compile_result "${CMAKE_CURRENT_BINARY_DIR}/check_${feature}"
"${CMAKE_CURRENT_BINARY_DIR}/cpu_features/check_cpu_features.cpp"
- COMPILE_DEFINITIONS ${flags}
+ COMPILE_DEFINITIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
COMPILE_OUTPUT_VARIABLE compile_output
RUN_OUTPUT_VARIABLE run_output)
if("${run_result}" EQUAL 0)
- set(${output_var}
- "${run_output}"
- PARENT_SCOPE)
+ set(LIBC_CPU_FEATURES "${run_output}")
elseif(NOT ${compile_result})
message(FATAL_ERROR "Failed to compile: ${compile_output}")
else()
message(FATAL_ERROR "Failed to run: ${run_output}")
endif()
-endfunction()
-
-set(LIBC_CPU_FEATURES "" CACHE PATH "supported CPU features")
-
-if(CMAKE_CROSSCOMPILING)
- _intersection(cpu_features "${ALL_CPU_FEATURES}" "${LIBC_CPU_FEATURES}")
- if(NOT "${cpu_features}" STREQUAL "${LIBC_CPU_FEATURES}")
- message(FATAL_ERROR "Unsupported CPU features: ${cpu_features}")
- endif()
- set(LIBC_CPU_FEATURES "${cpu_features}")
-else()
- # Populates the LIBC_CPU_FEATURES list.
- # Use -march=native only when the compiler supports it.
- include(CheckCXXCompilerFlag)
- CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
- if(COMPILER_SUPPORTS_MARCH_NATIVE)
- _check_defined_cpu_feature(LIBC_CPU_FEATURES MARCH native)
- else()
- _check_defined_cpu_feature(LIBC_CPU_FEATURES)
- endif()
endif()
cmake_parse_arguments(
"ADD_IMPL"
"" # Optional arguments
- "MARCH" # Single value arguments
- "REQUIRE;REJECT;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
+ "" # Single value arguments
+ "REQUIRE;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
${ARGN})
- compute_flags(flags
- MARCH ${ADD_IMPL_MARCH}
- REQUIRE ${ADD_IMPL_REQUIRE}
- REJECT ${ADD_IMPL_REJECT}
- )
add_entrypoint_object(${impl_name}
NAME ${name}
SRCS ${ADD_IMPL_SRCS}
HDRS ${ADD_IMPL_HDRS}
DEPENDS ${ADD_IMPL_DEPENDS}
- COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags} -O2
+ COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS}
)
get_fq_target_name(${impl_name} fq_target_name)
set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
# memcpy
# ------------------------------------------------------------------------------
-# include the relevant architecture specific implementations
-if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/${LIBC_TARGET_ARCHITECTURE}/memcpy.cpp)
-elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
- set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/${LIBC_TARGET_ARCHITECTURE}/memcpy.cpp)
-#Disable tail merging as it leads to lower performance
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mllvm --tail-merge-threshold=0")
-else()
- set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
-endif()
-
function(add_memcpy memcpy_name)
add_implementation(memcpy ${memcpy_name}
SRCS ${MEMCPY_SRC}
endfunction()
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- add_memcpy(memcpy MARCH native)
+ set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/x86_64/memcpy.cpp)
+ add_memcpy(memcpy_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+ add_memcpy(memcpy_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+ add_memcpy(memcpy_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+ add_memcpy(memcpy_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+ add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+ add_memcpy(memcpy)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+ set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memcpy.cpp)
+ # Disable tail merging as it leads to lower performance.
+ # Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication.
+ add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
+ COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
+ add_memcpy(memcpy COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
else()
+ set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
+ add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memcpy(memcpy)
endif()
endfunction()
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- add_memset(memset MARCH native)
+ add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+ add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+ add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+ add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+ add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+ add_memset(memset)
else()
+ add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memset(memset)
endif()
endfunction()
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- add_bzero(bzero MARCH native)
+ add_bzero(bzero_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+ add_bzero(bzero_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+ add_bzero(bzero_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+ add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+ add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+ add_bzero(bzero)
else()
+ add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_bzero(bzero)
endif()
-
-# ------------------------------------------------------------------------------
-# Add all other relevant implementations for the native target.
-# ------------------------------------------------------------------------------
-
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
- include(${LIBC_TARGET_ARCHITECTURE}/CMakeLists.txt)
-endif()