From fb947c358661b3bd3d5e1fa776ec74cc0f308854 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20Storsj=C3=B6?= Date: Fri, 11 Nov 2022 00:21:22 +0200 Subject: [PATCH] [openmp] Use z_Linux_asm.S to provide __kmp_invoke_microtask with Clang for Windows/aarch64 When building for Windows aarch64, and not using the actual MSVC, we can assemble gnu assembly files just fine, and the existing correct implementation of __kmp_invoke_microtask is fully usable. The C implementation of __kmp_invoke_microtask in z_Windows_NT-586_util.cpp relies on unguaranteed assumptions about the compiler behaviour - it does work currently on MSVC, but doesn't necessarily on other compilers. That function uses an alloca to pass parameters on the stack to the called functions. There's no guarantee that the buffer allocated by alloca is exactly at the bottom of the stack when doing the call; the compiler might have left space for extra things to save on the stack there. Additionally, when compiled with Clang with optimization, Clang optimizes out the alloca and memcpy entirely. On the C language level, they don't have any visible effect outside of the function and thus can be omitted entirely. This fixes calling microtasks with more than 6 parameters, in builds for Windows/aarch64 with Clang. Differential Revision: https://reviews.llvm.org/D137827 --- openmp/runtime/src/CMakeLists.txt | 19 ++++++++----- openmp/runtime/src/z_Linux_asm.S | 41 +++++++++++++++++++++++----- openmp/runtime/src/z_Windows_NT-586_util.cpp | 4 ++- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index ac483b6..f222129 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -60,6 +60,7 @@ endif() # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) +set(LIBOMP_GNUASMFILES) if(STUBS_LIBRARY) set(LIBOMP_CXXFILES kmp_stub.cpp) else() @@ -94,12 +95,18 @@ else() libomp_append(LIBOMP_CXXFILES z_Windows_NT-586_util.cpp) if(${LIBOMP_ARCH} STREQUAL "i386" OR ${LIBOMP_ARCH} STREQUAL "x86_64") libomp_append(LIBOMP_ASMFILES z_Windows_NT-586_asm.asm) # Windows assembly file + elseif(${LIBOMP_ARCH} STREQUAL "aarch64" AND (NOT MSVC OR CMAKE_C_COMPILER_ID STREQUAL "Clang")) + # z_Linux_asm.S works for AArch64 Windows too. + libomp_append(LIBOMP_GNUASMFILES z_Linux_asm.S) + else() + # AArch64 with MSVC gets implementations of the functions from + # ifdeffed sections in z_Windows_NT-586_util.cpp. endif() else() # Unix specific files libomp_append(LIBOMP_CXXFILES z_Linux_util.cpp) libomp_append(LIBOMP_CXXFILES kmp_gsupport.cpp) - libomp_append(LIBOMP_ASMFILES z_Linux_asm.S) # Unix assembly file + libomp_append(LIBOMP_GNUASMFILES z_Linux_asm.S) # Unix assembly file endif() libomp_append(LIBOMP_CXXFILES thirdparty/ittnotify/ittnotify_static.cpp LIBOMP_USE_ITT_NOTIFY) libomp_append(LIBOMP_CXXFILES kmp_debugger.cpp LIBOMP_USE_DEBUGGER) @@ -115,7 +122,7 @@ libomp_append(LIBOMP_CXXFILES kmp_version.cpp) libomp_append(LIBOMP_CXXFILES ompt-general.cpp IF_TRUE LIBOMP_OMPT_SUPPORT) libomp_append(LIBOMP_CXXFILES ompd-specific.cpp IF_TRUE LIBOMP_OMPD_SUPPORT) -set(LIBOMP_SOURCE_FILES ${LIBOMP_CXXFILES} ${LIBOMP_ASMFILES}) +set(LIBOMP_SOURCE_FILES ${LIBOMP_CXXFILES} ${LIBOMP_ASMFILES} ${LIBOMP_GNUASMFILES}) # For Windows, there is a resource file (.rc -> .res) that is also compiled libomp_append(LIBOMP_SOURCE_FILES libomp.rc WIN32) @@ -124,11 +131,9 @@ libomp_get_cxxflags(LIBOMP_CONFIGURED_CXXFLAGS) libomp_get_asmflags(LIBOMP_CONFIGURED_ASMFLAGS) # Set the compiler flags for each type of source set_source_files_properties(${LIBOMP_CXXFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_CXXFLAGS}") -set_source_files_properties(${LIBOMP_ASMFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_ASMFLAGS}") -# Let the compiler handle the assembly files on Unix-like systems -if(NOT WIN32) - set_source_files_properties(${LIBOMP_ASMFILES} PROPERTIES LANGUAGE C) -endif() +set_source_files_properties(${LIBOMP_ASMFILES} ${LIBOMP_GNUASMFILES} PROPERTIES COMPILE_FLAGS "${LIBOMP_CONFIGURED_ASMFLAGS}") +# Let the compiler handle the GNU assembly files +set_source_files_properties(${LIBOMP_GNUASMFILES} PROPERTIES LANGUAGE C) # Remove any cmake-automatic linking of the standard C++ library. # We neither need (nor want) the standard C++ library dependency even though we compile c++ files. diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index b3767c9..ff4c491 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -108,7 +108,7 @@ KMP_PREFIX_UNDERSCORE(\proc): # endif // KMP_OS_DARWIN #endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 +#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 # if KMP_OS_DARWIN # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols @@ -117,6 +117,9 @@ KMP_PREFIX_UNDERSCORE(\proc): .macro ALIGN .align $0 .endmacro +.macro COMMON name, size, align_power + .comm \name, \size +.endm .macro DEBUG_INFO /* Not sure what .size does in icc, not sure if we need to do something @@ -129,7 +132,28 @@ KMP_PREFIX_UNDERSCORE(\proc): .globl KMP_PREFIX_UNDERSCORE($0) KMP_PREFIX_UNDERSCORE($0): .endmacro -# else // KMP_OS_DARWIN +# elif KMP_OS_WINDOWS +# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Windows/ARM64 symbols +// Format labels so that they don't override function names in gdb's backtraces +# define KMP_LABEL(x) .L_##x // local label hidden from backtraces + +.macro ALIGN size + .align 1<<(\size) +.endm +.macro COMMON name, size, align_power + .comm \name, \size, \align_power +.endm + +.macro DEBUG_INFO proc + ALIGN 2 +.endm + +.macro PROC proc + ALIGN 2 + .globl KMP_PREFIX_UNDERSCORE(\proc) +KMP_PREFIX_UNDERSCORE(\proc): +.endm +# else // KMP_OS_DARWIN || KMP_OS_WINDOWS # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols // Format labels so that they don't override function names in gdb's backtraces # define KMP_LABEL(x) .L_##x // local label hidden from backtraces @@ -137,6 +161,9 @@ KMP_PREFIX_UNDERSCORE($0): .macro ALIGN size .align 1<<(\size) .endm +.macro COMMON name, size, align_power + .comm \name, \size, (1<<(\align_power)) +.endm .macro DEBUG_INFO proc .cfi_endproc @@ -154,7 +181,7 @@ KMP_PREFIX_UNDERSCORE(\proc): .endm # endif // KMP_OS_DARWIN -#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 +#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 // ----------------------------------------------------------------------- // data @@ -1204,7 +1231,7 @@ KMP_LABEL(kmp_1_exit): #endif /* KMP_ARCH_X86_64 */ // ' -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 +#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 //------------------------------------------------------------------------ // int @@ -1328,7 +1355,7 @@ KMP_LABEL(kmp_1): DEBUG_INFO __kmp_invoke_microtask // -- End __kmp_invoke_microtask -#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ +#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 */ #if KMP_ARCH_PPC64 @@ -1885,7 +1912,7 @@ __kmp_invoke_microtask: #if KMP_ARCH_ARM || KMP_ARCH_MIPS .data - .comm .gomp_critical_user_,32,8 + COMMON .gomp_critical_user_, 32, 3 .data .align 4 .global __kmp_unnamed_critical_addr @@ -1899,7 +1926,7 @@ __kmp_unnamed_critical_addr: # define KMP_PREFIX_UNDERSCORE(x) x #endif .data - .comm .gomp_critical_user_,32,8 + COMMON .gomp_critical_user_, 32, 3 .data .align 8 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) diff --git a/openmp/runtime/src/z_Windows_NT-586_util.cpp b/openmp/runtime/src/z_Windows_NT-586_util.cpp index 991943c..befd257 100644 --- a/openmp/runtime/src/z_Windows_NT-586_util.cpp +++ b/openmp/runtime/src/z_Windows_NT-586_util.cpp @@ -134,7 +134,9 @@ kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) { return old_value; } -#if KMP_ARCH_AARCH64 +#if KMP_ARCH_AARCH64 && KMP_COMPILER_MSVC +// For !KMP_COMPILER_MSVC, this function is provided in assembly form +// by z_Linux_asm.S. int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, void *p_argv[] #if OMPT_SUPPORT -- 2.7.4