From df9d6c5d7b814c97923b2a3f0b49af6bb57e757c Mon Sep 17 00:00:00 2001 From: Aditya Mandaleeka Date: Mon, 18 Jul 2016 19:39:05 -0700 Subject: [PATCH] Save and restore ymm registers in signal handlers. - Modified the CONTEXT structure for storing the upper 16 bytes of ymm registers - Upon start of signal handler, ymmh data is copied from the native context to the CONTEXT structure, and a new flag is set to indicate that it has ymmh data - Upon calling RtlRestoreContext, the new flag is checked, and ymmh data is restored into registers from the CONTEXT structure - This change fixes only the Linux side for now. Commit migrated from https://github.com/dotnet/coreclr/commit/9585fc244705b3a06c1f99dc6d60b9c9583ec3ed --- src/coreclr/src/pal/inc/pal.h | 2 + src/coreclr/src/pal/src/arch/i386/asmconstants.h | 4 +- src/coreclr/src/pal/src/debug/debug.cpp | 6 +- .../src/pal/src/exception/machexception.cpp | 6 +- src/coreclr/src/pal/src/exception/signal.cpp | 14 ++-- src/coreclr/src/pal/src/include/pal/context.h | 78 ++++++++++++++++++---- src/coreclr/src/pal/src/thread/context.cpp | 23 ++++++- src/coreclr/src/pal/src/thread/thread.cpp | 5 +- 8 files changed, 109 insertions(+), 29 deletions(-) diff --git a/src/coreclr/src/pal/inc/pal.h b/src/coreclr/src/pal/inc/pal.h index fe29112..e086717 100644 --- a/src/coreclr/src/pal/inc/pal.h +++ b/src/coreclr/src/pal/inc/pal.h @@ -2570,6 +2570,8 @@ typedef struct _CONTEXT { #define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS) +#define CONTEXT_XSTATE (CONTEXT_AMD64 | 0x40L) + #define CONTEXT_EXCEPTION_ACTIVE 0x8000000 #define CONTEXT_SERVICE_ACTIVE 0x10000000 #define CONTEXT_EXCEPTION_REQUEST 0x40000000 diff --git a/src/coreclr/src/pal/src/arch/i386/asmconstants.h b/src/coreclr/src/pal/src/arch/i386/asmconstants.h index 8ec73b4..460d8a6 100644 --- a/src/coreclr/src/pal/src/arch/i386/asmconstants.h +++ b/src/coreclr/src/pal/src/arch/i386/asmconstants.h @@ -14,6 +14,8 @@ #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#define CONTEXT_XSTATE 64 + #define CONTEXT_ContextFlags 6*8 #define CONTEXT_SegCs CONTEXT_ContextFlags+8 #define CONTEXT_SegDs CONTEXT_SegCs+2 @@ -47,7 +49,7 @@ #define CONTEXT_Rip CONTEXT_R15+8 #define CONTEXT_FltSave CONTEXT_Rip+8 #define FLOATING_SAVE_AREA_SIZE 4*8+24*16+96 -#define CONTEXT_Xmm0 CONTEXT_FltSave+FLOATING_SAVE_AREA_SIZE // was 10*16 +#define CONTEXT_Xmm0 CONTEXT_FltSave+10*16 #define CONTEXT_Xmm1 CONTEXT_Xmm0+16 #define CONTEXT_Xmm2 CONTEXT_Xmm1+16 #define CONTEXT_Xmm3 CONTEXT_Xmm2+16 diff --git a/src/coreclr/src/pal/src/debug/debug.cpp b/src/coreclr/src/pal/src/debug/debug.cpp index 86ea9f9..b3ce4b1 100644 --- a/src/coreclr/src/pal/src/debug/debug.cpp +++ b/src/coreclr/src/pal/src/debug/debug.cpp @@ -25,12 +25,14 @@ Revision History: #undef _FILE_OFFSET_BITS #endif +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(DEBUG); // some headers have code with asserts, so do this first + #include "pal/thread.hpp" #include "pal/procobj.hpp" #include "pal/file.hpp" #include "pal/palinternal.h" -#include "pal/dbgmsg.h" #include "pal/process.h" #include "pal/context.h" #include "pal/debug.h" @@ -66,8 +68,6 @@ Revision History: using namespace CorUnix; -SET_DEFAULT_DEBUG_CHANNEL(DEBUG); - extern "C" void DBG_DebugBreak_End(); #if HAVE_PROCFS_CTL diff --git a/src/coreclr/src/pal/src/exception/machexception.cpp b/src/coreclr/src/pal/src/exception/machexception.cpp index a483509..af1dc89 100644 --- a/src/coreclr/src/pal/src/exception/machexception.cpp +++ b/src/coreclr/src/pal/src/exception/machexception.cpp @@ -14,12 +14,14 @@ Abstract: --*/ +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); // some headers have code with asserts, so do this first + #include "pal/thread.hpp" #include "pal/seh.hpp" #include "pal/palinternal.h" #if HAVE_MACH_EXCEPTIONS #include "machexception.h" -#include "pal/dbgmsg.h" #include "pal/critsect.h" #include "pal/debug.h" #include "pal/init.h" @@ -42,8 +44,6 @@ Abstract: using namespace CorUnix; -SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); - // The port we use to handle exceptions and to set the thread context mach_port_t s_ExceptionPort; diff --git a/src/coreclr/src/pal/src/exception/signal.cpp b/src/coreclr/src/pal/src/exception/signal.cpp index 8dd75ac..c2c2179 100644 --- a/src/coreclr/src/pal/src/exception/signal.cpp +++ b/src/coreclr/src/pal/src/exception/signal.cpp @@ -18,6 +18,9 @@ Abstract: --*/ +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); // some headers have code with asserts, so do this first + #include "pal/corunix.hpp" #include "pal/handleapi.hpp" #include "pal/thread.hpp" @@ -27,7 +30,6 @@ Abstract: #include "pal/palinternal.h" #if !HAVE_MACH_EXCEPTIONS -#include "pal/dbgmsg.h" #include "pal/init.h" #include "pal/process.h" #include "pal/debug.h" @@ -43,8 +45,6 @@ Abstract: using namespace CorUnix; -SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); - #ifdef SIGRTMIN #define INJECT_ACTIVATION_SIGNAL SIGRTMIN #endif @@ -611,10 +611,16 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext // which is required for restoring context RtlCaptureContext(contextRecord); + ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + +#if defined(_AMD64_) + contextFlags |= CONTEXT_XSTATE; +#endif + // Fill context record with required information. from pal.h: // On non-Win32 platforms, the CONTEXT pointer in the // PEXCEPTION_POINTERS will contain at least the CONTEXT_CONTROL registers. - CONTEXTFromNativeContext(ucontext, contextRecord, CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT); + CONTEXTFromNativeContext(ucontext, contextRecord, contextFlags); /* Unmask signal so we can receive it again */ sigemptyset(&signal_set); diff --git a/src/coreclr/src/pal/src/include/pal/context.h b/src/coreclr/src/pal/src/include/pal/context.h index 6f1b3fe..5e37894 100644 --- a/src/coreclr/src/pal/src/include/pal/context.h +++ b/src/coreclr/src/pal/src/include/pal/context.h @@ -121,19 +121,71 @@ typedef ucontext_t native_context_t; #define MCREG_R14(mc) ((mc).gregs[REG_R14]) #define MCREG_R15(mc) ((mc).gregs[REG_R15]) -#define FPREG_Xmm(uc, index) *(M128A*)&((uc)->uc_mcontext.fpregs->_xmm[index]) - -#define FPREG_St(uc, index) *(M128A*)&((uc)->uc_mcontext.fpregs->_st[index]) - -#define FPREG_ControlWord(uc) ((uc)->uc_mcontext.fpregs->cwd) -#define FPREG_StatusWord(uc) ((uc)->uc_mcontext.fpregs->swd) -#define FPREG_TagWord(uc) ((uc)->uc_mcontext.fpregs->ftw) -#define FPREG_ErrorOffset(uc) *(DWORD*)&((uc)->uc_mcontext.fpregs->rip) -#define FPREG_ErrorSelector(uc) *(((WORD*)&((uc)->uc_mcontext.fpregs->rip)) + 2) -#define FPREG_DataOffset(uc) *(DWORD*)&((uc)->uc_mcontext.fpregs->rdp) -#define FPREG_DataSelector(uc) *(((WORD*)&((uc)->uc_mcontext.fpregs->rdp)) + 2) -#define FPREG_MxCsr(uc) ((uc)->uc_mcontext.fpregs->mxcsr) -#define FPREG_MxCsr_Mask(uc) ((uc)->uc_mcontext.fpregs->mxcr_mask) +#define FPREG_Fpstate(uc) ((uc)->uc_mcontext.fpregs) +#define FPREG_Xmm(uc, index) *(M128A*)&(FPREG_Fpstate(uc)->_xmm[index]) + +#define FPREG_St(uc, index) *(M128A*)&(FPREG_Fpstate(uc)->_st[index]) + +#define FPREG_ControlWord(uc) (FPREG_Fpstate(uc)->cwd) +#define FPREG_StatusWord(uc) (FPREG_Fpstate(uc)->swd) +#define FPREG_TagWord(uc) (FPREG_Fpstate(uc)->ftw) +#define FPREG_ErrorOffset(uc) *(DWORD*)&(FPREG_Fpstate(uc)->rip) +#define FPREG_ErrorSelector(uc) *(((WORD*)&(FPREG_Fpstate(uc)->rip)) + 2) +#define FPREG_DataOffset(uc) *(DWORD*)&(FPREG_Fpstate(uc)->rdp) +#define FPREG_DataSelector(uc) *(((WORD*)&(FPREG_Fpstate(uc)->rdp)) + 2) +#define FPREG_MxCsr(uc) (FPREG_Fpstate(uc)->mxcsr) +#define FPREG_MxCsr_Mask(uc) (FPREG_Fpstate(uc)->mxcr_mask) + +///////////////////// +// Extended state + +inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) +{ + // Bytes 464..511 in the FXSAVE format are available for software to use for any purpose. In this case, they are used to + // indicate information about extended state. + _ASSERTE(reinterpret_cast(&FPREG_Fpstate(uc)->padding[12]) - reinterpret_cast(FPREG_Fpstate(uc)) == 464); + + _ASSERTE(FPREG_Fpstate(uc) != nullptr); + + return reinterpret_cast<_fpx_sw_bytes *>(&FPREG_Fpstate(uc)->padding[12]); +} + +inline UINT32 FPREG_ExtendedSize(const ucontext_t *uc) +{ + _ASSERTE(FPREG_FpxSwBytes(uc)->magic1 == FP_XSTATE_MAGIC1); + return FPREG_FpxSwBytes(uc)->extended_size; +} + +inline bool FPREG_HasExtendedState(const ucontext_t *uc) +{ + // See comments in /usr/include/x86_64-linux-gnu/asm/sigcontext.h for info on how to detect if extended state is present + static_assert_no_msg(FP_XSTATE_MAGIC2_SIZE == sizeof(UINT32)); + + if (FPREG_FpxSwBytes(uc)->magic1 != FP_XSTATE_MAGIC1) + { + return false; + } + + UINT32 extendedSize = FPREG_ExtendedSize(uc); + if (extendedSize < sizeof(_xstate)) + { + return false; + } + + _ASSERTE(extendedSize >= FP_XSTATE_MAGIC2_SIZE); + return *reinterpret_cast(reinterpret_cast(FPREG_Fpstate(uc)) + (extendedSize - FP_XSTATE_MAGIC2_SIZE)) + == FP_XSTATE_MAGIC2; +} + +inline void *FPREG_Xstate_Ymmh(const ucontext_t *uc) +{ + static_assert_no_msg(sizeof(reinterpret_cast<_xstate *>(FPREG_Fpstate(uc))->ymmh.ymmh_space) == 16 * 16); + _ASSERTE(FPREG_HasExtendedState(uc)); + + return reinterpret_cast<_xstate *>(FPREG_Fpstate(uc))->ymmh.ymmh_space; +} + +///////////////////// #else // BIT64 diff --git a/src/coreclr/src/pal/src/thread/context.cpp b/src/coreclr/src/pal/src/thread/context.cpp index 025bb97..c3412ba 100644 --- a/src/coreclr/src/pal/src/thread/context.cpp +++ b/src/coreclr/src/pal/src/thread/context.cpp @@ -19,8 +19,10 @@ Abstract: --*/ -#include "pal/palinternal.h" #include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do this first + +#include "pal/palinternal.h" #include "pal/context.h" #include "pal/debug.h" #include "pal/thread.hpp" @@ -29,8 +31,6 @@ Abstract: #include #include -SET_DEFAULT_DEBUG_CHANNEL(THREAD); - extern PGET_GCMARKER_EXCEPTION_CODE g_getGcMarkerExceptionCode; // in context2.S @@ -465,6 +465,15 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } #endif } + + // TODO: Enable for all Unix systems +#if defined(_AMD64_) && defined(__linux__) + if ((lpContext->ContextFlags & CONTEXT_XSTATE) != 0) + { + _ASSERTE(FPREG_HasExtendedState(native)); + memcpy_s(FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16, lpContext->VectorRegister, sizeof(M128A) * 16); + } +#endif // _AMD64_ } /*++ @@ -551,6 +560,14 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex } #endif } + + // TODO: Enable for all Unix systems +#if defined(_AMD64_) && defined(__linux__) + if ((contextFlags & CONTEXT_XSTATE) != 0 && FPREG_HasExtendedState(native)) + { + memcpy_s(lpContext->VectorRegister, sizeof(M128A) * 16, FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16); + } +#endif // _AMD64_ } /*++ diff --git a/src/coreclr/src/pal/src/thread/thread.cpp b/src/coreclr/src/pal/src/thread/thread.cpp index 159c451..d6f6f9c 100644 --- a/src/coreclr/src/pal/src/thread/thread.cpp +++ b/src/coreclr/src/pal/src/thread/thread.cpp @@ -18,6 +18,9 @@ Abstract: --*/ +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do this first + #include "pal/corunix.hpp" #include "pal/context.h" #include "pal/thread.hpp" @@ -29,7 +32,6 @@ Abstract: #include "procprivate.hpp" #include "pal/process.h" #include "pal/module.h" -#include "pal/dbgmsg.h" #include "pal/environ.h" #include "pal/init.h" @@ -74,7 +76,6 @@ using namespace CorUnix; /* ------------------- Definitions ------------------------------*/ -SET_DEFAULT_DEBUG_CHANNEL(THREAD); // The default stack size of a newly created thread (currently 256KB) // when the dwStackSize parameter of PAL_CreateThread() -- 2.7.4