2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
8 * This code gets executed when switching from the service
9 * runtime to the NaCl module. NaClSwitch has one parameter only,
10 * which is a struct passed by reference.
12 * NB: this is not the only place where context switches from trusted
13 * to untrusted code. The NaClGetTlsFastPath* code (nacl_syscall_64.S)
14 * also performs a partial switch (to and fro).
17 #include "native_client/src/trusted/service_runtime/arch/x86_64/sel_rt_64.h"
18 #include "native_client/src/trusted/service_runtime/nacl_config.h"
21 * This function does not return. Thus, we do not need to preserve
22 * any callee-saved registers.
27 * The MacOS assembler has a macro facility that is pretty close
28 * to GNU as macros, but not quite the same.
30 #if NACL_OSX || defined(__clang__) /* OS X build or clang: use the .macro/.endmacro spelling below */
31 # define MACRO(name) .macro name /* opens an assembler macro called name; no named parameters declared */
32 # define ENDMACRO .endmacro /* closes the macro body in this dialect */
33 # define MACROENTRY DEFINE_GLOBAL_HIDDEN_FUNCTION($0): /* entry label; $0 is presumably the first positional macro argument - confirm */
37 # define MACRO(name) .macro name arg1, arg2=0 /* other spelling: two named parameters, arg2 defaulting to 0 */
38 # define ENDMACRO .endm /* closes the macro body in this dialect */
39 # define MACROENTRY DEFINE_GLOBAL_HIDDEN_FUNCTION(\arg1): /* entry label named by the first macro argument */
40 # define MACROARG1 \arg1 /* dialect-neutral name for the first macro argument */
41 # define MACROARG2 \arg2 /* dialect-neutral name for the second macro argument */
48 /* On Windows, 1st param is in %rcx. */
50 #elif NACL_LINUX || NACL_OSX
51 /* On Linux/OSX, 1st param is in %rdi. */
54 # error "What OS/compiler is the service runtime being compiled with?"
58 * TODO(mcgrathr): Perhaps drop callee-saved registers (these six)
59 * from this restore and instead just clear them at startup
60 * (i.e. have NaClStartThreadInApp call a NaClStartSwitch that
61 * clears them and calls NaClSwitch).
63 movq NACL_THREAD_CONTEXT_OFFSET_RBX(%r11), %rbx /* %r11 holds the address of struct NaClThreadContext; reload %rbx from it */
64 movq NACL_THREAD_CONTEXT_OFFSET_RBP(%r11), %rbp /* reload %rbp from the context */
65 movq NACL_THREAD_CONTEXT_OFFSET_R12(%r11), %r12 /* reload %r12 from the context */
66 movq NACL_THREAD_CONTEXT_OFFSET_R13(%r11), %r13 /* reload %r13 from the context */
67 movq NACL_THREAD_CONTEXT_OFFSET_R14(%r11), %r14 /* reload %r14 from the context */
68 movq NACL_THREAD_CONTEXT_OFFSET_R15(%r11), %r15 /* reload %r15 from the context; previous values of these six are not saved */
70 /* there is no springboard for x86_64 */
71 movq NACL_THREAD_CONTEXT_OFFSET_RSP(%r11), %rsp /* from here on %rsp is the stack pointer stored in the context */
72 movq NACL_THREAD_CONTEXT_OFFSET_SYSRET(%r11), %rax /* %rax = context SYSRET slot; presumably the syscall return value - confirm */
75 * %rdi is the first argument in the user calling convention.
76 * When starting the initial thread, we are passing the address
77 * of the parameter block here. The initial stack pointer has
78 * been adjusted to one word below there, to insert a dummy
79 * return address for the user entry point function.
84 * Zero all unused registers. The 32-bit instructions
85 * are a byte shorter than their 64-bit counterparts
86 * when the target register is one of the first eight,
87 * and they implicitly zero the high halves.
89 * The 'xorl' instruction also resets most flags to known
100 * Clear the x87, MMX, and SSE state.
101 * Then restore the untrusted code's x87 and SSE control words.
102 * We could roll them together by storing a 512-byte per-thread
103 * buffer and setting the control words in that in NaClSyscallSeg.
104 * But that would bloat struct NaClThreadContext by 504 bytes or so,
105 * and the performance cost of these two instructions after fxrstor
106 * seems to be immeasurably small.
108 fxrstor fxrstor_default_state(%rip) /* reset x87/MMX/SSE state from the shared default image (RIP-relative) */
109 fldcw NACL_THREAD_CONTEXT_OFFSET_FCW(%r11) /* then reload the x87 control word kept in the thread context */
110 ldmxcsr NACL_THREAD_CONTEXT_OFFSET_MXCSR(%r11) /* and the SSE MXCSR kept in the thread context */
114 * Clear the AVX state that the "fxrstor" instruction doesn't cover.
115 * We could roll them together by using the "xrstor" instruction, but
116 * that has a complicated protocol and this seems to perform fine.
118 * This is "vzeroupper".
119 * Some assembler versions don't know the AVX instructions.
121 .byte 0xc5, 0xf8, 0x77 /* raw encoding of vzeroupper, emitted as bytes for assemblers lacking AVX mnemonics; NOTE(review): any guard selecting this per variant is not visible in this excerpt */
125 * Load the return address into %r11 rather than doing
126 * "jmp *XXX(%r11)" so that we do not leak the address of the
127 * struct NaClThreadContext to untrusted code. Knowing this
128 * address would make bugs in the sandbox easier to exploit.
130 movq NACL_THREAD_CONTEXT_OFFSET_NEW_PROG_CTR(%r11), %r11 /* overwrites the context pointer in %r11 with the address to resume at, so the pointer is no longer in a register */
134 switcher NaClSwitchSSE, 0 /* expand the switcher macro as NaClSwitchSSE with second argument 0 */
135 switcher NaClSwitchAVX, 1 /* expand the switcher macro as NaClSwitchAVX with second argument 1; presumably enables the AVX clearing step - confirm */
140 * This is the memory block for "fxrstor" to read. The only
141 * contents that matter are the fcw and mxcsr words, which we
142 * store separately. The mxcsr_mask word is ignored by the
143 * hardware, so there is no need to get the hardware-supplied
144 * value for that. The hardware requires that this address be
145 * aligned to 16 bytes. Align it further to 64 bytes because
146 * that is the usual size of a cache line; this might help
147 * performance and is very unlikely to hurt it.
150 fxrstor_default_state: