2 * Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
11 #include "native_client/src/trusted/service_runtime/arch/x86_32/sel_rt_32.h"
12 #include "native_client/src/trusted/service_runtime/nacl_config.h"
15 * NB: why movl vs movw for moving to segment registers: movl is two
16 * bytes, movw is three. ISA manual says these should be semantically
17 * equivalent. clang complains w/o explicit sizes.
25 #define DO_USING_ARG_IN_ECX 3
27 .macro switcher arg1, arg2
28 DEFINE_GLOBAL_HIDDEN_LOCATION(\arg1):
29 .if \arg2 != DO_USING_ARG_IN_ECX
31 * For DO_USING_ARG_IN_ECX don't touch the stack and get the
32 * thread context from ecx instead of the stack.
34 popl %eax /* throw away the return addr */
36 /* do not leak info to app */
38 /* xorl will leave eflags in a known state, so no info leaks */
41 movl NACL_THREAD_CONTEXT_OFFSET_NEW_PROG_CTR(%ecx), %edx
42 movl NACL_THREAD_CONTEXT_OFFSET_FRAME_PTR(%ecx), %ebp
43 movl NACL_THREAD_CONTEXT_OFFSET_EDI(%ecx), %edi
44 movl NACL_THREAD_CONTEXT_OFFSET_ESI(%ecx), %esi
45 movl NACL_THREAD_CONTEXT_OFFSET_EBX(%ecx), %ebx
47 movw NACL_THREAD_CONTEXT_OFFSET_GS(%ecx), %gs
48 movw NACL_THREAD_CONTEXT_OFFSET_FS(%ecx), %fs
49 movw NACL_THREAD_CONTEXT_OFFSET_ES(%ecx), %es
52 * None of the FPU state stuff matters for the DO_USING_ARG_IN_ECX
53 * case, which is only used to transfer control back to untrusted
54 * code. As well as being unnecessary, it's unsafe to use it here
55 * because the 'call' below might use an untrusted stack.
57 .if \arg2 != DO_USING_ARG_IN_ECX
59 * Clear the x87, MMX, and SSE state.
60 * Then restore the untrusted code's x87 and SSE control words.
61 * We could roll them together by storing a 512-byte per-thread
62 * buffer and setting the control words in that in NaClSyscallSeg.
63 * But that would bloat struct NaClThreadContext by 504 bytes or so,
64 * and the performance cost of these two instructions after fxrstor
65 * seems to be immeasurably small.
67 call __x86.get_pc_thunk.ax /* PIC support */
68 .if \arg2 == DO_AVX || \arg2 == DO_SSE
69 fxrstor fxrstor_default_state-.(%eax)
70 ldmxcsr NACL_THREAD_CONTEXT_OFFSET_MXCSR(%ecx)
72 frstor frstor_default_state-.(%eax)
74 fldcw NACL_THREAD_CONTEXT_OFFSET_FCW(%ecx)
78 * Clear the AVX state that the "fxrstor" instruction doesn't cover.
79 * We could roll them together by using the "xrstor" instruction, but
80 * that has a complicated protocol and this seems to perform fine.
82 * This is "vzeroupper".
83 * Some assembler versions don't know the AVX instructions.
85 .byte 0xc5, 0xf8, 0x77
89 ljmp *NACL_THREAD_CONTEXT_OFFSET_SPRING_ADDR(%ecx)
/*
 * Instantiate the switcher macro once per variant. Each invocation
 * defines a global hidden entry point named by the first argument.
 * The second argument is the mode value that the macro's .if tests
 * compare against; the DO_SSE and DO_AVX modes select the
 * fxrstor/ldmxcsr sequence inside the macro.
 */
92 switcher NaClSwitchNoSSE, DO_NONE
93 switcher NaClSwitchSSE, DO_SSE
94 switcher NaClSwitchAVX, DO_AVX
96 /* Generate the register-only version on OSX. */
98 switcher NaClSwitchNoSSEViaECX, DO_USING_ARG_IN_ECX
103 * All this magic matches what the compiler would emit.
104 * It arranges that this thunk is merged at link time with
105 * any other duplicate copy of the same code.
107 .section .text.__x86.get_pc_thunk.ax,"axG",@progbits,__x86.get_pc_thunk.ax,comdat
108 .globl __x86.get_pc_thunk.ax
109 .hidden __x86.get_pc_thunk.ax
110 .type __x86.get_pc_thunk.ax, @function
111 __x86.get_pc_thunk.ax:
117 .globl __x86.get_pc_thunk.ax
120 * This is what the HIDDEN macro does, but that prepends an
121 * underscore under NACL_OSX and that's not right for this case.
122 * HIDDEN is empty for NACL_WINDOWS, and the NACL_LINUX case is
125 .private_extern __x86.get_pc_thunk.ax
127 __x86.get_pc_thunk.ax:
134 * This is the memory block for "fxrstor" to read.
135 * The only contents that matter are the fcw and mxcsr words,
136 * which we store separately. The mxcsr_mask word is ignored by
137 * the hardware, so there is no need to get the
138 * hardware-supplied value for that. The hardware requires that
139 * this address be aligned to 16 bytes. Align it further to 64
140 * bytes because that is the usual size of a cache line; this
141 * might help performance and is very unlikely to hurt it.
144 fxrstor_default_state:
147 * This is the memory block for "frstor" to read. It matters that
148 * some of these words have the expected nonzero values, so it's
149 * not possible to reuse the fxrstor_default_state space for this
150 * too. The bit pattern below is the usual default state.
152 frstor_default_state:
153 .long 0xffff037f /* x87 control word */
154 .long 0xffff0000 /* x87 status word */
155 .long 0xffffffff /* x87 tag word */
156 .long 0 /* x87 IP offset */
157 .long 0 /* x87 IP selector */
158 .long 0 /* x87 operand pointer offset */
159 .long 0xffff0000 /* x87 operand pointer selector */
161 * This is better written as: .space 108 - (. - frstor_default_state).
162 * But the LLVM assembler (used on MacOS and in Clang builds on
163 * Linux) is buggy and does not support that use of label arithmetic.
164 * Reported as: http://llvm.org/bugs/show_bug.cgi?id=15315