/*
 * Copyright (c) 2013 The Native Client Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 #include "native_client/src/trusted/service_runtime/sys_memory.h"
12 #include "native_client/src/include/nacl_assert.h"
13 #include "native_client/src/include/nacl_platform.h"
14 #include "native_client/src/shared/platform/nacl_check.h"
15 #include "native_client/src/shared/platform/nacl_log.h"
16 #include "native_client/src/shared/platform/nacl_sync_checked.h"
17 #include "native_client/src/trusted/desc/nacl_desc_effector_trusted_mem.h"
18 #include "native_client/src/trusted/desc/nacl_desc_io.h"
19 #include "native_client/src/trusted/fault_injection/fault_injection.h"
20 #include "native_client/src/trusted/service_runtime/include/bits/mman.h"
21 #include "native_client/src/trusted/service_runtime/include/sys/errno.h"
22 #include "native_client/src/trusted/service_runtime/include/sys/fcntl.h"
23 #include "native_client/src/trusted/service_runtime/include/sys/stat.h"
24 #include "native_client/src/trusted/service_runtime/internal_errno.h"
25 #include "native_client/src/trusted/service_runtime/nacl_app_thread.h"
26 #include "native_client/src/trusted/service_runtime/nacl_syscall_common.h"
27 #include "native_client/src/trusted/service_runtime/nacl_text.h"
28 #include "native_client/src/trusted/service_runtime/sel_ldr.h"
29 #include "native_client/src/trusted/service_runtime/sel_memory.h"
30 #include "native_client/src/trusted/validator/validation_metadata.h"
33 # include "native_client/src/shared/platform/win/xlate_system_error.h"
/*
 * Forward declaration: platform-specific helper (Windows / POSIX variants
 * are defined near the bottom of this file) that unmaps [sysaddr,
 * sysaddr + length) and updates the VM map.
 */
37 static int32_t MunmapInternal(struct NaClApp *nap,
38 uintptr_t sysaddr, size_t length);
/*
 * Upper bound on file sizes honored by the mmap path; the sentinel
 * values in NaClSysMmapIntern are initialized to this.  Half of
 * SIZE_T_MAX, so size arithmetic on it cannot overflow a size_t.
 */
40 static const size_t kMaxUsableFileSize = (SIZE_T_MAX >> 1);
43 static INLINE size_t size_min(size_t a, size_t b) {
44 return (a < b) ? a : b;
/*
 * NaClSysBrk: implementation of the brk() syscall for untrusted code.
 * Shrinking the break just lowers break_addr; growing it either bumps
 * the address within the existing VM-map segment or extends that
 * segment (mprotect'ing the new pages R/W) and zero-fills the newly
 * exposed range.  Returns the (possibly unchanged) break address, or a
 * negative NaCl errno.
 *
 * NOTE(review): this excerpt is truncated -- a number of original lines
 * (guard conditions, braces, goto labels, comment delimiters) are
 * missing, and each remaining line carries a leftover line-number
 * prefix from extraction.  The text below is preserved as-is; restore
 * from the pristine source before building.
 */
47 int32_t NaClSysBrk(struct NaClAppThread *natp,
48 uintptr_t new_break) {
49 struct NaClApp *nap = natp->nap;
51 int32_t rv = -NACL_ABI_EINVAL;
52 struct NaClVmmapIter iter;
53 struct NaClVmmapEntry *ent;
54 struct NaClVmmapEntry *next_ent;
56 uintptr_t sys_new_break;
57 uintptr_t usr_last_data_page;
58 uintptr_t usr_new_last_data_page;
59 uintptr_t last_internal_data_addr;
60 uintptr_t last_internal_page;
61 uintptr_t start_new_region;
62 uintptr_t region_size;
65 * The sysbrk() IRT interface is deprecated and is not enabled for
66 * ABI-stable PNaCl pexes, so for security hardening, disable the
67 * syscall under PNaCl too.
/* NOTE(review): the condition guarding this early return is missing from this excerpt. */
70 return -NACL_ABI_ENOSYS;
72 break_addr = nap->break_addr;
74 NaClLog(3, "Entered NaClSysBrk(new_break 0x%08"NACL_PRIxPTR")\n",
77 sys_new_break = NaClUserToSysAddr(nap, new_break);
78 NaClLog(3, "sys_new_break 0x%08"NACL_PRIxPTR"\n", sys_new_break);
80 if (kNaClBadAddress == sys_new_break) {
83 if (NACL_SYNC_OK != NaClMutexLock(&nap->mu)) {
84 NaClLog(LOG_ERROR, "Could not get app lock for 0x%08"NACL_PRIxPTR"\n",
88 if (new_break < nap->data_end) {
89 NaClLog(4, "new_break before data_end (0x%"NACL_PRIxPTR")\n",
93 if (new_break <= nap->break_addr) {
95 NaClLog(4, "new_break before break (0x%"NACL_PRIxPTR"); freeing\n",
97 nap->break_addr = new_break;
98 break_addr = new_break;
101 * See if page containing new_break is in mem_map; if so, we are
102 * essentially done -- just update break_addr. Otherwise, we
103 * extend the VM map entry from the page containing the current
104 * break to the page containing new_break.
107 sys_break = NaClUserToSys(nap, nap->break_addr);
109 usr_last_data_page = (nap->break_addr - 1) >> NACL_PAGESHIFT;
111 usr_new_last_data_page = (new_break - 1) >> NACL_PAGESHIFT;
113 last_internal_data_addr = NaClRoundAllocPage(new_break) - 1;
114 last_internal_page = last_internal_data_addr >> NACL_PAGESHIFT;
116 NaClLog(4, ("current break sys addr 0x%08"NACL_PRIxPTR", "
117 "usr last data page 0x%"NACL_PRIxPTR"\n"),
118 sys_break, usr_last_data_page);
119 NaClLog(4, "new break usr last data page 0x%"NACL_PRIxPTR"\n",
120 usr_new_last_data_page);
121 NaClLog(4, "last internal data addr 0x%08"NACL_PRIxPTR"\n",
122 last_internal_data_addr);
124 if (NULL == NaClVmmapFindPageIter(&nap->mem_map,
127 || NaClVmmapIterAtEnd(&iter)) {
128 NaClLog(LOG_FATAL, ("current break (0x%08"NACL_PRIxPTR", "
129 "sys 0x%08"NACL_PRIxPTR") "
130 "not in address map\n"),
131 nap->break_addr, sys_break);
133 ent = NaClVmmapIterStar(&iter);
134 NaClLog(4, ("segment containing current break"
135 ": page_num 0x%08"NACL_PRIxPTR", npages 0x%"NACL_PRIxS"\n"),
136 ent->page_num, ent->npages);
137 if (usr_new_last_data_page < ent->page_num + ent->npages) {
138 NaClLog(4, "new break within break segment, just bumping addr\n");
139 nap->break_addr = new_break;
140 break_addr = new_break;
142 NaClVmmapIterIncr(&iter);
143 if (!NaClVmmapIterAtEnd(&iter)
144 && ((next_ent = NaClVmmapIterStar(&iter))->page_num
145 <= last_internal_page)) {
146 /* ran into next segment! */
148 ("new break request of usr address "
149 "0x%08"NACL_PRIxPTR" / usr page 0x%"NACL_PRIxPTR
150 " runs into next region, page_num 0x%"NACL_PRIxPTR", "
151 "npages 0x%"NACL_PRIxS"\n"),
152 new_break, usr_new_last_data_page,
153 next_ent->page_num, next_ent->npages);
157 "extending segment: page_num 0x%08"NACL_PRIxPTR", "
158 "npages 0x%"NACL_PRIxS"\n",
159 ent->page_num, ent->npages);
160 /* go ahead and extend ent to cover, and make pages accessible */
161 start_new_region = (ent->page_num + ent->npages) << NACL_PAGESHIFT;
162 ent->npages = (last_internal_page - ent->page_num + 1);
163 region_size = (((last_internal_page + 1) << NACL_PAGESHIFT)
165 if (0 != NaClMprotect((void *) NaClUserToSys(nap, start_new_region),
167 PROT_READ | PROT_WRITE)) {
169 ("Could not mprotect(0x%08"NACL_PRIxPTR", "
170 "0x%08"NACL_PRIxPTR", "
171 "PROT_READ|PROT_WRITE)\n"),
175 NaClLog(4, "segment now: page_num 0x%08"NACL_PRIxPTR", "
176 "npages 0x%"NACL_PRIxS"\n",
177 ent->page_num, ent->npages);
178 nap->break_addr = new_break;
179 break_addr = new_break;
182 * Zero out memory between old break and new break.
/* Zero-fill so untrusted code cannot observe stale data in the grown region. */
184 ASSERT(sys_new_break > sys_break);
185 memset((void *) sys_break, 0, sys_new_break - sys_break);
191 NaClXMutexUnlock(&nap->mu);
195 * This cast is safe because the incoming value (new_break) cannot
196 * exceed the user address space--even though its type (uintptr_t)
197 * theoretically allows larger values.
199 rv = (int32_t) break_addr;
201 NaClLog(3, "NaClSysBrk: returning 0x%08"NACL_PRIx32"\n", rv);
205 int NaClSysCommonAddrRangeContainsExecutablePages(struct NaClApp *nap,
209 * NOTE: currently only trampoline and text region are executable,
210 * and they are at the beginning of the address space, so this code
211 * is fine. We will probably never allow users to mark other pages
212 * as executable; but if so, we will have to revisit how this check
215 * nap->static_text_end is a multiple of 4K, the memory protection
216 * granularity. Since this routine is used for checking whether
217 * memory map adjustments / allocations -- which has 64K granularity
218 * -- is okay, usraddr must be an allocation granularity value. Our
219 * callers (as of this writing) does this, but we truncate it down
220 * to an allocation boundary to be sure.
222 UNREFERENCED_PARAMETER(length);
223 usraddr = NaClTruncAllocPage(usraddr);
224 return usraddr < nap->dynamic_text_end;
227 int NaClSysCommonAddrRangeInAllowedDynamicCodeSpace(struct NaClApp *nap,
230 uintptr_t usr_region_end = usraddr + length;
232 if (usr_region_end < usraddr) {
233 /* Check for unsigned addition overflow */
236 usr_region_end = NaClRoundAllocPage(usr_region_end);
237 if (usr_region_end < usraddr) {
238 /* 32-bit systems only, rounding caused uint32_t overflow */
241 return (nap->dynamic_text_start <= usraddr &&
242 usr_region_end <= nap->dynamic_text_end);
/*
 * NaClSysMmapIntern: core mmap implementation shared by NaClSysMmap.
 * Validates flags/prot/offset/length, stats the descriptor (unless
 * MAP_ANONYMOUS), chooses or validates the target user address, and
 * dispatches to anonymous mapping, dynamic-code (PROT_EXEC) mapping
 * via the validator, or the descriptor's Map vtable entry.  Returns
 * the user address on success or a negative NaCl errno.
 *
 * NOTE(review): this excerpt is truncated -- the parameter list
 * (start/length/prot/flags/d), many guard conditions, braces, goto
 * labels (e.g. cleanup targets), and comment delimiters are missing,
 * and each line carries a leftover line-number prefix from extraction.
 * The text below is preserved as-is; restore from the pristine source
 * before building.
 */
245 /* Warning: sizeof(nacl_abi_off_t)!=sizeof(off_t) on OSX */
246 int32_t NaClSysMmapIntern(struct NaClApp *nap,
252 nacl_abi_off_t offset) {
254 struct NaClDesc *ndp;
260 uintptr_t map_result;
261 int holding_app_lock;
262 struct nacl_abi_stat stbuf;
263 size_t alloc_rounded_length;
264 nacl_off64_t file_size;
265 nacl_off64_t file_bytes;
266 nacl_off64_t host_rounded_file_bytes;
267 size_t alloc_rounded_file_bytes;
269 holding_app_lock = 0;
272 allowed_flags = (NACL_ABI_MAP_FIXED | NACL_ABI_MAP_SHARED
273 | NACL_ABI_MAP_PRIVATE | NACL_ABI_MAP_ANONYMOUS);
275 usraddr = (uintptr_t) start;
277 if (0 != (flags & ~allowed_flags)) {
278 NaClLog(2, "invalid mmap flags 0%o, ignoring extraneous bits\n", flags);
279 flags &= allowed_flags;
282 if (0 != (flags & NACL_ABI_MAP_ANONYMOUS)) {
284 * anonymous mmap, so backing store is just swap: no descriptor is
285 * involved, and no memory object will be created to represent the
290 ndp = NaClAppGetDesc(nap, d);
292 map_result = -NACL_ABI_EBADF;
299 * Check if application is trying to do dynamic code loading by
302 if (0 != (NACL_ABI_PROT_EXEC & prot) &&
303 0 != (NACL_ABI_MAP_FIXED & flags) &&
305 NaClSysCommonAddrRangeInAllowedDynamicCodeSpace(nap, usraddr, length)) {
306 if (!nap->enable_dyncode_syscalls) {
308 "NaClSysMmap: PROT_EXEC when dyncode syscalls are disabled.\n");
309 map_result = -NACL_ABI_EINVAL;
312 if (0 != (NACL_ABI_PROT_WRITE & prot)) {
314 "NaClSysMmap: asked for writable and executable code pages?!?\n");
315 map_result = -NACL_ABI_EINVAL;
319 } else if (0 != (prot & NACL_ABI_PROT_EXEC)) {
320 map_result = -NACL_ABI_EINVAL;
325 * Starting address must be aligned to worst-case allocation
326 * granularity. (Windows.)
328 if (!NaClIsAllocPageMultiple(usraddr)) {
329 NaClLog(2, "NaClSysMmap: address not allocation granularity aligned\n");
330 map_result = -NACL_ABI_EINVAL;
334 * Offset should be non-negative (nacl_abi_off_t is signed). This
335 * condition is caught when the file is stat'd and checked, and
336 * offset is ignored for anonymous mappings.
339 NaClLog(1, /* application bug */
340 "NaClSysMmap: negative file offset: %"NACL_PRId64"\n",
342 map_result = -NACL_ABI_EINVAL;
346 * And offset must be a multiple of the allocation unit.
348 if (!NaClIsAllocPageMultiple((uintptr_t) offset)) {
350 ("NaClSysMmap: file offset 0x%08"NACL_PRIxPTR" not multiple"
351 " of allocation size\n"),
353 map_result = -NACL_ABI_EINVAL;
358 map_result = -NACL_ABI_EINVAL;
361 alloc_rounded_length = NaClRoundAllocPage(length);
362 if (alloc_rounded_length != length) {
364 NaClLog(3, "NaClSysMmap: length not a multiple of allocation size\n");
365 map_result = -NACL_ABI_EINVAL;
369 "NaClSysMmap: rounded length to 0x%"NACL_PRIxS"\n",
370 alloc_rounded_length);
375 * Note: sentinel values are bigger than the NaCl module addr space.
377 file_size = kMaxUsableFileSize;
378 file_bytes = kMaxUsableFileSize;
379 host_rounded_file_bytes = kMaxUsableFileSize;
380 alloc_rounded_file_bytes = kMaxUsableFileSize;
383 * We stat the file to figure out its actual size.
385 * This is necessary because the POSIXy interface we provide
386 * allows mapping beyond the extent of a file but Windows'
387 * interface does not. We simulate the POSIX behaviour on
390 map_result = (*((struct NaClDescVtbl const *) ndp->base.vtbl)->
392 if (0 != map_result) {
397 * Preemptively refuse to map anything that's not a regular file or
398 * shared memory segment. Other types usually report st_size of zero,
399 * which the code below will handle by just doing a dummy PROT_NONE
400 * mapping for the requested size and never attempting the underlying
401 * NaClDesc Map operation. So without this check, the host OS never
402 * gets the chance to refuse the mapping operation on an object that
405 if (!NACL_ABI_S_ISREG(stbuf.nacl_abi_st_mode) &&
406 !NACL_ABI_S_ISSHM(stbuf.nacl_abi_st_mode)) {
407 map_result = -NACL_ABI_ENODEV;
412 * BUG(bsy): there's a race between this fstat and the actual mmap
413 * below. It's probably insoluble. Even if we fstat again after
414 * mmap and compared, the mmap could have "seen" the file with a
415 * different size, after which the racing thread restored back to
416 * the same value before the 2nd fstat takes place.
418 file_size = stbuf.nacl_abi_st_size;
420 if (file_size < offset) {
421 map_result = -NACL_ABI_EINVAL;
425 file_bytes = file_size - offset;
426 if ((nacl_off64_t) kMaxUsableFileSize < file_bytes) {
427 host_rounded_file_bytes = kMaxUsableFileSize;
429 host_rounded_file_bytes = NaClRoundHostAllocPage((size_t) file_bytes);
432 ASSERT(host_rounded_file_bytes <= (nacl_off64_t) kMaxUsableFileSize);
434 * We need to deal with NaClRoundHostAllocPage rounding up to zero
435 * from ~0u - n, where n < 4096 or 65536 (== 1 alloc page).
437 * Luckily, file_bytes is at most kMaxUsableFileSize which is
438 * smaller than SIZE_T_MAX, so it should never happen, but we
439 * leave the explicit check below as defensive programming.
441 alloc_rounded_file_bytes =
442 NaClRoundAllocPage((size_t) host_rounded_file_bytes);
444 if (0 == alloc_rounded_file_bytes && 0 != host_rounded_file_bytes) {
445 map_result = -NACL_ABI_ENOMEM;
450 * NB: host_rounded_file_bytes and alloc_rounded_file_bytes can be
451 * zero. Such an mmap just makes memory (offset relative to
452 * usraddr) in the range [0, alloc_rounded_length) inaccessible.
457 * host_rounded_file_bytes is how many bytes we can map from the
458 * file, given the user-supplied starting offset. It is at least
459 * one page. If it came from a real file, it is a multiple of
460 * host-OS allocation size. it cannot be larger than
461 * kMaxUsableFileSize.
463 if (mapping_code && (size_t) file_bytes < alloc_rounded_length) {
465 "NaClSysMmap: disallowing partial allocation page extension for"
467 map_result = -NACL_ABI_EINVAL;
470 length = size_min(alloc_rounded_length, (size_t) host_rounded_file_bytes);
473 * Lock the addr space.
475 NaClXMutexLock(&nap->mu);
477 NaClVmHoleOpeningMu(nap);
479 holding_app_lock = 1;
481 if (0 == (flags & NACL_ABI_MAP_FIXED)) {
483 * The user wants us to pick an address range.
487 * Pick a hole in addr space of appropriate size, anywhere.
488 * We pick one that's best for the system.
490 usrpage = NaClVmmapFindMapSpace(&nap->mem_map,
491 alloc_rounded_length >> NACL_PAGESHIFT);
492 NaClLog(4, "NaClSysMmap: FindMapSpace: page 0x%05"NACL_PRIxPTR"\n",
495 map_result = -NACL_ABI_ENOMEM;
498 usraddr = usrpage << NACL_PAGESHIFT;
499 NaClLog(4, "NaClSysMmap: new starting addr: 0x%08"NACL_PRIxPTR
503 * user supplied an addr, but it's to be treated as a hint; we
504 * find a hole of the right size in the app's address space,
505 * according to the usual mmap semantics.
507 usrpage = NaClVmmapFindMapSpaceAboveHint(&nap->mem_map,
509 (alloc_rounded_length
511 NaClLog(4, "NaClSysMmap: FindSpaceAboveHint: page 0x%05"NACL_PRIxPTR"\n",
514 NaClLog(4, "NaClSysMmap: hint failed, doing generic allocation\n");
515 usrpage = NaClVmmapFindMapSpace(&nap->mem_map,
516 alloc_rounded_length >> NACL_PAGESHIFT);
519 map_result = -NACL_ABI_ENOMEM;
522 usraddr = usrpage << NACL_PAGESHIFT;
523 NaClLog(4, "NaClSysMmap: new starting addr: 0x%08"NACL_PRIxPTR"\n",
529 * Validate [usraddr, endaddr) is okay.
531 if (usraddr >= ((uintptr_t) 1 << nap->addr_bits)) {
533 ("NaClSysMmap: start address (0x%08"NACL_PRIxPTR") outside address"
536 map_result = -NACL_ABI_EINVAL;
539 endaddr = usraddr + alloc_rounded_length;
540 if (endaddr < usraddr) {
542 ("NaClSysMmap: integer overflow -- "
543 "NaClSysMmap(0x%08"NACL_PRIxPTR",0x%"NACL_PRIxS",0x%x,0x%x,%d,"
544 "0x%08"NACL_PRIxPTR"\n"),
545 usraddr, length, prot, flags, d, (uintptr_t) offset);
546 map_result = -NACL_ABI_EINVAL;
550 * NB: we use > instead of >= here.
552 * endaddr is the address of the first byte beyond the target region
553 * and it can equal the address space limit. (of course, normally
554 * the main thread's stack is there.)
556 if (endaddr > ((uintptr_t) 1 << nap->addr_bits)) {
558 ("NaClSysMmap: end address (0x%08"NACL_PRIxPTR") is beyond"
559 " the end of the address space\n"),
561 map_result = -NACL_ABI_EINVAL;
567 "NaClSysMmap: PROT_EXEC requested, usraddr 0x%08"NACL_PRIxPTR
568 ", length %"NACL_PRIxS"\n",
570 if (!NACL_FI("MMAP_BYPASS_DESCRIPTOR_SAFETY_CHECK",
571 NaClDescIsSafeForMmap(ndp),
573 NaClLog(4, "NaClSysMmap: descriptor not blessed\n");
574 map_result = -NACL_ABI_EINVAL;
577 NaClLog(4, "NaClSysMmap: allowed\n");
578 } else if (NaClSysCommonAddrRangeContainsExecutablePages(nap,
581 NaClLog(2, "NaClSysMmap: region contains executable pages\n");
582 map_result = -NACL_ABI_EINVAL;
586 NaClVmIoPendingCheck_mu(nap,
588 (uint32_t) (usraddr + length - 1));
591 * Force NACL_ABI_MAP_FIXED, since we are specifying address in NaCl
594 flags |= NACL_ABI_MAP_FIXED;
597 * Turn off PROT_EXEC -- normal user mmapped pages should not be
598 * executable. This is primarily for the service runtime's own
599 * bookkeeping -- prot is used in NaClVmmapAddWithOverwrite and will
600 * be needed for remapping data pages on Windows if page protection
601 * is set to PROT_NONE and back.
603 * NB: we've captured the notion of mapping executable memory for
604 * dynamic library loading etc in mapping_code, so when we do map
605 * text we will explicitly OR in NACL_ABI_PROT_EXEC as needed.
607 prot &= ~NACL_ABI_PROT_EXEC;
610 * Exactly one of NACL_ABI_MAP_SHARED and NACL_ABI_MAP_PRIVATE is set.
612 if ((0 == (flags & NACL_ABI_MAP_SHARED)) ==
613 (0 == (flags & NACL_ABI_MAP_PRIVATE))) {
614 map_result = -NACL_ABI_EINVAL;
618 sysaddr = NaClUserToSys(nap, usraddr);
624 ("NaClSysMmap: NaClDescIoDescMap(,,0x%08"NACL_PRIxPTR","
625 "0x%08"NACL_PRIxS",0x%x,0x%x,0x%08"NACL_PRIxPTR")\n"),
626 sysaddr, length, prot, flags, (uintptr_t) offset);
627 map_result = NaClDescIoDescMapAnon(nap->effp,
633 } else if (mapping_code) {
635 * Map a read-only view in trusted memory, ask validator if
636 * valid without patching; if okay, then map in untrusted
637 * executable memory. Fallback to using the dyncode_create
638 * interface otherwise.
640 * On Windows, threads are already stopped by the
641 * NaClVmHoleOpeningMu invocation above.
643 * For mmap, stopping threads on Windows is needed to ensure
644 * that nothing gets allocated into the temporary address space
645 * hole. This would otherwise have been particularly dangerous,
646 * since the hole is in an executable region. We must abort the
647 * program if some other trusted thread (or injected thread)
648 * allocates into this space. We also need interprocessor
649 * interrupts to flush the icaches associated other cores, since
650 * they may contain stale data. NB: mmap with PROT_EXEC should
651 * do this for us, since otherwise loading shared libraries in a
652 * multithreaded environment cannot work in a portable fashion.
653 * (Mutex locks only ensure dcache coherency.)
655 * For eventual munmap, stopping threads also involve looking at
656 * their registers to make sure their %rip/%eip/%ip are not
657 * inside the region being modified (impossible for initial
658 * insertion). This is needed because mmap->munmap->mmap could
659 * cause problems due to scheduler races.
661 * Use NaClDynamicRegionCreate to mark region as allocated.
663 * See NaClElfFileMapSegment in elf_util.c for corresponding
664 * mmap-based main executable loading.
666 uintptr_t image_sys_addr;
667 NaClValidationStatus validator_status = NaClValidationFailed;
668 struct NaClValidationMetadata metadata;
669 int sys_ret; /* syscall return convention */
672 NaClLog(4, "NaClSysMmap: checking descriptor type\n");
673 if (NACL_VTBL(NaClDesc, ndp)->typeTag != NACL_DESC_HOST_IO) {
674 NaClLog(4, "NaClSysMmap: not supported type, got %d\n",
675 NACL_VTBL(NaClDesc, ndp)->typeTag);
676 map_result = -NACL_ABI_EINVAL;
681 * First, try to mmap. Check if target address range is
682 * available. It must be neither in use by NaClText interface,
683 * nor used by previous mmap'd code. We record mmap'd code
684 * regions in the NaClText's data structures to avoid having to
685 * deal with looking in two data structures.
687 * This mapping is PROT_READ | PROT_WRITE, MAP_PRIVATE so that
688 * if validation fails in read-only mode, we can re-run the
689 * validator to patch in place.
692 image_sys_addr = (*NACL_VTBL(NaClDesc, ndp)->
694 NaClDescEffectorTrustedMem(),
697 NACL_ABI_PROT_READ | NACL_ABI_PROT_WRITE,
698 NACL_ABI_MAP_PRIVATE,
700 if (NaClPtrIsNegErrno(&image_sys_addr)) {
701 map_result = image_sys_addr;
705 /* Ask validator / validation cache */
706 NaClMetadataFromNaClDescCtor(&metadata, ndp);
707 validator_status = NACL_FI("MMAP_FORCE_MMAP_VALIDATION_FAIL",
710 (uint8_t *) image_sys_addr,
712 0, /* stubout_mode: no */
713 1, /* readonly_text: yes */
716 nap->validation_cache),
717 NaClValidationFailed);
718 NaClLog(3, "NaClSysMmap: prot_exec, validator_status %d\n",
720 NaClMetadataDtor(&metadata);
722 if (NaClValidationSucceeded == validator_status) {
724 * Check if target address range is actually available. It
725 * must be neither in use by NaClText interface, nor used by
726 * previous mmap'd code. We record mmap'd code regions in the
727 * NaClText's data structures to avoid lo both having to deal
728 * with looking in two data structures. We could do this
729 * first since this is a cheaper check, but it shouldn't
730 * matter since application errors ought to be rare and we
731 * shouldn't optimize for error handling, and this makes the
732 * code simpler (releasing a created region is more code).
734 NaClXMutexLock(&nap->dynamic_load_mutex);
735 ret = NaClDynamicRegionCreate(nap, NaClUserToSys(nap, usraddr), length,
737 NaClXMutexUnlock(&nap->dynamic_load_mutex);
739 NaClLog(3, "NaClSysMmap: PROT_EXEC region"
740 " overlaps other dynamic code\n");
741 map_result = -NACL_ABI_EINVAL;
745 * Remove scratch mapping.
747 NaClDescUnmapUnsafe(ndp, (void *) image_sys_addr, length);
749 * We must succeed in mapping into the untrusted executable
750 * space, since otherwise it would mean that the temporary
751 * hole (for Windows) was filled by some other thread, and
752 * that's unrecoverable. For Linux and OSX, this should never
753 * happen, since it's an atomic overmap.
755 NaClLog(3, "NaClSysMmap: mapping into executable memory\n");
756 image_sys_addr = (*NACL_VTBL(NaClDesc, ndp)->
761 NACL_ABI_PROT_READ | NACL_ABI_PROT_EXEC,
762 NACL_ABI_MAP_PRIVATE | NACL_ABI_MAP_FIXED,
764 if (image_sys_addr != sysaddr) {
766 "NaClSysMmap: map into executable memory failed:"
767 " got 0x%"NACL_PRIxPTR"\n", image_sys_addr);
769 map_result = (int32_t) usraddr;
774 "NaClSysMmap: did not validate in readonly_text mode;"
775 " attempting to use dyncode interface.\n");
777 if (holding_app_lock) {
778 NaClVmHoleClosingMu(nap);
779 NaClXMutexUnlock(&nap->mu);
782 if (NACL_FI("MMAP_STUBOUT_EMULATION", 0, 1)) {
783 NaClLog(3, "NaClSysMmap: emulating stubout mode by touching memory\n");
784 *(volatile uint8_t *) image_sys_addr =
785 *(volatile uint8_t *) image_sys_addr;
789 * Fallback implementation. Use the mapped memory as source for
790 * the dynamic code insertion interface.
792 sys_ret = NaClTextDyncodeCreate(nap,
794 (uint8_t *) image_sys_addr,
798 map_result = sys_ret;
800 map_result = (int32_t) usraddr;
804 sys_ret = (*NACL_VTBL(NaClDesc, ndp)->
805 UnmapUnsafe)(ndp, (void *) image_sys_addr, length);
807 sys_ret = munmap((void *) image_sys_addr, length);
811 "NaClSysMmap: could not unmap text at 0x%"NACL_PRIxPTR","
812 " length 0x%"NACL_PRIxS", NaCl errno %d\n",
813 image_sys_addr, length, -sys_ret);
/* NOTE(review): jumps to a cleanup label whose definition is not visible in this excerpt. */
815 goto cleanup_no_locks;
818 * This is a fix for Windows, where we cannot pass a size that
819 * goes beyond the non-page-rounded end of the file.
821 size_t length_to_map = size_min(length, (size_t) file_bytes);
824 ("NaClSysMmap: (*ndp->Map)(,,0x%08"NACL_PRIxPTR","
825 "0x%08"NACL_PRIxS",0x%x,0x%x,0x%08"NACL_PRIxPTR")\n"),
826 sysaddr, length, prot, flags, (uintptr_t) offset);
828 map_result = (*((struct NaClDescVtbl const *) ndp->base.vtbl)->
838 * "Small" negative integers are errno values. Larger ones are
841 if (NaClPtrIsNegErrno(&map_result)) {
842 if ((uintptr_t) -NACL_ABI_E_MOVE_ADDRESS_SPACE == map_result) {
844 ("NaClSysMmap: Map failed, but we"
845 " cannot handle address space move, error %"NACL_PRIuS"\n"),
846 (size_t) map_result);
849 * Propagate all other errors to user code.
853 if (map_result != sysaddr) {
854 NaClLog(LOG_FATAL, "system mmap did not honor NACL_ABI_MAP_FIXED\n");
858 * If we are mapping beyond the end of the file, we fill this space
859 * with PROT_NONE pages.
861 * Windows forces us to expose a mixture of 64k and 4k pages, and we
862 * expose the same mappings on other platforms. For example,
863 * suppose untrusted code requests to map 0x40000 bytes from a file
864 * of extent 0x100. We will create the following regions:
866 * 0- 0x100 A: Bytes from the file
867 * 0x100- 0x1000 B: The rest of the 4k page is accessible but undefined
868 * 0x1000-0x10000 C: The rest of the 64k page is inaccessible (PROT_NONE)
869 * 0x10000-0x40000 D: Further 64k pages are also inaccessible (PROT_NONE)
871 * On Windows, a single MapViewOfFileEx() call creates A, B and C.
872 * This call will not accept a size greater than 0x100, so we have
873 * to create D separately. The hardware requires B to be accessible
874 * (whenever A is accessible), but Windows does not allow C to be
875 * mapped as accessible. This is unfortunate because it interferes
876 * with how ELF dynamic linkers usually like to set up an ELF
879 /* inaccessible: [length, alloc_rounded_length) */
880 if (length < alloc_rounded_length) {
882 * On Unix, this maps regions C and D as inaccessible. On
883 * Windows, it just maps region D; region C has already been made
886 size_t map_len = alloc_rounded_length - length;
887 map_result = MunmapInternal(nap, sysaddr + length, map_len);
888 if (map_result != 0) {
893 if (alloc_rounded_length > 0) {
894 NaClVmmapAddWithOverwrite(&nap->mem_map,
895 NaClSysToUser(nap, sysaddr) >> NACL_PAGESHIFT,
896 alloc_rounded_length >> NACL_PAGESHIFT,
904 map_result = usraddr;
907 if (holding_app_lock) {
908 NaClVmHoleClosingMu(nap);
909 NaClXMutexUnlock(&nap->mu);
917 * Check to ensure that map_result will fit into a 32-bit value. This is
918 * a bit tricky because there are two valid ranges: one is the range from
919 * 0 to (almost) 2^32, the other is from -1 to -4096 (our error range).
920 * For a 32-bit value these ranges would overlap, but if the value is 64-bit
921 * they will be disjoint.
923 if (map_result > UINT32_MAX
924 && !NaClPtrIsNegErrno(&map_result)) {
925 NaClLog(LOG_FATAL, "Overflow in NaClSysMmap: return address is "
926 "0x%"NACL_PRIxPTR"\n", map_result);
928 NaClLog(3, "NaClSysMmap: returning 0x%08"NACL_PRIxPTR"\n", map_result);
930 return (int32_t) map_result;
/*
 * NaClSysMmap: syscall entry point for mmap.  Validates and copies the
 * offset in/out argument (offp) from untrusted memory, then delegates
 * to NaClSysMmapIntern.
 *
 * NOTE(review): truncated excerpt -- several parameter lines, guard
 * conditions, and closing braces are missing; lines carry leftover
 * line-number prefixes from extraction.  Preserved as-is.
 */
933 int32_t NaClSysMmap(struct NaClAppThread *natp,
939 nacl_abi_off_t *offp) {
940 struct NaClApp *nap = natp->nap;
943 nacl_abi_off_t offset;
946 "Entered NaClSysMmap(0x%08"NACL_PRIxPTR",0x%"NACL_PRIxS","
947 "0x%x,0x%x,%d,0x%08"NACL_PRIxPTR")\n",
948 (uintptr_t) start, length, prot, flags, d, (uintptr_t) offp);
950 if ((nacl_abi_off_t *) 0 == offp) {
952 * This warning is really targetted towards trusted code,
953 * especially tests that didn't notice the argument type change.
954 * Unfortunatey, zero is a common and legitimate offset value, and
955 * the compiler will not complain since an automatic type
959 "NaClSysMmap: NULL pointer used"
960 " for offset in/out argument\n");
961 return -NACL_ABI_EINVAL;
964 sysaddr = NaClUserToSysAddrRange(nap, (uintptr_t) offp, sizeof offset);
965 if (kNaClBadAddress == sysaddr) {
967 "NaClSysMmap: offset in a bad untrusted memory location\n");
968 retval = -NACL_ABI_EFAULT;
/* Read through a volatile pointer: offp lives in untrusted memory and may race. */
971 offset = *(nacl_abi_off_t volatile *) sysaddr;
973 NaClLog(4, " offset = 0x%08"NACL_PRIx64"\n", (uint64_t) offset);
975 retval = NaClSysMmapIntern(nap, start, length, prot, flags, d, offset);
/*
 * MunmapInternal (Windows variant, presumably under a #if NACL_WINDOWS
 * guard not visible in this excerpt -- confirm against pristine
 * source): walks the range in NACL_MAP_PAGESIZE steps; file-backed
 * pages are removed with UnmapViewOfFile and the hole immediately
 * re-reserved with VirtualAlloc, anonymous pages are decommitted with
 * VirtualFree, and the VM map entries are removed.
 *
 * NOTE(review): truncated excerpt -- declarations, conditions, braces
 * and preprocessor lines are missing; preserved as-is.
 */
981 static int32_t MunmapInternal(struct NaClApp *nap,
982 uintptr_t sysaddr, size_t length) {
984 uintptr_t endaddr = sysaddr + length;
986 for (addr = sysaddr; addr < endaddr; addr += NACL_MAP_PAGESIZE) {
987 struct NaClVmmapEntry const *entry;
991 usraddr = NaClSysToUser(nap, addr);
993 entry = NaClVmmapFindPage(&nap->mem_map, usraddr >> NACL_PAGESHIFT);
998 ("NaClSysMunmap: addr 0x%08x, desc 0x%08"NACL_PRIxPTR"\n"),
999 addr, (uintptr_t) entry->desc);
1001 page_num = usraddr - (entry->page_num << NACL_PAGESHIFT);
1002 offset = (uintptr_t) entry->offset + page_num;
1004 if (NULL != entry->desc &&
1005 offset < (uintptr_t) entry->file_size) {
1006 if (!UnmapViewOfFile((void *) addr)) {
1008 ("MunmapInternal: UnmapViewOfFile failed to at addr"
1009 " 0x%08"NACL_PRIxPTR", error %d\n"),
1010 addr, GetLastError());
1013 * Fill the address space hole that we opened
1014 * with UnmapViewOfFile().
1016 if (!VirtualAlloc((void *) addr, NACL_MAP_PAGESIZE, MEM_RESERVE,
1018 NaClLog(LOG_FATAL, "MunmapInternal: "
1019 "failed to fill hole with VirtualAlloc(), error %d\n",
1024 * Anonymous memory; we just decommit it and thus
1025 * make it inaccessible.
1027 if (!VirtualFree((void *) addr,
1030 int error = GetLastError();
1032 ("MunmapInternal: Could not VirtualFree MEM_DECOMMIT"
1033 " addr 0x%08x, error %d (0x%x)\n"),
1034 addr, error, error);
1037 NaClVmmapRemove(&nap->mem_map,
1038 usraddr >> NACL_PAGESHIFT,
1039 NACL_PAGES_PER_MAP);
/*
 * MunmapInternal (POSIX variant, presumably the #else branch of a
 * platform #if not visible in this excerpt): overmaps the range with
 * inaccessible anonymous pages via MAP_FIXED rather than opening an
 * address-space hole, then removes the entries from the VM map.
 *
 * NOTE(review): truncated excerpt -- mmap's prot/fd arguments, braces
 * and the return statement are missing; preserved as-is.  Also note
 * UNREFERENCED_PARAMETER(nap) appears even though nap is used below --
 * verify against the pristine source.
 */
1044 static int32_t MunmapInternal(struct NaClApp *nap,
1045 uintptr_t sysaddr, size_t length) {
1046 UNREFERENCED_PARAMETER(nap);
1047 NaClLog(3, "MunmapInternal(0x%08"NACL_PRIxPTR", 0x%"NACL_PRIxS")\n",
1048 (uintptr_t) sysaddr, length);
1050 * Overwrite current mapping with inaccessible, anonymous
1051 * zero-filled pages, which should be copy-on-write and thus
1052 * relatively cheap. Do not open up an address space hole.
1054 if (MAP_FAILED == mmap((void *) sysaddr,
1057 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
1060 NaClLog(4, "mmap to put in anonymous memory failed, errno = %d\n", errno);
1061 return -NaClXlateErrno(errno);
1063 NaClVmmapRemove(&nap->mem_map,
1064 NaClSysToUser(nap, (uintptr_t) sysaddr) >> NACL_PAGESHIFT,
1065 length >> NACL_PAGESHIFT);
/*
 * NaClSysMunmap: syscall entry point for munmap.  Validates alignment,
 * rejects zero length, rounds the length up to allocation granularity,
 * refuses to unmap executable pages, then delegates to the
 * platform-specific MunmapInternal under the app lock / VM hole.
 *
 * NOTE(review): truncated excerpt -- parameter lines, several guard
 * conditions, goto/cleanup labels, and closing braces are missing;
 * lines carry leftover line-number prefixes.  Preserved as-is.
 */
1070 int32_t NaClSysMunmap(struct NaClAppThread *natp,
1073 struct NaClApp *nap = natp->nap;
1074 int32_t retval = -NACL_ABI_EINVAL;
1076 int holding_app_lock = 0;
1077 size_t alloc_rounded_length;
1079 NaClLog(3, "Entered NaClSysMunmap(0x%08"NACL_PRIxPTR", "
1080 "0x%08"NACL_PRIxPTR", 0x%"NACL_PRIxS")\n",
1081 (uintptr_t) natp, (uintptr_t) start, length);
1083 if (!NaClIsAllocPageMultiple((uintptr_t) start)) {
1084 NaClLog(4, "start addr not allocation multiple\n");
1085 retval = -NACL_ABI_EINVAL;
1090 * Without this check we would get the following inconsistent
1092 * * On Linux, an mmap() of zero length yields a failure.
1093 * * On Mac OS X, an mmap() of zero length returns no error,
1094 * which would lead to a NaClVmmapUpdate() of zero pages, which
1096 * * On Windows we would iterate through the 64k pages and do
1097 * nothing, which would not yield a failure.
/* NOTE(review): the zero-length condition guarding this return is missing from this excerpt. */
1099 retval = -NACL_ABI_EINVAL;
1102 alloc_rounded_length = NaClRoundAllocPage(length);
1103 if (alloc_rounded_length != length) {
1104 length = alloc_rounded_length;
1105 NaClLog(2, "munmap: rounded length to 0x%"NACL_PRIxS"\n", length);
1107 sysaddr = NaClUserToSysAddrRange(nap, (uintptr_t) start, length);
1108 if (kNaClBadAddress == sysaddr) {
1109 NaClLog(4, "munmap: region not user addresses\n");
1110 retval = -NACL_ABI_EFAULT;
1114 NaClXMutexLock(&nap->mu);
1116 NaClVmHoleOpeningMu(nap);
1118 holding_app_lock = 1;
1121 * User should be unable to unmap any executable pages. We check here.
1123 if (NaClSysCommonAddrRangeContainsExecutablePages(nap,
1126 NaClLog(2, "NaClSysMunmap: region contains executable pages\n");
1127 retval = -NACL_ABI_EINVAL;
1131 NaClVmIoPendingCheck_mu(nap,
1132 (uint32_t) (uintptr_t) start,
1133 (uint32_t) ((uintptr_t) start + length - 1));
1135 retval = MunmapInternal(nap, sysaddr, length);
1137 if (holding_app_lock) {
1138 NaClVmHoleClosingMu(nap);
1139 NaClXMutexUnlock(&nap->mu);
/*
 * MprotectInternal (Windows variant): change the protection of an
 * already-mapped user region [sysaddr, sysaddr + length).
 *
 * NOTE(review): this listing is elided (embedded line numbers jump);
 * the comments below describe only the visible code -- confirm against
 * the full source file.
 *
 * The region is walked one NACL_MAP_PAGESIZE allocation at a time
 * because a VirtualProtect call cannot span allocations (see the
 * original comment at lines 1154-1155 below).  Called from
 * NaClSysMprotectInternal with nap->mu held.  On a VirtualProtect
 * failure this logs at LOG_FATAL and returns the negated, translated
 * Windows error.
 */
1145 static int32_t MprotectInternal(struct NaClApp *nap,
1146 uintptr_t sysaddr, size_t length, int prot) {
1148 uintptr_t endaddr = sysaddr + length;
1154 * VirtualProtect region cannot span allocations, all addresses must be
1155 * in one region of memory returned from VirtualAlloc or VirtualAllocEx.
1157 for (addr = sysaddr; addr < endaddr; addr += NACL_MAP_PAGESIZE) {
1158 struct NaClVmmapEntry const *entry;
/*
 * Map the system address back to user space and look up the vmmap entry
 * covering this allocation to learn what backs it (entry->desc).
 */
1162 usraddr = NaClSysToUser(nap, addr);
1164 entry = NaClVmmapFindPage(&nap->mem_map, usraddr >> NACL_PAGESHIFT);
1165 if (NULL == entry) {
/* (elided: handling for a page with no vmmap entry -- confirm in full source) */
1168 NaClLog(3, "MprotectInternal: addr 0x%08x, desc 0x%08"NACL_PRIxPTR"\n",
1169 addr, (uintptr_t) entry->desc);
/*
 * Despite its name, "page_num" here is the byte offset of this
 * allocation within the vmmap entry; "offset" is the corresponding
 * byte offset into the entry's backing object.
 */
1171 page_num = usraddr - (entry->page_num << NACL_PAGESHIFT);
1172 offset = (uintptr_t) entry->offset + page_num;
/* Anonymous memory: apply the requested protection directly. */
1174 if (NULL == entry->desc) {
1175 flProtect = NaClflProtectMap(prot);
1177 /* Change the page protection */
1178 if (!VirtualProtect((void *) addr,
1182 int error = GetLastError();
1183 NaClLog(LOG_FATAL, "MprotectInternal: "
1184 "failed to change the memory protection with VirtualProtect,"
1185 " addr 0x%08x, error %d (0x%x)\n",
1186 addr, error, error);
1187 return -NaClXlateSystemError(error);
/*
 * File-backed memory, and this chunk overlaps actual file contents:
 * recompute flProtect so it is consistent with the mapping's
 * MAP_PRIVATE flag and the descriptor's open mode (O_ACCMODE).
 */
1189 } else if (offset < (uintptr_t) entry->file_size) {
1190 nacl_off64_t file_bytes;
1192 size_t rounded_chunk_size;
1194 char const *err_msg;
1196 desc_flags = (*NACL_VTBL(NaClDesc, entry->desc)->GetFlags)(entry->desc);
1197 NaClflProtectAndDesiredAccessMap(prot,
1199 & NACL_ABI_MAP_PRIVATE) != 0,
1200 (desc_flags & NACL_ABI_O_ACCMODE),
1201 /* flMaximumProtect= */ NULL,
1203 /* dwDesiredAccess= */ NULL,
/* flProtect == 0 means the prot/flags/mode combination is unmappable. */
1205 if (0 == flProtect) {
1207 * This shouldn't really happen since we already checked the address
1208 * space using NaClVmmapCheckExistingMapping, but better be safe.
1210 NaClLog(LOG_FATAL, "MprotectInternal: %s\n", err_msg);
/*
 * Only the pages actually backed by file bytes are re-protected:
 * at most one 64K allocation, clipped to the file tail and rounded
 * up to a system page.
 */
1213 file_bytes = entry->file_size - offset;
1214 chunk_size = size_min((size_t) file_bytes, NACL_MAP_PAGESIZE);
1215 rounded_chunk_size = NaClRoundPage(chunk_size);
1217 NaClLog(4, "VirtualProtect(0x%08x, 0x%"NACL_PRIxS", %x)\n",
1218 addr, rounded_chunk_size, flProtect);
1220 /* Change the page protection */
1221 if (!VirtualProtect((void *) addr,
1225 int error = GetLastError();
1226 NaClLog(LOG_FATAL, "MprotectInternal: "
1227 "failed to change the memory protection with VirtualProtect()"
1228 " addr 0x%08x, error %d (0x%x)\n",
1229 addr, error, error);
1230 return -NaClXlateSystemError(error);
/*
 * MprotectInternal (POSIX variant): apply |prot| to the already-mapped
 * user region [sysaddr, sysaddr + length) by iterating over the vmmap
 * entries that cover it and calling the host mprotect() once per entry.
 *
 * NOTE(review): this listing is elided (embedded line numbers jump);
 * the comments below describe only the visible code -- confirm against
 * the full source file.
 *
 * Called from NaClSysMprotectInternal with nap->mu held.  On an
 * mprotect() failure this logs at LOG_FATAL and returns the negated,
 * translated errno.
 */
1238 static int32_t MprotectInternal(struct NaClApp *nap,
1239 uintptr_t sysaddr, size_t length, int prot) {
1241 uintptr_t last_page_num;
1243 struct NaClVmmapIter iter;
1244 struct NaClVmmapEntry *entry;
/* Translate the NaCl ABI protection bits to host mprotect() bits. */
1246 host_prot = NaClProtMap(prot);
1248 usraddr = NaClSysToUser(nap, sysaddr);
1249 last_page_num = (usraddr + length) >> NACL_PAGESHIFT;
1251 NaClVmmapFindPageIter(&nap->mem_map,
1252 usraddr >> NACL_PAGESHIFT,
1254 entry = NaClVmmapIterStar(&iter);
/* The requested range must begin exactly on a vmmap entry boundary. */
1256 CHECK(usraddr == (entry->page_num << NACL_PAGESHIFT));
/* Walk every vmmap entry whose first page lies inside the range. */
1258 for (; !NaClVmmapIterAtEnd(&iter) &&
1259 (NaClVmmapIterStar(&iter))->page_num < last_page_num;
1260 NaClVmmapIterIncr(&iter)) {
1264 entry = NaClVmmapIterStar(&iter);
1266 addr = NaClUserToSys(nap, entry->page_num << NACL_PAGESHIFT);
1267 entry_len = entry->npages << NACL_PAGESHIFT;
1269 NaClLog(3, "MprotectInternal: "
1270 "addr 0x%08"NACL_PRIxPTR", desc 0x%08"NACL_PRIxPTR"\n",
1271 addr, (uintptr_t) entry->desc);
/* Anonymous memory: re-protect the whole entry in one call. */
1273 if (NULL == entry->desc) {
1274 if (0 != mprotect((void *) addr, entry_len, host_prot)) {
1275 NaClLog(LOG_FATAL, "MprotectInternal: "
1276 "mprotect on anonymous memory failed, errno = %d\n", errno);
1277 return -NaClXlateErrno(errno);
/*
 * File-backed memory with real file contents behind it: re-protect
 * only min(rounded-up file bytes, entry length); the tail of the
 * mapping beyond the file size is not touched here.
 */
1279 } else if (entry->offset < entry->file_size) {
1280 nacl_abi_off64_t file_bytes;
1281 size_t rounded_file_bytes;
1284 file_bytes = entry->file_size - entry->offset;
1285 rounded_file_bytes = NaClRoundPage((size_t) file_bytes);
1286 prot_len = size_min(rounded_file_bytes, entry_len);
1288 if (0 != mprotect((void *) addr, prot_len, host_prot)) {
1289 NaClLog(LOG_FATAL, "MprotectInternal: "
1290 "mprotect on file-backed memory failed, errno = %d\n", errno);
1291 return -NaClXlateErrno(errno);
/* The walked entries must tile the requested range exactly. */
1296 CHECK((entry->page_num + entry->npages) == last_page_num);
/*
 * NaClSysMprotectInternal: validate and apply an mprotect request.
 *
 * NOTE(review): this listing is elided (embedded line numbers jump) --
 * the parameter list and the error-path control flow (presumably goto
 * cleanup labels) are not fully visible; confirm against the full
 * source file.
 *
 * Visible checks, in order:
 *   - |start| must be an allocation-page multiple (else EINVAL);
 *   - |length| is rounded up to an allocation page;
 *   - the range must translate to valid user addresses (else EFAULT);
 *   - |prot| may contain only PROT_READ|PROT_WRITE (else EACCES);
 *   - the range must not contain executable pages (else EACCES);
 *   - the vmmap must already have a matching mapping for
 *     NaClVmmapChangeProt to update (else EACCES).
 * The vmmap update and the platform-specific MprotectInternal call are
 * performed under nap->mu.
 */
1302 int32_t NaClSysMprotectInternal(struct NaClApp *nap,
1306 int32_t retval = -NACL_ABI_EINVAL;
1308 int holding_app_lock = 0;
1310 if (!NaClIsAllocPageMultiple((uintptr_t) start)) {
1311 NaClLog(4, "mprotect: start addr not allocation multiple\n");
1312 retval = -NACL_ABI_EINVAL;
1315 length = NaClRoundAllocPage(length);
1316 sysaddr = NaClUserToSysAddrRange(nap, (uintptr_t) start, length);
1317 if (kNaClBadAddress == sysaddr) {
1318 NaClLog(4, "mprotect: region not user addresses\n");
1319 retval = -NACL_ABI_EFAULT;
/* Reject any protection bits other than PROT_READ | PROT_WRITE. */
1322 if (0 != (~(NACL_ABI_PROT_READ | NACL_ABI_PROT_WRITE) & prot)) {
1323 NaClLog(4, "mprotect: prot has other bits than PROT_{READ|WRITE}\n");
1324 retval = -NACL_ABI_EACCES;
1328 NaClXMutexLock(&nap->mu);
1330 holding_app_lock = 1;
1333 * User should be unable to change protection of any executable pages.
1335 if (NaClSysCommonAddrRangeContainsExecutablePages(nap,
1338 NaClLog(2, "mprotect: region contains executable pages\n");
1339 retval = -NACL_ABI_EACCES;
/*
 * Record the new protection in the vmmap bookkeeping first; failure
 * means no existing mapping covers the range.
 */
1343 if (!NaClVmmapChangeProt(&nap->mem_map,
1344 NaClSysToUser(nap, sysaddr) >> NACL_PAGESHIFT,
1345 length >> NACL_PAGESHIFT,
1347 NaClLog(4, "mprotect: no such region\n");
1348 retval = -NACL_ABI_EACCES;
/* Flag any in-flight I/O that overlaps the range before changing it. */
1352 NaClVmIoPendingCheck_mu(nap,
1353 (uint32_t) (uintptr_t) start,
1354 (uint32_t) ((uintptr_t) start + length - 1));
/* Apply the change at the host level (platform-specific helper above). */
1356 retval = MprotectInternal(nap, sysaddr, length, prot);
1358 if (holding_app_lock) {
1359 NaClXMutexUnlock(&nap->mu);
1364 int32_t NaClSysMprotect(struct NaClAppThread *natp,
1368 struct NaClApp *nap = natp->nap;
1370 NaClLog(3, "Entered NaClSysMprotect(0x%08"NACL_PRIxPTR", "
1371 "0x%08"NACL_PRIxPTR", 0x%"NACL_PRIxS", 0x%x)\n",
1372 (uintptr_t) natp, (uintptr_t) start, length, prot);
1374 return NaClSysMprotectInternal(nap, start, length, prot);