1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
7 * DOC: Sample flow of using the ioctl interface provided by the Nitro Enclaves (NE)
13 * Load the nitro_enclaves module, setting also the enclave CPU pool. The
14 * enclave CPUs need to be full cores from the same NUMA node. CPU 0 and its
15 * siblings have to remain available for the primary / parent VM, so they
16 * cannot be included in the enclave CPU pool.
18 * See the cpu list section from the kernel documentation.
19 * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html#cpu-lists
21 * insmod drivers/virt/nitro_enclaves/nitro_enclaves.ko
24 * The CPU pool can be set at runtime, after the kernel module is loaded.
26 * echo <cpu-list> > /sys/module/nitro_enclaves/parameters/ne_cpus
28 * NUMA and CPU siblings information can be found using:
33 * Check the online / offline CPU list. The CPUs from the pool should be
38 * Check dmesg for any warnings / errors through the NE driver lifetime / usage.
39 * The NE logs contain the "nitro_enclaves" or "pci 0000:00:02.0" pattern.
43 * Set up hugetlbfs huge pages. The memory needs to be from the same NUMA node as
46 * https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html
48 * By default, the allocation of hugetlb pages is distributed on all possible
49 * NUMA nodes. Use the following configuration files to set the number of huge
50 * pages from a NUMA node:
52 * /sys/devices/system/node/node<X>/hugepages/hugepages-2048kB/nr_hugepages
53 * /sys/devices/system/node/node<X>/hugepages/hugepages-1048576kB/nr_hugepages
55 * or, if not on a system with multiple NUMA nodes, one can also set the number
56 * of 2 MiB / 1 GiB huge pages using
58 * /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
59 * /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
61 * In this example 256 hugepages of 2 MiB are used.
63 * Build and run the NE sample.
65 * make -C samples/nitro_enclaves clean
66 * make -C samples/nitro_enclaves
67 * ./samples/nitro_enclaves/ne_ioctl_sample <path_to_enclave_image>
69 * Unload the nitro_enclaves module.
71 * rmmod nitro_enclaves
83 #include <sys/eventfd.h>
84 #include <sys/ioctl.h>
86 #include <sys/socket.h>
88 #include <sys/types.h>
91 #include <linux/mman.h>
92 #include <linux/nitro_enclaves.h>
93 #include <linux/vm_sockets.h>
96 * NE_DEV_NAME - Nitro Enclaves (NE) misc device that provides the ioctl interface.
98 #define NE_DEV_NAME "/dev/nitro_enclaves"
101 * NE_POLL_WAIT_TIME - Timeout in seconds for each poll event.
103 #define NE_POLL_WAIT_TIME (60)
105 * NE_POLL_WAIT_TIME_MS - Timeout in milliseconds for each poll event.
107 #define NE_POLL_WAIT_TIME_MS (NE_POLL_WAIT_TIME * 1000)
110 * NE_SLEEP_TIME - Amount of time in seconds for the process to keep the enclave alive.
112 #define NE_SLEEP_TIME (300)
115 * NE_DEFAULT_NR_VCPUS - Default number of vCPUs set for an enclave.
117 #define NE_DEFAULT_NR_VCPUS (2)
120 * NE_MIN_MEM_REGION_SIZE - Minimum size of a memory region - 2 MiB.
122 #define NE_MIN_MEM_REGION_SIZE (2 * 1024 * 1024)
125 * NE_DEFAULT_NR_MEM_REGIONS - Default number of memory regions of 2 MiB set for
128 #define NE_DEFAULT_NR_MEM_REGIONS (256)
131 * NE_IMAGE_LOAD_HEARTBEAT_CID - Vsock CID for enclave image loading heartbeat logic.
133 #define NE_IMAGE_LOAD_HEARTBEAT_CID (3)
135 * NE_IMAGE_LOAD_HEARTBEAT_PORT - Vsock port for enclave image loading heartbeat logic.
137 #define NE_IMAGE_LOAD_HEARTBEAT_PORT (9000)
139 * NE_IMAGE_LOAD_HEARTBEAT_VALUE - Heartbeat value for enclave image loading.
141 #define NE_IMAGE_LOAD_HEARTBEAT_VALUE (0xb7)
144 * struct ne_user_mem_region - User space memory region set for an enclave.
145 * @userspace_addr: Address of the user space memory region.
146 * @memory_size: Size of the user space memory region.
148 struct ne_user_mem_region {
149 void *userspace_addr;
154 * ne_create_vm() - Create a slot for the enclave VM.
155 * @ne_dev_fd: The file descriptor of the NE misc device.
156 * @slot_uid: The generated slot uid for the enclave.
157 * @enclave_fd : The generated file descriptor for the enclave.
159 * Context: Process context.
162 * * Negative return value on failure.
164 static int ne_create_vm(int ne_dev_fd, unsigned long *slot_uid, int *enclave_fd)
167 *enclave_fd = ioctl(ne_dev_fd, NE_CREATE_VM, slot_uid);
169 if (*enclave_fd < 0) {
172 case NE_ERR_NO_CPUS_AVAIL_IN_POOL: {
173 printf("Error in create VM, no CPUs available in the NE CPU pool\n");
179 printf("Error in create VM [%m]\n");
190 * ne_poll_enclave_fd() - Thread function for polling the enclave fd.
191 * @data: Argument provided for the polling function.
193 * Context: Process context.
195 * * NULL on success / failure.
197 void *ne_poll_enclave_fd(void *data)
199 int enclave_fd = *(int *)data;
200 struct pollfd fds[1] = {};
204 printf("Running from poll thread, enclave fd %d\n", enclave_fd);
206 fds[0].fd = enclave_fd;
207 fds[0].events = POLLIN | POLLERR | POLLHUP;
209 /* Keep on polling until the current process is terminated. */
211 printf("[iter %d] Polling ...\n", i);
213 rc = poll(fds, 1, NE_POLL_WAIT_TIME_MS);
215 printf("Error in poll [%m]\n");
223 printf("Poll: %d seconds elapsed\n",
224 i * NE_POLL_WAIT_TIME);
229 printf("Poll received value 0x%x\n", fds[0].revents);
231 if (fds[0].revents & POLLHUP) {
232 printf("Received POLLHUP\n");
237 if (fds[0].revents & POLLNVAL) {
238 printf("Received POLLNVAL\n");
248 * ne_alloc_user_mem_region() - Allocate a user space memory region for an enclave.
249 * @ne_user_mem_region: User space memory region allocated using hugetlbfs.
251 * Context: Process context.
254 * * Negative return value on failure.
256 static int ne_alloc_user_mem_region(struct ne_user_mem_region *ne_user_mem_region)
259 * Check available hugetlb encodings for different huge page sizes in
260 * include/uapi/linux/mman.h.
262 ne_user_mem_region->userspace_addr = mmap(NULL, ne_user_mem_region->memory_size,
263 PROT_READ | PROT_WRITE,
264 MAP_PRIVATE | MAP_ANONYMOUS |
265 MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
266 if (ne_user_mem_region->userspace_addr == MAP_FAILED) {
267 printf("Error in mmap memory [%m]\n");
276 * ne_load_enclave_image() - Place the enclave image in the enclave memory.
277 * @enclave_fd : The file descriptor associated with the enclave.
278 * @ne_user_mem_regions: User space memory regions allocated for the enclave.
279 * @enclave_image_path : The file path of the enclave image.
281 * Context: Process context.
284 * * Negative return value on failure.
286 static int ne_load_enclave_image(int enclave_fd, struct ne_user_mem_region ne_user_mem_regions[],
287 char *enclave_image_path)
289 unsigned char *enclave_image = NULL;
290 int enclave_image_fd = -1;
291 size_t enclave_image_size = 0;
292 size_t enclave_memory_size = 0;
294 size_t image_written_bytes = 0;
295 struct ne_image_load_info image_load_info = {
296 .flags = NE_EIF_IMAGE,
298 struct stat image_stat_buf = {};
300 size_t temp_image_offset = 0;
302 for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++)
303 enclave_memory_size += ne_user_mem_regions[i].memory_size;
305 rc = stat(enclave_image_path, &image_stat_buf);
307 printf("Error in get image stat info [%m]\n");
312 enclave_image_size = image_stat_buf.st_size;
314 if (enclave_memory_size < enclave_image_size) {
315 printf("The enclave memory is smaller than the enclave image size\n");
320 rc = ioctl(enclave_fd, NE_GET_IMAGE_LOAD_INFO, &image_load_info);
323 case NE_ERR_NOT_IN_INIT_STATE: {
324 printf("Error in get image load info, enclave not in init state\n");
329 case NE_ERR_INVALID_FLAG_VALUE: {
330 printf("Error in get image load info, provided invalid flag\n");
336 printf("Error in get image load info [%m]\n");
342 printf("Enclave image offset in enclave memory is %lld\n",
343 image_load_info.memory_offset);
345 enclave_image_fd = open(enclave_image_path, O_RDONLY);
346 if (enclave_image_fd < 0) {
347 printf("Error in open enclave image file [%m]\n");
349 return enclave_image_fd;
352 enclave_image = mmap(NULL, enclave_image_size, PROT_READ,
353 MAP_PRIVATE, enclave_image_fd, 0);
354 if (enclave_image == MAP_FAILED) {
355 printf("Error in mmap enclave image [%m]\n");
360 temp_image_offset = image_load_info.memory_offset;
362 for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
363 size_t bytes_to_write = 0;
364 size_t memory_offset = 0;
365 size_t memory_size = ne_user_mem_regions[i].memory_size;
366 size_t remaining_bytes = 0;
367 void *userspace_addr = ne_user_mem_regions[i].userspace_addr;
369 if (temp_image_offset >= memory_size) {
370 temp_image_offset -= memory_size;
373 } else if (temp_image_offset != 0) {
374 memory_offset = temp_image_offset;
375 memory_size -= temp_image_offset;
376 temp_image_offset = 0;
379 remaining_bytes = enclave_image_size - image_written_bytes;
380 bytes_to_write = memory_size < remaining_bytes ?
381 memory_size : remaining_bytes;
383 memcpy(userspace_addr + memory_offset,
384 enclave_image + image_written_bytes, bytes_to_write);
386 image_written_bytes += bytes_to_write;
388 if (image_written_bytes == enclave_image_size)
392 munmap(enclave_image, enclave_image_size);
394 close(enclave_image_fd);
400 * ne_set_user_mem_region() - Set a user space memory region for the given enclave.
401 * @enclave_fd : The file descriptor associated with the enclave.
402 * @ne_user_mem_region : User space memory region to be set for the enclave.
404 * Context: Process context.
407 * * Negative return value on failure.
409 static int ne_set_user_mem_region(int enclave_fd, struct ne_user_mem_region ne_user_mem_region)
411 struct ne_user_memory_region mem_region = {
412 .flags = NE_DEFAULT_MEMORY_REGION,
413 .memory_size = ne_user_mem_region.memory_size,
414 .userspace_addr = (__u64)ne_user_mem_region.userspace_addr,
418 rc = ioctl(enclave_fd, NE_SET_USER_MEMORY_REGION, &mem_region);
421 case NE_ERR_NOT_IN_INIT_STATE: {
422 printf("Error in set user memory region, enclave not in init state\n");
427 case NE_ERR_INVALID_MEM_REGION_SIZE: {
428 printf("Error in set user memory region, mem size not multiple of 2 MiB\n");
433 case NE_ERR_INVALID_MEM_REGION_ADDR: {
434 printf("Error in set user memory region, invalid user space address\n");
439 case NE_ERR_UNALIGNED_MEM_REGION_ADDR: {
440 printf("Error in set user memory region, unaligned user space address\n");
445 case NE_ERR_MEM_REGION_ALREADY_USED: {
446 printf("Error in set user memory region, memory region already used\n");
451 case NE_ERR_MEM_NOT_HUGE_PAGE: {
452 printf("Error in set user memory region, not backed by huge pages\n");
457 case NE_ERR_MEM_DIFFERENT_NUMA_NODE: {
458 printf("Error in set user memory region, different NUMA node than CPUs\n");
463 case NE_ERR_MEM_MAX_REGIONS: {
464 printf("Error in set user memory region, max memory regions reached\n");
469 case NE_ERR_INVALID_PAGE_SIZE: {
470 printf("Error in set user memory region, has page not multiple of 2 MiB\n");
475 case NE_ERR_INVALID_FLAG_VALUE: {
476 printf("Error in set user memory region, provided invalid flag\n");
482 printf("Error in set user memory region [%m]\n");
492 * ne_free_mem_regions() - Unmap all the user space memory regions that were set
493 * aside for the enclave.
494 * @ne_user_mem_regions: The user space memory regions associated with an enclave.
496 * Context: Process context.
498 static void ne_free_mem_regions(struct ne_user_mem_region ne_user_mem_regions[])
502 for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++)
503 munmap(ne_user_mem_regions[i].userspace_addr,
504 ne_user_mem_regions[i].memory_size);
508 * ne_add_vcpu() - Add a vCPU to the given enclave.
509 * @enclave_fd : The file descriptor associated with the enclave.
510 * @vcpu_id: vCPU id to be set for the enclave, either provided or
511 * auto-generated (if provided vCPU id is 0).
513 * Context: Process context.
516 * * Negative return value on failure.
518 static int ne_add_vcpu(int enclave_fd, unsigned int *vcpu_id)
522 rc = ioctl(enclave_fd, NE_ADD_VCPU, vcpu_id);
525 case NE_ERR_NO_CPUS_AVAIL_IN_POOL: {
526 printf("Error in add vcpu, no CPUs available in the NE CPU pool\n");
531 case NE_ERR_VCPU_ALREADY_USED: {
532 printf("Error in add vcpu, the provided vCPU is already used\n");
537 case NE_ERR_VCPU_NOT_IN_CPU_POOL: {
538 printf("Error in add vcpu, the provided vCPU is not in the NE CPU pool\n");
543 case NE_ERR_VCPU_INVALID_CPU_CORE: {
544 printf("Error in add vcpu, the core id of the provided vCPU is invalid\n");
549 case NE_ERR_NOT_IN_INIT_STATE: {
550 printf("Error in add vcpu, enclave not in init state\n");
555 case NE_ERR_INVALID_VCPU: {
556 printf("Error in add vcpu, the provided vCPU is out of avail CPUs range\n");
562 printf("Error in add vcpu [%m]\n");
572 * ne_start_enclave() - Start the given enclave.
573 * @enclave_fd : The file descriptor associated with the enclave.
574 * @enclave_start_info : Enclave metadata used for starting e.g. vsock CID.
576 * Context: Process context.
579 * * Negative return value on failure.
581 static int ne_start_enclave(int enclave_fd, struct ne_enclave_start_info *enclave_start_info)
585 rc = ioctl(enclave_fd, NE_START_ENCLAVE, enclave_start_info);
588 case NE_ERR_NOT_IN_INIT_STATE: {
589 printf("Error in start enclave, enclave not in init state\n");
594 case NE_ERR_NO_MEM_REGIONS_ADDED: {
595 printf("Error in start enclave, no memory regions have been added\n");
600 case NE_ERR_NO_VCPUS_ADDED: {
601 printf("Error in start enclave, no vCPUs have been added\n");
606 case NE_ERR_FULL_CORES_NOT_USED: {
607 printf("Error in start enclave, enclave has no full cores set\n");
612 case NE_ERR_ENCLAVE_MEM_MIN_SIZE: {
613 printf("Error in start enclave, enclave memory is less than min size\n");
618 case NE_ERR_INVALID_FLAG_VALUE: {
619 printf("Error in start enclave, provided invalid flag\n");
624 case NE_ERR_INVALID_ENCLAVE_CID: {
625 printf("Error in start enclave, provided invalid enclave CID\n");
631 printf("Error in start enclave [%m]\n");
641 * ne_start_enclave_check_booted() - Start the enclave and wait for a heartbeat
642 * from it, on a newly created vsock channel,
643 * to check it has booted.
644 * @enclave_fd : The file descriptor associated with the enclave.
646 * Context: Process context.
649 * * Negative return value on failure.
651 static int ne_start_enclave_check_booted(int enclave_fd)
653 struct sockaddr_vm client_vsock_addr = {};
654 int client_vsock_fd = -1;
655 socklen_t client_vsock_len = sizeof(client_vsock_addr);
656 struct ne_enclave_start_info enclave_start_info = {};
657 struct pollfd fds[1] = {};
659 unsigned char recv_buf = 0;
660 struct sockaddr_vm server_vsock_addr = {
661 .svm_family = AF_VSOCK,
662 .svm_cid = NE_IMAGE_LOAD_HEARTBEAT_CID,
663 .svm_port = NE_IMAGE_LOAD_HEARTBEAT_PORT,
665 int server_vsock_fd = -1;
667 server_vsock_fd = socket(AF_VSOCK, SOCK_STREAM, 0);
668 if (server_vsock_fd < 0) {
669 rc = server_vsock_fd;
671 printf("Error in socket [%m]\n");
676 rc = bind(server_vsock_fd, (struct sockaddr *)&server_vsock_addr,
677 sizeof(server_vsock_addr));
679 printf("Error in bind [%m]\n");
684 rc = listen(server_vsock_fd, 1);
686 printf("Error in listen [%m]\n");
691 rc = ne_start_enclave(enclave_fd, &enclave_start_info);
695 printf("Enclave started, CID %llu\n", enclave_start_info.enclave_cid);
697 fds[0].fd = server_vsock_fd;
698 fds[0].events = POLLIN;
700 rc = poll(fds, 1, NE_POLL_WAIT_TIME_MS);
702 printf("Error in poll [%m]\n");
708 printf("Poll timeout, %d seconds elapsed\n", NE_POLL_WAIT_TIME);
715 if ((fds[0].revents & POLLIN) == 0) {
716 printf("Poll received value %d\n", fds[0].revents);
723 rc = accept(server_vsock_fd, (struct sockaddr *)&client_vsock_addr,
726 printf("Error in accept [%m]\n");
731 client_vsock_fd = rc;
734 * Read the heartbeat value that the init process in the enclave sends
735 * after vsock connect.
737 rc = read(client_vsock_fd, &recv_buf, sizeof(recv_buf));
739 printf("Error in read [%m]\n");
744 if (rc != sizeof(recv_buf) || recv_buf != NE_IMAGE_LOAD_HEARTBEAT_VALUE) {
745 printf("Read %d instead of %d\n", recv_buf,
746 NE_IMAGE_LOAD_HEARTBEAT_VALUE);
751 /* Write the heartbeat value back. */
752 rc = write(client_vsock_fd, &recv_buf, sizeof(recv_buf));
754 printf("Error in write [%m]\n");
762 close(server_vsock_fd);
767 int main(int argc, char *argv[])
772 struct ne_user_mem_region ne_user_mem_regions[NE_DEFAULT_NR_MEM_REGIONS] = {};
773 unsigned int ne_vcpus[NE_DEFAULT_NR_VCPUS] = {};
775 pthread_t thread_id = 0;
776 unsigned long slot_uid = 0;
779 printf("Usage: %s <path_to_enclave_image>\n", argv[0]);
784 if (strlen(argv[1]) >= PATH_MAX) {
785 printf("The size of the path to enclave image is higher than max path\n");
790 ne_dev_fd = open(NE_DEV_NAME, O_RDWR | O_CLOEXEC);
792 printf("Error in open NE device [%m]\n");
797 printf("Creating enclave slot ...\n");
799 rc = ne_create_vm(ne_dev_fd, &slot_uid, &enclave_fd);
806 printf("Enclave fd %d\n", enclave_fd);
808 rc = pthread_create(&thread_id, NULL, ne_poll_enclave_fd, (void *)&enclave_fd);
810 printf("Error in thread create [%m]\n");
817 for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
818 ne_user_mem_regions[i].memory_size = NE_MIN_MEM_REGION_SIZE;
820 rc = ne_alloc_user_mem_region(&ne_user_mem_regions[i]);
822 printf("Error in alloc userspace memory region, iter %d\n", i);
824 goto release_enclave_fd;
828 rc = ne_load_enclave_image(enclave_fd, ne_user_mem_regions, argv[1]);
830 goto release_enclave_fd;
832 for (i = 0; i < NE_DEFAULT_NR_MEM_REGIONS; i++) {
833 rc = ne_set_user_mem_region(enclave_fd, ne_user_mem_regions[i]);
835 printf("Error in set memory region, iter %d\n", i);
837 goto release_enclave_fd;
841 printf("Enclave memory regions were added\n");
843 for (i = 0; i < NE_DEFAULT_NR_VCPUS; i++) {
845 * The vCPU is chosen from the enclave vCPU pool, if the value
846 * of the vcpu_id is 0.
849 rc = ne_add_vcpu(enclave_fd, &ne_vcpus[i]);
851 printf("Error in add vcpu, iter %d\n", i);
853 goto release_enclave_fd;
856 printf("Added vCPU %d to the enclave\n", ne_vcpus[i]);
859 printf("Enclave vCPUs were added\n");
861 rc = ne_start_enclave_check_booted(enclave_fd);
863 printf("Error in the enclave start / image loading heartbeat logic [rc=%d]\n", rc);
865 goto release_enclave_fd;
868 printf("Entering sleep for %d seconds ...\n", NE_SLEEP_TIME);
870 sleep(NE_SLEEP_TIME);
874 ne_free_mem_regions(ne_user_mem_regions);
880 ne_free_mem_regions(ne_user_mem_regions);