From: Marek Szyprowski
Date: Thu, 15 Sep 2022 11:30:03 +0000 (+0200)
Subject: zlogger: map only the actively used buffer to the userspace
X-Git-Tag: accepted/tizen/unified/20221102.172737~2^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3a489d20b218eec5073f18e5b248ced6c6a843d8;p=platform%2Fkernel%2Flinux-tizen-modules-source.git

zlogger: map only the actively used buffer to the userspace

Each zlogger client maps only a single 4KB buffer and writes to it. Once
the buffer is filled, the client calls the ALLOC ioctl (as before) and the
kernel remaps a new buffer in place of the old one. If the kernel needs to
free a buffer, the userspace mapping is cleared; the next access then
causes a page fault, which in turn allocates a new buffer, just as the
ALLOC ioctl does.

Change-Id: I2d6fe8406e201ef20b6378a7ba37ed5df7790406
Signed-off-by: Marek Szyprowski
---

diff --git a/include/uapi/linux/zlogger.h b/include/uapi/linux/zlogger.h
index 1cad482..15784ed 100644
--- a/include/uapi/linux/zlogger.h
+++ b/include/uapi/linux/zlogger.h
@@ -27,7 +27,7 @@
 #define ZLOGGER_DEVICE_COUNT (8)
 #define ZLOGGER_MAP_SIZE (4 * ZLOGGER_MB)
 #define ZLOGGER_BUFFER_SIZE (ZLOGGER_DEVICE_COUNT * ZLOGGER_MAP_SIZE)
-#define ZLOGGER_BLOCK_SIZE (2 * ZLOGGER_KB)
+#define ZLOGGER_BLOCK_SIZE (4 * ZLOGGER_KB)
 #define ZLOGGER_BLOCK_MAP_COUNT (ZLOGGER_MAP_SIZE / ZLOGGER_BLOCK_SIZE)
 #define ZLOGGER_BLOCK_COUNT (ZLOGGER_BUFFER_SIZE / ZLOGGER_BLOCK_SIZE)
 #define ZLOGGER_DATA_MAX (ZLOGGER_BLOCK_SIZE - sizeof(struct zlogger_header))
diff --git a/kernel/zlogger/zlogger.c b/kernel/zlogger/zlogger.c
index 02a4fd2..fbc7456 100644
--- a/kernel/zlogger/zlogger.c
+++ b/kernel/zlogger/zlogger.c
@@ -43,6 +43,7 @@
 #define NS_PER_SEC (1000000000UL)
 
 #define ZLOGGER_DEVICE_NAME "zlogger"
+#define ZLOGGER_DUMP_DEVICE_NAME "zlogger_dump"
 #define ZLOGGER_SMACK_LABEL "*"
 
 #define BLOCK_RATIO(count) (count*100/ZLOGGER_BLOCK_COUNT)
@@ -52,6 +53,10 @@
 #define MAX_BUF_LEN 255
 
+#if ZLOGGER_BLOCK_SIZE != PAGE_SIZE
+#error Only ZLOGGER_BLOCK_SIZE == PAGE_SIZE is supported
+#endif
+
 struct queue {
	char name[5];
	uint16_t front;
@@ -65,6 +70,7 @@ struct thread_table_field {
	pid_t tid;
	uint16_t blk;
	bool is_stdout;
+	struct vm_area_struct *vma;
	struct hlist_node next;
 };
@@ -101,6 +107,7 @@ struct zlog_file {
 
 /* --zlogger file channel */
 static struct miscdevice zlogger_device;
+static struct miscdevice zlogger_dump_device;
 static int g_init;
 static char *g_shm_ptr[ZLOGGER_DEVICE_COUNT];
@@ -123,6 +130,8 @@
 static struct completion g_completion;
 static int g_zlog_enable = 1;
 module_param_named(zlog_enable, g_zlog_enable, int, 0644);
 
+static int zlogger_unmap(struct thread_table_field *ptr);
+
 #if (KERNEL_VERSION(3, 17, 0) > LINUX_VERSION_CODE)
 static inline u64 ktime_get_ns(void)
 {
@@ -130,26 +139,26 @@ static inline u64 ktime_get_ns(void)
 }
 #endif
 
-static uint16_t get_thread_table(pid_t tid, bool is_stdout)
+static struct thread_table_field *get_thread_table(pid_t tid, bool is_stdout)
 {
	struct thread_table_field *ptr = NULL;
 
	hash_for_each_possible(g_thread_table->data, ptr, next, tid) {
		if (ptr->tid == tid && ptr->is_stdout == is_stdout)
-			return ptr->blk;
+			return ptr;
	}
-	return 0;
+	return NULL;
 }
 
-static void set_thread_table(pid_t tid, bool is_stdout, uint16_t blk)
+static struct thread_table_field *set_thread_table(pid_t tid, bool is_stdout, uint16_t blk)
 {
	struct thread_table_field *ptr = NULL;
 
	hash_for_each_possible(g_thread_table->data, ptr, next, tid) {
		if (ptr->tid == tid && ptr->is_stdout == is_stdout) {
			ptr->blk = blk;
-			return;
+			return ptr;
		}
	}
 
@@ -158,6 +167,7 @@ static void set_thread_table(pid_t tid, bool is_stdout, uint16_t blk)
	ptr->is_stdout = is_stdout;
	ptr->blk = blk;
	hash_add(g_thread_table->data, &ptr->next, tid);
+	return ptr;
 }
 
 static inline char *get_shared_memory(int dev_index)
@@ -240,16 +250,16 @@ static int zlog_task(void *user_data)
	int blk;
 
	do {
-
		hash_for_each_safe(g_thread_table->data, tmp_bkt, tmp_iter, ptr, next) {
			blk = ptr->blk;
			// TODO: g_start_time should be under some kind of mutex.
			if (blk && get_block(blk)->head.ts < g_start_time) {
-				mutex_lock(&g_block_mutex);
				get_block(blk)->head.tid = 0;
-				queue_push(&g_free_q, blk);
				ptr->blk = 0;
				// TODO: The userspace might very well be using this block right now.
+				zlogger_unmap(ptr);
+				mutex_lock(&g_block_mutex);
+				queue_push(&g_free_q, blk);
				mutex_unlock(&g_block_mutex);
			}
		}
@@ -288,29 +298,32 @@ static void run_task(void)
	}
 }
 
-static long alloc_block_for_thread(bool is_stdout)
+static struct thread_table_field *alloc_block_for_thread(bool is_stdout)
 {
+	struct thread_table_field *ptr;
	pid_t pid = current->tgid;
	pid_t tid = current->pid;
	uint16_t blk;
	struct zlogger_block *block;
 
	mutex_lock(&g_block_mutex);
-	blk = get_thread_table(tid, is_stdout);
-	if (blk)
-		queue_push(&g_free_q, blk);
+	ptr = get_thread_table(tid, is_stdout);
+	if (ptr && ptr->blk)
+		queue_push(&g_free_q, ptr->blk);
 
	blk = queue_pop(&g_free_q);
-	set_thread_table(tid, is_stdout, blk);
-
	if (!blk) {
		if ((g_err_count++ % 10000) < 3)
			pr_info("[NO MEMORY] tid:%d free:%d err:%d", tid, g_free_q.count, g_err_count);
		mutex_unlock(&g_block_mutex);
-		return -ENOMEM;
+		return NULL;
	}
-
+	ptr = set_thread_table(tid, is_stdout, blk);
	block = get_block(blk);
+	/* security: ensure the mmapped block doesn't leak any information */
+	if (!is_stdout)
+		memset(block, 0, ZLOGGER_BLOCK_SIZE);
+
	// TODO: Needs documentation on how the g_start_time value behaves.
	if (g_start_time < block->head.ts)
		g_start_time = block->head.ts;
@@ -321,31 +334,33 @@
	block->head.ts = g_start_time;
	mutex_unlock(&g_block_mutex);
 
-	return (long)blk;
+	return ptr;
 }
 
 static inline struct zlogger_block *get_valid_block(int tid, size_t len, bool is_stdout)
 {
-	uint16_t blk = 0;
-	long r;
+	struct thread_table_field *ptr;
 
-	blk = get_thread_table(tid, is_stdout);
+	ptr = get_thread_table(tid, is_stdout);
+	if (ptr && ptr->blk) {
+		struct zlogger_block *block = get_block(ptr->blk);
 
-	if (blk != 0) {
-		struct zlogger_block *block = get_block(blk);
-
-		if (!block)
+		if (!block) {
+			printk("%s %d no block %d allocated\n", __func__, __LINE__, ptr->blk);
			return NULL;
+		}
 
		if (block->head.offset + len < ZLOGGER_DATA_MAX)
			return block;
	}
 
-	r = alloc_block_for_thread(is_stdout);
-	if (r <= 0)
+	ptr = alloc_block_for_thread(is_stdout);
+	if (!ptr) {
+		printk("%s %d no block allocated\n", __func__, __LINE__);
		return NULL;
+	}
 
-	return get_block((uint16_t)r);
+	return get_block(ptr->blk);
 }
 
 static int zlogger_open(struct inode *inode, struct file *file)
@@ -373,27 +388,181 @@ static int zlogger_release(struct inode *ignored, struct file *file)
	return 0;
 }
 
-static int zlogger_mmap(struct file *filep, struct vm_area_struct *vma)
+/* called under mmap semaphore */
+static void zlogger_vm_open(struct vm_area_struct *vma)
 {
-	const int PAGES_PER_MAP = ZLOGGER_MAP_SIZE / PAGE_SIZE;
-	int dev_index = (int)vma->vm_pgoff / PAGES_PER_MAP;
-	unsigned long offset = vma->vm_pgoff % PAGES_PER_MAP;
+	/*
+	   Force a page fault on the next access to the given buffer;
+	   this will allocate the next block.
+	 */
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+
+	/*
+	   This is called from the new, copied vma,
+	   so ensure that a copy will always fault.
+	 */
+	vma->vm_private_data = NULL;
+}
+
+/* called under mmap semaphore */
+static void zlogger_vm_close(struct vm_area_struct *vma)
+{
+	struct thread_table_field *ptr = vma->vm_private_data;
+
+	if (ptr)
+		ptr->vma = NULL;
+}
+
+/* called under mmap semaphore */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+static vm_fault_t zlogger_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+#else
+static int zlogger_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#endif
+	struct thread_table_field *ptr;
+	void *p = NULL;
+	struct page *page;
+
+	ptr = alloc_block_for_thread(false);
+	if (!ptr)
+		return VM_FAULT_SIGSEGV;
+
+	ptr->vma = vma;
+	vma->vm_private_data = ptr;
+
+	p = get_block(ptr->blk);
+	if (!p)
+		return VM_FAULT_SIGSEGV;
+
+	page = virt_to_page((unsigned long)p);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+	return vmf_insert_pfn(vma, vma->vm_start, page_to_pfn(page));
+#else
+	return vm_insert_pfn(vma, vma->vm_start, page_to_pfn(page));
+#endif
+}
+
+static const struct vm_operations_struct zlogger_vm_ops = {
+	.fault = zlogger_fault,
+	.open = zlogger_vm_open,
+	.close = zlogger_vm_close,
+};
+
+static int zlogger_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct thread_table_field *ptr;
	unsigned long size = vma->vm_end - vma->vm_start;
-	char *p;
	struct page *page;
+	void *p;
 
-	if (dev_index > ZLOGGER_DEVICE_COUNT || offset != 0 || size > ZLOGGER_MAP_SIZE) {
-		pr_err("mmap failed: dev(%d) offset(%lu), size(%lu), pgoff(%lu)\n", dev_index, offset, size, vma->vm_pgoff);
+	if (vma->vm_pgoff != 0 || size != ZLOGGER_BLOCK_SIZE)
		return -EINVAL;
-	}
 
-	p = get_shared_memory(dev_index);
-	if (p)
-		page = virt_to_page((unsigned long)p);
-	else
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY;
+	vma->vm_private_data = filp;
+	vma->vm_ops = &zlogger_vm_ops;
+
+	ptr = get_thread_table(current->pid, false);
+	if (ptr && ptr->vma)
		return -EINVAL;
 
-	return remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, vma->vm_page_prot);
+	ptr = alloc_block_for_thread(false);
+	if (!ptr)
+		return -ENOMEM;
+
+	ptr->vma = vma;
+	vma->vm_private_data = ptr;
+
+	p = get_block(ptr->blk);
+	if (!p)
+		return -ENOMEM;
+
+	page = virt_to_page((unsigned long)p);
+
+	return remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), ZLOGGER_BLOCK_SIZE, vma->vm_page_prot);
+}
+
+static int zlogger_unmap(struct thread_table_field *ptr)
+{
+	struct vm_area_struct *vma = ptr->vma;
+
+	if (!ptr->vma)
+		return 0;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
+	if (mmap_write_lock_killable(vma->vm_mm))
+		return -EINTR;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	if (down_write_killable(&vma->vm_mm->mmap_sem))
+		return -EINTR;
+#else
+	down_write(&vma->vm_mm->mmap_sem);
+#endif
+
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	vma->vm_private_data = NULL;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
+	mmap_write_unlock(vma->vm_mm);
+#else
+	up_write(&vma->vm_mm->mmap_sem);
+#endif
+	return 0;
+}
+
+static int zlogger_realloc_mmap(struct file *filp)
+{
+	struct thread_table_field *ptr;
+	struct vm_area_struct *vma;
+	pid_t tid = current->pid;
+	struct page *page;
+	void *p;
+	int ret;
+
+	ptr = get_thread_table(tid, false);
+	if (!ptr || !ptr->vma)
+		return -EINVAL;
+
+	vma = ptr->vma;
+	ptr->vma = NULL;
+
+	ptr = alloc_block_for_thread(false);
+	if (!ptr)
+		return -ENOMEM;
+
+	ptr->vma = vma;
+	vma->vm_private_data = ptr;
+
+	p = get_block(ptr->blk);
+	if (!p)
+		return -ENOMEM;
+
+	page = virt_to_page((unsigned long)p);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
+	if (mmap_write_lock_killable(current->mm))
+		return -EINTR;
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	if (down_write_killable(&current->mm->mmap_sem))
+		return -EINTR;
+#else
+	down_write(&current->mm->mmap_sem);
+#endif
+
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	ret = remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), ZLOGGER_BLOCK_SIZE, vma->vm_page_prot);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
+	mmap_write_unlock(current->mm);
+#else
+	up_write(&current->mm->mmap_sem);
+#endif
+
+	return ret;
+}
 
 static ssize_t zlogger_read(struct file *filep, char __user *buffer, size_t len, loff_t *offset)
@@ -636,9 +805,12 @@ static long zlogger_clear(void)
	}
 
	hash_for_each(g_thread_table->data, i, ptr, next) {
-		if (ptr->blk != 0) {
-			queue_push(&g_free_q, ptr->blk);
+		int blk = ptr->blk;
+
+		if (blk != 0) {
			ptr->blk = 0;
+			zlogger_unmap(ptr);
+			queue_push(&g_free_q, blk);
		}
	}
 
@@ -650,20 +822,14 @@ static long zlogger_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
	switch (cmd) {
	case ZLOGGER_IOCTL_COMMAND_ALLOC:
-		return alloc_block_for_thread(false);
-		break;
-
-	case ZLOGGER_IOCTL_COMMAND_CLEAR:
-		return zlogger_clear();
-		break;
+		return zlogger_realloc_mmap(filp);
 
	case ZLOGGER_IOCTL_COMMAND_SET_DEFAULT_PRIORITY:
		return zlogger_update_prio(filp, arg);
-		break;
 
	case ZLOGGER_IOCTL_COMMAND_SET_DEFAULT_TAG:
-		return zlogger_set_default_tag(filp, arg);
-		break;
+		return zlogger_set_default_tag(filp, arg);
+
	default:
		return -EINVAL;
	}
@@ -692,6 +858,51 @@ static const struct file_operations zlogger_fops = {
	.owner = THIS_MODULE,
 };
 
+static int zlogger_dump_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	const int PAGES_PER_MAP = ZLOGGER_MAP_SIZE / PAGE_SIZE;
+	int dev_index = (int)vma->vm_pgoff / PAGES_PER_MAP;
+	unsigned long offset = vma->vm_pgoff % PAGES_PER_MAP;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	char *p;
+	struct page *page;
+
+	if (dev_index > ZLOGGER_DEVICE_COUNT || offset != 0 || size > ZLOGGER_MAP_SIZE) {
+		pr_err("mmap failed: dev(%d) offset(%lu), size(%lu), pgoff(%lu)\n", dev_index, offset, size, vma->vm_pgoff);
+		return -EINVAL;
+	}
+
+	p = get_shared_memory(dev_index);
+	if (p)
+		page = virt_to_page((unsigned long)p);
+	else
+		return -EINVAL;
+
+	return remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, vma->vm_page_prot);
+}
+
+static long zlogger_dump_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case ZLOGGER_IOCTL_COMMAND_CLEAR:
+		return zlogger_clear();
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations zlogger_dump_fops = {
+	.open = nonseekable_open,
+	.mmap = zlogger_dump_mmap,
+	.unlocked_ioctl = zlogger_dump_ioctl,
+#ifdef CONFIG_COMPAT
+	/* arg is unused so far, so we can call ioctl directly */
+	.compat_ioctl = zlogger_dump_ioctl,
+#endif
+	.owner = THIS_MODULE,
+};
+
 static int zlogger_init(void)
 {
	int i = 0;
@@ -737,12 +948,26 @@ static int zlogger_init(void)
 #ifdef CONFIG_SECURITY_SMACK_SET_DEV_SMK_LABEL
	zlogger_device.lab_smk64 = ZLOGGER_SMACK_LABEL;
 #endif
+
	r = misc_register(&zlogger_device);
	if (unlikely(r)) {
		pr_err("Failed to register misc device for '%s' (%d)\n", ZLOGGER_DEVICE_NAME, r);
		goto out_free_zlog_task;
	}
 
+	zlogger_dump_device.minor = MISC_DYNAMIC_MINOR;
+	zlogger_dump_device.name = ZLOGGER_DUMP_DEVICE_NAME;
+	zlogger_dump_device.fops = &zlogger_dump_fops;
+	zlogger_dump_device.mode = 0444;
+#ifdef CONFIG_SECURITY_SMACK_SET_DEV_SMK_LABEL
+	zlogger_dump_device.lab_smk64 = ZLOGGER_SMACK_LABEL;
+#endif
+	r = misc_register(&zlogger_dump_device);
+	if (unlikely(r)) {
+		pr_err("Failed to register misc device for '%s' (%d)\n", ZLOGGER_DUMP_DEVICE_NAME, r);
+		goto out_free_zlogger_device;
+	}
+
	g_init = 1;
	pr_info("Init success\n");
@@ -775,6 +1000,7 @@ static void zlogger_exit(void)
	struct hlist_node *tmp_iter = NULL;
	int tmp_bkt;
 
+	misc_deregister(&zlogger_dump_device);
	misc_deregister(&zlogger_device);
 
	// TODO: What about the task that is running in the background?
@@ -799,5 +1025,8 @@
 module_init(zlogger_init);
 module_exit(zlogger_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("jh1009.sung , Arkadiusz Nowak , Mateusz Majewski ");
+MODULE_AUTHOR("Arkadiusz Nowak ");
+MODULE_AUTHOR("Mateusz Majewski ");
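
For reference, the userspace side of the protocol this patch establishes looks
roughly as follows. This is a minimal sketch, not part of the patch: it assumes
the misc device appears as /dev/zlogger and that ZLOGGER_BLOCK_SIZE and
ZLOGGER_IOCTL_COMMAND_ALLOC are taken from the uapi header above; the record
format written into the block is omitted.

/*
 * Hypothetical zlogger client under the single-block mapping scheme.
 * Assumptions (not shown in this patch): the device node is /dev/zlogger,
 * and ZLOGGER_BLOCK_SIZE / ZLOGGER_IOCTL_COMMAND_ALLOC come from the
 * installed uapi header <linux/zlogger.h>.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <linux/zlogger.h>

int main(void)
{
	int fd = open("/dev/zlogger", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* zlogger_mmap() only accepts pgoff == 0 and exactly one block. */
	char *blk = mmap(NULL, ZLOGGER_BLOCK_SIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
	if (blk == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	/* ... append log records to the block until it is full ... */

	/*
	 * Once the block fills up, ask the kernel to remap a fresh block
	 * at the same address (zlogger_realloc_mmap() in this patch);
	 * no munmap()/mmap() round trip is needed.
	 */
	if (ioctl(fd, ZLOGGER_IOCTL_COMMAND_ALLOC) < 0)
		perror("ioctl(ALLOC)");

	/* blk now points at a new, zeroed block. */

	munmap(blk, ZLOGGER_BLOCK_SIZE);
	close(fd);
	return 0;
}

Note that the mapping is created with VM_DONTCOPY, so it is not inherited
across fork(); each thread maps its own block. If the background task reclaims
a block in the meantime, zlogger_unmap() zaps the PTEs and the client's next
write faults into zlogger_fault(), which transparently wires up a fresh block.
Tools that need to read the whole log buffer now mmap the read-only
zlogger_dump device instead.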