zlogger: rework locking to fix potential AB-BA deadlock 87/298687/2
authorMarek Szyprowski <m.szyprowski@samsung.com>
Thu, 7 Sep 2023 09:06:57 +0000 (11:06 +0200)
committerMateusz Majewski <m.majewski2@samsung.com>
Wed, 13 Sep 2023 12:16:17 +0000 (14:16 +0200)
Resolve a potential AB-BA deadlock between g_block_mutex and the mmap
semaphore by always taking the mmap semaphore first. Also ensure that all
operations on internal structures are performed at least under the
g_block_mutex to keep the system state consistent.

Change-Id: Ib5b0bc2cbfc2b4d443cbee6f5460e8414c34090f
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
kernel/zlogger/zlogger.c

index b5ddc2c..167af6e 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/version.h>
 #include <linux/hashtable.h>
 #include <linux/compat.h>
+#include <linux/sched/mm.h>
 #include <uapi/linux/zlogger.h>
 
 #define MS_PER_SEC (1000)
@@ -134,6 +135,8 @@ static struct thread_table_field *get_thread_table(pid_t tid, bool is_stdout)
 {
        struct thread_table_field *ptr = NULL;
 
+       mutex_is_locked(&g_block_mutex);
+
        hash_for_each_possible(g_thread_table->data, ptr, next, tid) {
                if (ptr->tid == tid && ptr->is_stdout == is_stdout)
                        return ptr;
@@ -146,6 +149,8 @@ static struct thread_table_field *set_thread_table(pid_t tid, bool is_stdout, ui
 {
        struct thread_table_field *ptr;
 
+       mutex_is_locked(&g_block_mutex);
+
        ptr = get_thread_table(tid, is_stdout);
        if (ptr) {
                ptr->blk = blk;
@@ -164,6 +169,8 @@ static struct thread_table_field *set_thread_table(pid_t tid, bool is_stdout, ui
 
 static void clear_thread_table(struct thread_table_field *ptr)
 {
+       mutex_is_locked(&g_block_mutex);
+
        hash_del(&ptr->next);
        kfree(ptr);
 }
@@ -245,22 +252,60 @@ static int zlog_task(void *user_data)
        struct thread_table_field *ptr = NULL;
        struct hlist_node *tmp_iter = NULL;
        int tmp_bkt;
-       int blk;
 
        while (!kthread_should_stop()) {
+restart:
+               mutex_lock(&g_block_mutex);
                hash_for_each_safe(g_thread_table->data, tmp_bkt, tmp_iter, ptr, next) {
-                       blk = ptr->blk;
-                       // TODO: g_start_time should be under some kind of mutex.
-                       if (blk && get_block(blk)->head.ts < g_start_time) {
-                               get_block(blk)->head.tid = 0;
-                               ptr->blk = 0;
-                               zlogger_unmap(ptr);
-                               mutex_lock(&g_block_mutex);
-                               queue_push(&g_free_q, blk);
-                               mutex_unlock(&g_block_mutex);
+                       struct zlogger_block *block = get_block(ptr->blk);
+
+                       if (ptr->blk && block->head.ts < g_start_time) {
+                               struct mm_struct *mm = NULL;
+                               pid_t tid = ptr->tid;
+                               bool is_stdout = ptr->is_stdout;
+
+                               if (ptr->vma) {
+                                       mm = ptr->vma->vm_mm;
+                                       mmget(mm);
+
+                                       /*
+                                        * Reacquire locks in opposite order to avoid
+                                        * AB-BA deadlock
+                                        */
+                                       mutex_unlock(&g_block_mutex);
+                                       if (mmap_write_lock_killable(mm)) {
+                                               mmput(mm);
+                                               goto restart;
+                                       }
+                                       mmput(mm);
+                                       mutex_lock(&g_block_mutex);
+                               }
+
+                               if (block->head.tid != tid) {
+                                       /*
+                                        * ownership of the block has been
+                                        * modified, restart
+                                        */
+                                       if (mm)
+                                               mmap_write_unlock(mm);
+                                       mutex_unlock(&g_block_mutex);
+                                       goto restart;
+                               }
+
+                               ptr = get_thread_table(tid, is_stdout);
+                               if (ptr && ptr->blk) {
+                                       uint16_t blk = ptr->blk;
+
+                                       ptr->blk = 0;
+                                       zlogger_unmap(ptr);
+                                       queue_push(&g_free_q, blk);
+                               }
+
+                               if (mm)
+                                       mmap_write_unlock(mm);
                        }
                }
-
+               mutex_unlock(&g_block_mutex);
                schedule_timeout_interruptible(msecs_to_jiffies(MS_PER_SEC * 5));
        }
 
@@ -275,7 +320,8 @@ static struct thread_table_field *alloc_block_for_thread(bool is_stdout)
        uint16_t blk;
        struct zlogger_block *block;
 
-       mutex_lock(&g_block_mutex);
+       mutex_is_locked(&g_block_mutex);
+
        ptr = get_thread_table(tid, is_stdout);
        if (ptr && ptr->blk)
                queue_push(&g_free_q, ptr->blk);
@@ -283,14 +329,12 @@ static struct thread_table_field *alloc_block_for_thread(bool is_stdout)
        if (!blk) {
                if ((g_err_count++ % 10000) < 3)
                        pr_info("[NO MEMORY] tid:%d free:%d err:%d", tid, g_free_q.count, g_err_count);
-               mutex_unlock(&g_block_mutex);
                return NULL;
        }
        ptr = set_thread_table(tid, is_stdout, blk);
-       if (!ptr) {
-               mutex_unlock(&g_block_mutex);
+       if (!ptr)
                return NULL;
-       }
+
        block = get_block(blk);
 
        /* security: ensure mmaped block doesn't leak any information */
@@ -305,7 +349,6 @@ static struct thread_table_field *alloc_block_for_thread(bool is_stdout)
        block->head.tid = tid;
        block->head.offset = 0;
        block->head.ts = g_start_time;
-       mutex_unlock(&g_block_mutex);
 
        return ptr;
 }
@@ -350,7 +393,7 @@ static int zlogger_release(struct inode *ignored, struct file *file)
 {
        if (file->private_data != NULL) {
                struct zlog_file *zlog_file_data = (struct zlog_file *)file->private_data;
-               struct thread_table_field *ptr = get_thread_table(current->pid, true);
+               struct thread_table_field *ptr;
 
                if (zlog_file_data->buffer != NULL) {
                        kfree(zlog_file_data->buffer);
@@ -359,11 +402,11 @@ static int zlogger_release(struct inode *ignored, struct file *file)
                kfree(file->private_data);
                file->private_data = NULL;
 
-               if (ptr) {
-                       mutex_lock(&g_block_mutex);
+               mutex_lock(&g_block_mutex);
+               ptr = get_thread_table(current->pid, true);
+               if (ptr)
                        clear_thread_table(ptr);
-                       mutex_unlock(&g_block_mutex);
-               }
+               mutex_unlock(&g_block_mutex);
        }
        return 0;
 }
@@ -390,8 +433,8 @@ static void zlogger_vm_close(struct vm_area_struct *vma)
        struct thread_table_field *ptr = vma->vm_private_data;
 
        if (ptr) {
-               ptr->vma = NULL;
                mutex_lock(&g_block_mutex);
+               ptr->vma = NULL;
                clear_thread_table(ptr);
                mutex_unlock(&g_block_mutex);
        }
@@ -404,21 +447,27 @@ static vm_fault_t zlogger_fault(struct vm_fault *vmf)
        struct thread_table_field *ptr;
        void *p = NULL;
        struct page *page;
+       vm_fault_t ret = VM_FAULT_SIGSEGV;
+
+       mutex_lock(&g_block_mutex);
 
        ptr = alloc_block_for_thread(false);
        if (!ptr)
-               return VM_FAULT_SIGSEGV;
+               goto unlock;
 
        ptr->vma = vma;
        vma->vm_private_data = ptr;
 
        p = get_block(ptr->blk);
        if (!p)
-               return VM_FAULT_SIGSEGV;
+               goto unlock;
 
        page = virt_to_page((unsigned long)p);
+       ret = vmf_insert_pfn(vma, vma->vm_start, page_to_pfn(page));
 
-       return vmf_insert_pfn(vma, vma->vm_start, page_to_pfn(page));
+unlock:
+       mutex_unlock(&g_block_mutex);
+       return ret;
 }
 
 static const struct vm_operations_struct zlogger_vm_ops = {
@@ -427,40 +476,52 @@ static const struct vm_operations_struct zlogger_vm_ops = {
        .close = zlogger_vm_close,
 };
 
+/* called under mmap semaphore */
 static int zlogger_mmap(struct file *filp, struct vm_area_struct *vma)
 {
        struct thread_table_field *ptr;
        unsigned long size = vma->vm_end - vma->vm_start;
        struct page *page;
        void *p;
+       int ret;
 
        if (vma->vm_pgoff != 0 || size != ZLOGGER_BLOCK_SIZE)
                return -EINVAL;
 
-       vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY;
-       vma->vm_private_data = filp;
-       vma->vm_ops = &zlogger_vm_ops;
+       mutex_lock(&g_block_mutex);
 
        ptr = get_thread_table(current->pid, false);
-       if (ptr && ptr->vma)
-               return -EINVAL;
+       if (ptr && ptr->vma) {
+               ret = -EINVAL;
+               goto unlock;
+       }
 
        ptr = alloc_block_for_thread(false);
-       if (!ptr)
-               return -ENOMEM;
+       if (!ptr) {
+               ret = -ENOMEM;
+               goto unlock;
+       }
+
+       p = get_block(ptr->blk);
+       if (!p) {
+               ret = -ENOMEM;
+               goto unlock;
+       }
 
        ptr->vma = vma;
        vma->vm_private_data = ptr;
-
-       p = get_block(ptr->blk);
-       if (!p)
-               return -ENOMEM;
+       vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY;
+       vma->vm_ops = &zlogger_vm_ops;
 
        page = virt_to_page((unsigned long)p);
+       ret = remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), ZLOGGER_BLOCK_SIZE, vma->vm_page_prot);
 
-       return remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), ZLOGGER_BLOCK_SIZE, vma->vm_page_prot);
+unlock:
+       mutex_unlock(&g_block_mutex);
+       return ret;
 }
 
+/* called under mmap semaphore */
 static int zlogger_unmap(struct thread_table_field *ptr)
 {
        struct vm_area_struct *vma = ptr->vma;
@@ -468,13 +529,9 @@ static int zlogger_unmap(struct thread_table_field *ptr)
        if (!ptr->vma)
                return 0;
 
-       if (mmap_write_lock_killable(vma->vm_mm))
-               return -EINTR;
-
        zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
        vma->vm_private_data = NULL;
 
-       mmap_write_unlock(vma->vm_mm);
        return 0;
 }
 
@@ -487,32 +544,41 @@ static int zlogger_realloc_mmap(struct file *filp)
        void *p;
        int ret;
 
+       if (mmap_write_lock_killable(current->mm))
+               return -EINTR;
+
+       mutex_lock(&g_block_mutex);
+
        ptr = get_thread_table(tid, false);
-       if (!ptr || !ptr->vma)
-               return -EINVAL;
+       if (!ptr || !ptr->vma) {
+               ret = -EINVAL;
+               goto unlock;
+       }
 
        vma = ptr->vma;
        ptr->vma = NULL;
 
        ptr = alloc_block_for_thread(false);
-       if (!ptr)
-               return -ENOMEM;
+       if (!ptr) {
+               ret = -ENOMEM;
+               goto unlock;
+       }
 
        ptr->vma = vma;
        vma->vm_private_data = ptr;
 
        p = get_block(ptr->blk);
-       if (!p)
-               return -ENOMEM;
+       if (!p) {
+               ret = -ENOMEM;
+               goto unlock;
+       }
 
        page = virt_to_page((unsigned long)p);
-
-       if (mmap_write_lock_killable(current->mm))
-               return -EINTR;
-
        zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
        ret = remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), ZLOGGER_BLOCK_SIZE, vma->vm_page_prot);
 
+unlock:
+       mutex_unlock(&g_block_mutex);
        mmap_write_unlock(current->mm);
 
        return ret;
@@ -568,13 +634,18 @@ static int _zlog_write(const unsigned char prio, const char *tag, const char *ms
        size_t tag_size = strnlen(tag, ZLOGGER_TAG_MAX) + 1;
        size_t msg_size = strnlen(msg, ZLOGGER_MSG_MAX) + 1;
        size_t entry_size = hd_size + prio_size + tag_size + msg_size;
-       struct zlogger_block *block = get_valid_block(tid, entry_size, true);
+       struct zlogger_block *block;
        struct zlogger_entry *entry;
        struct zlogger_entry tmp;
+       int ret;
+
+       mutex_lock(&g_block_mutex);
 
+       block = get_valid_block(tid, entry_size, true);
        if (block == NULL) {
                pr_err("%s: no block available\n", __func__);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unlock;
        }
 
        entry = (struct zlogger_entry *)(block->data + block->head.offset);
@@ -584,7 +655,8 @@ static int _zlog_write(const unsigned char prio, const char *tag, const char *ms
                // as it is userspace writable.
                pr_err("[%d] block:%p(tid:%d offset:%x) entry:%p(%zu)\n",
                                tid, block, block->head.tid, block->head.offset, entry, entry_size);
-               return -EFAULT;
+               ret = -EFAULT;
+               goto unlock;
        }
 
        /* Ensure that the timestamp is increasing, as opposed to
@@ -608,7 +680,11 @@ static int _zlog_write(const unsigned char prio, const char *tag, const char *ms
        block->head.offset += (uint16_t)entry_size;
        block->head.ts = ts;
 
-       return msg_size - 1;
+       ret = msg_size - 1;
+
+unlock:
+       mutex_unlock(&g_block_mutex);
+       return ret;
 }
 
 static void endl_to_zero(struct zlog_file *zlog_file_data, size_t len)
@@ -746,28 +822,71 @@ static long zlogger_set_default_tag(struct file *filp,  unsigned long arg)
 
 static long zlogger_clear(void)
 {
-       int i;
        struct thread_table_field *ptr = NULL;
+       struct hlist_node *tmp_iter = NULL;
+       int tmp_bkt;
+       int i;
 
+restart:
        mutex_lock(&g_block_mutex);
+       hash_for_each_safe(g_thread_table->data, tmp_bkt, tmp_iter, ptr, next) {
+               struct zlogger_block *block = get_block(ptr->blk);
 
-       for (i = 1; i <= ZLOGGER_BLOCK_COUNT; i++) {
-               struct zlogger_block *block = get_block(i);
+               if (ptr->blk) {
+                       struct mm_struct *mm = NULL;
+                       pid_t tid = ptr->tid;
+                       bool is_stdout = ptr->is_stdout;
 
-               memset(&block->head, 0, sizeof(block->head));
-       }
+                       if (ptr->vma) {
+                               mm = ptr->vma->vm_mm;
+                               mmget(mm);
+
+                               /*
+                                * Reacquire locks in opposite order to avoid
+                                * AB-BA deadlock
+                                */
+                               mutex_unlock(&g_block_mutex);
+                               if (mmap_write_lock_killable(mm)) {
+                                       mmput(mm);
+                                       goto restart;
+                               }
+                               mmput(mm);
+                               mutex_lock(&g_block_mutex);
+                       }
+
+                       if (block->head.tid != tid) {
+                               /*
+                                * Ownership of the block has been modified,
+                                * restart
+                                */
+                               if (mm)
+                                       mmap_write_unlock(mm);
+                               mutex_unlock(&g_block_mutex);
+                               goto restart;
+                       }
 
-       hash_for_each(g_thread_table->data, i, ptr, next) {
-               int blk = ptr->blk;
+                       ptr = get_thread_table(tid, is_stdout);
+                       if (ptr && ptr->blk) {
+                               uint16_t blk = ptr->blk;
+
+                               ptr->blk = 0;
+                               zlogger_unmap(ptr);
+                               queue_push(&g_free_q, blk);
+                       }
 
-               if (blk != 0) {
-                       ptr->blk = 0;
-                       zlogger_unmap(ptr);
-                       queue_push(&g_free_q, blk);
+                       if (mm)
+                               mmap_write_unlock(mm);
                }
        }
 
+       for (i = 1; i <= ZLOGGER_BLOCK_COUNT; i++) {
+               struct zlogger_block *block = get_block(i);
+
+               memset(&block->head, 0, sizeof(block->head));
+       }
+
        mutex_unlock(&g_block_mutex);
+
        return 0;
 }