mm: kfence: allocate kfence_metadata at runtime
author Peng Zhang <zhangpeng.00@bytedance.com>
Tue, 18 Jul 2023 07:30:19 +0000 (15:30 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 18 Aug 2023 17:12:39 +0000 (10:12 -0700)
kfence_metadata is currently a static array.  As a first step towards
making the allocation of __kfence_pool scalable, change the metadata to be
allocated at runtime.  Since a single kfence_metadata object is 1160 bytes,
this saves at least 72 pages (with the default 256 objects) when kfence is
not enabled.

[akpm@linux-foundation.org: restore newline, per Marco]
Link: https://lkml.kernel.org/r/20230718073019.52513-1-zhangpeng.00@bytedance.com
Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/kfence.h
mm/kfence/core.c
mm/kfence/kfence.h
mm/mm_init.c

index 726857a..401af47 100644 (file)
@@ -59,15 +59,16 @@ static __always_inline bool is_kfence_address(const void *addr)
 }
 
 /**
- * kfence_alloc_pool() - allocate the KFENCE pool via memblock
+ * kfence_alloc_pool_and_metadata() - allocate the KFENCE pool and KFENCE
+ * metadata via memblock
  */
-void __init kfence_alloc_pool(void);
+void __init kfence_alloc_pool_and_metadata(void);
 
 /**
  * kfence_init() - perform KFENCE initialization at boot time
  *
- * Requires that kfence_alloc_pool() was called before. This sets up the
- * allocation gate timer, and requires that workqueues are available.
+ * Requires that kfence_alloc_pool_and_metadata() was called before. This sets
+ * up the allocation gate timer, and requires that workqueues are available.
  */
 void __init kfence_init(void);
 
@@ -223,7 +224,7 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
 #else /* CONFIG_KFENCE */
 
 static inline bool is_kfence_address(const void *addr) { return false; }
-static inline void kfence_alloc_pool(void) { }
+static inline void kfence_alloc_pool_and_metadata(void) { }
 static inline void kfence_init(void) { }
 static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
 static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
index dad3c0e..96fd041 100644 (file)
@@ -116,7 +116,15 @@ EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
  * backing pages (in __kfence_pool).
  */
 static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
-struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+struct kfence_metadata *kfence_metadata __read_mostly;
+
+/*
+ * If kfence_metadata is not NULL, it may be accessed by kfence_shutdown_cache().
+ * So introduce kfence_metadata_init to initialize metadata, and then make
+ * kfence_metadata visible after initialization is successful. This prevents
+ * potential UAF or access to uninitialized metadata.
+ */
+static struct kfence_metadata *kfence_metadata_init __read_mostly;
 
 /* Freelist with available objects. */
 static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
@@ -591,7 +599,7 @@ static unsigned long kfence_init_pool(void)
 
                __folio_set_slab(slab_folio(slab));
 #ifdef CONFIG_MEMCG
-               slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
+               slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
                                   MEMCG_DATA_OBJCGS;
 #endif
        }
@@ -610,7 +618,7 @@ static unsigned long kfence_init_pool(void)
        }
 
        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
-               struct kfence_metadata *meta = &kfence_metadata[i];
+               struct kfence_metadata *meta = &kfence_metadata_init[i];
 
                /* Initialize metadata. */
                INIT_LIST_HEAD(&meta->list);
@@ -626,6 +634,12 @@ static unsigned long kfence_init_pool(void)
                addr += 2 * PAGE_SIZE;
        }
 
+       /*
+        * Make kfence_metadata visible only when initialization is successful.
+        * Otherwise, if the initialization fails and kfence_metadata is freed,
+        * it may cause UAF in kfence_shutdown_cache().
+        */
+       smp_store_release(&kfence_metadata, kfence_metadata_init);
        return 0;
 
 reset_slab:
@@ -672,26 +686,10 @@ static bool __init kfence_init_pool_early(void)
         */
        memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
        __kfence_pool = NULL;
-       return false;
-}
-
-static bool kfence_init_pool_late(void)
-{
-       unsigned long addr, free_size;
 
-       addr = kfence_init_pool();
-
-       if (!addr)
-               return true;
+       memblock_free_late(__pa(kfence_metadata_init), KFENCE_METADATA_SIZE);
+       kfence_metadata_init = NULL;
 
-       /* Same as above. */
-       free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
-#ifdef CONFIG_CONTIG_ALLOC
-       free_contig_range(page_to_pfn(virt_to_page((void *)addr)), free_size / PAGE_SIZE);
-#else
-       free_pages_exact((void *)addr, free_size);
-#endif
-       __kfence_pool = NULL;
        return false;
 }
 
@@ -841,19 +839,30 @@ static void toggle_allocation_gate(struct work_struct *work)
 
 /* === Public interface ===================================================== */
 
-void __init kfence_alloc_pool(void)
+void __init kfence_alloc_pool_and_metadata(void)
 {
        if (!kfence_sample_interval)
                return;
 
-       /* if the pool has already been initialized by arch, skip the below. */
-       if (__kfence_pool)
-               return;
-
-       __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
-
+       /*
+        * If the pool has already been initialized by arch, there is no need to
+        * re-allocate the memory pool.
+        */
        if (!__kfence_pool)
+               __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+
+       if (!__kfence_pool) {
                pr_err("failed to allocate pool\n");
+               return;
+       }
+
+       /* The memory allocated by memblock has been zeroed out. */
+       kfence_metadata_init = memblock_alloc(KFENCE_METADATA_SIZE, PAGE_SIZE);
+       if (!kfence_metadata_init) {
+               pr_err("failed to allocate metadata\n");
+               memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
+               __kfence_pool = NULL;
+       }
 }
 
 static void kfence_init_enable(void)
@@ -895,33 +904,69 @@ void __init kfence_init(void)
 
 static int kfence_init_late(void)
 {
-       const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
+       const unsigned long nr_pages_pool = KFENCE_POOL_SIZE / PAGE_SIZE;
+       const unsigned long nr_pages_meta = KFENCE_METADATA_SIZE / PAGE_SIZE;
+       unsigned long addr = (unsigned long)__kfence_pool;
+       unsigned long free_size = KFENCE_POOL_SIZE;
+       int err = -ENOMEM;
+
 #ifdef CONFIG_CONTIG_ALLOC
        struct page *pages;
 
-       pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
+       pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node,
+                                  NULL);
        if (!pages)
                return -ENOMEM;
+
        __kfence_pool = page_to_virt(pages);
+       pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node,
+                                  NULL);
+       if (pages)
+               kfence_metadata_init = page_to_virt(pages);
 #else
-       if (nr_pages > MAX_ORDER_NR_PAGES) {
+       if (nr_pages_pool > MAX_ORDER_NR_PAGES ||
+           nr_pages_meta > MAX_ORDER_NR_PAGES) {
                pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
                return -EINVAL;
        }
+
        __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
        if (!__kfence_pool)
                return -ENOMEM;
+
+       kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL);
 #endif
 
-       if (!kfence_init_pool_late()) {
-               pr_err("%s failed\n", __func__);
-               return -EBUSY;
+       if (!kfence_metadata_init)
+               goto free_pool;
+
+       memzero_explicit(kfence_metadata_init, KFENCE_METADATA_SIZE);
+       addr = kfence_init_pool();
+       if (!addr) {
+               kfence_init_enable();
+               kfence_debugfs_init();
+               return 0;
        }
 
-       kfence_init_enable();
-       kfence_debugfs_init();
+       pr_err("%s failed\n", __func__);
+       free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
+       err = -EBUSY;
 
-       return 0;
+#ifdef CONFIG_CONTIG_ALLOC
+       free_contig_range(page_to_pfn(virt_to_page((void *)kfence_metadata_init)),
+                         nr_pages_meta);
+free_pool:
+       free_contig_range(page_to_pfn(virt_to_page((void *)addr)),
+                         free_size / PAGE_SIZE);
+#else
+       free_pages_exact((void *)kfence_metadata_init, KFENCE_METADATA_SIZE);
+free_pool:
+       free_pages_exact((void *)addr, free_size);
+#endif
+
+       kfence_metadata_init = NULL;
+       __kfence_pool = NULL;
+       return err;
 }
 
 static int kfence_enable_late(void)
@@ -941,6 +986,10 @@ void kfence_shutdown_cache(struct kmem_cache *s)
        struct kfence_metadata *meta;
        int i;
 
+       /* Pairs with release in kfence_init_pool(). */
+       if (!smp_load_acquire(&kfence_metadata))
+               return;
+
        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                bool in_use;
 
index 392fb27..f46fbb0 100644 (file)
@@ -102,7 +102,10 @@ struct kfence_metadata {
 #endif
 };
 
-extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+#define KFENCE_METADATA_SIZE PAGE_ALIGN(sizeof(struct kfence_metadata) * \
+                                       CONFIG_KFENCE_NUM_OBJECTS)
+
+extern struct kfence_metadata *kfence_metadata;
 
 static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
 {
index 7e3fcdb..acb0ac1 100644 (file)
@@ -2767,7 +2767,7 @@ void __init mm_core_init(void)
         */
        page_ext_init_flatmem();
        mem_debugging_and_hardening_init();
-       kfence_alloc_pool();
+       kfence_alloc_pool_and_metadata();
        report_meminit();
        kmsan_init_shadow();
        stack_depot_early_init();