1 /* SPDX-License-Identifier: GPL-2.0 */
5 * Internal slab definitions
8 /* Reuses the bits in struct page */
10 unsigned long __page_flags;
12 #if defined(CONFIG_SLAB)
15 struct list_head slab_list;
16 struct rcu_head rcu_head;
18 struct kmem_cache *slab_cache;
19 void *freelist; /* array of free object indexes */
20 void *s_mem; /* first object */
23 #elif defined(CONFIG_SLUB)
26 struct list_head slab_list;
27 struct rcu_head rcu_head;
28 #ifdef CONFIG_SLUB_CPU_PARTIAL
31 int slabs; /* Nr of slabs left */
35 struct kmem_cache *slab_cache;
36 /* Double-word boundary */
37 void *freelist; /* first free object */
39 unsigned long counters;
46 unsigned int __unused;
48 #elif defined(CONFIG_SLOB)
50 struct list_head slab_list;
52 void *freelist; /* first free block */
54 unsigned int __unused_2;
57 #error "Unexpected slab allocator configured"
60 atomic_t __page_refcount;
62 unsigned long memcg_data;
/*
 * SLAB_MATCH(pg, sl) - compile-time check that member @pg of struct page and
 * member @sl of struct slab are located at the same byte offset. The build
 * fails if the two offsets differ.
 */
66 #define SLAB_MATCH(pg, sl) \
67 static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
68 SLAB_MATCH(flags, __page_flags);
69 SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
71 SLAB_MATCH(rcu_head, rcu_head);
73 SLAB_MATCH(_refcount, __page_refcount);
75 SLAB_MATCH(memcg_data, memcg_data);
78 static_assert(sizeof(struct slab) <= sizeof(struct page));
81 * folio_slab - Converts from folio to slab.
84 * Currently struct slab is a different representation of a folio where
85 * folio_test_slab() is true.
87 * Return: The slab which contains this folio.
/*
 * Type-generic cast: a "const struct folio *" argument yields a
 * "const struct slab *", a "struct folio *" argument yields a "struct slab *".
 * Any other argument type has no matching association and is rejected at
 * compile time.
 */
89 #define folio_slab(folio) (_Generic((folio), \
90 const struct folio *: (const struct slab *)(folio), \
91 struct folio *: (struct slab *)(folio)))
94 * slab_folio - The folio allocated for a slab
97 * Slabs are allocated as folios that contain the individual objects and are
98 * using some fields in the first struct page of the folio - those fields are
99 * now accessed by struct slab. It is occasionally necessary to convert back to
100 * a folio in order to communicate with the rest of the mm. Please use this
101 * helper function instead of casting yourself, as the implementation may change
/*
 * slab_folio() - type-generic conversion from a slab pointer to a folio pointer.
 * @s: pointer expression of type "struct slab *" or "const struct slab *".
 *
 * The const qualifier of @s carries over to the result. @s is parenthesized
 * inside the casts so that an expression argument such as "slab + 1" is
 * converted as a whole rather than having the cast bind to its first operand
 * only; this matches how folio_slab() and page_slab() treat their argument.
 */
#define slab_folio(s)		(_Generic((s),				\
				const struct slab *:	(const struct folio *)(s), \
				struct slab *:		(struct folio *)(s)))
109 * page_slab - Converts from first struct page to slab.
110 * @p: The first (either head of compound or single) page of slab.
112 * A temporary wrapper to convert struct page to struct slab in situations where
113 * we know the page is the compound head, or single order-0 page.
115 * Long-term ideally everything would work with struct slab directly or go
116 * through folio to struct slab.
118 * Return: The slab which contains this page
/*
 * Type-generic cast: a "const struct page *" argument yields a
 * "const struct slab *", a "struct page *" argument yields a "struct slab *".
 * The macro itself performs no check that @p is a head or order-0 page.
 */
120 #define page_slab(p) (_Generic((p), \
121 const struct page *: (const struct slab *)(p), \
122 struct page *: (struct slab *)(p)))
125 * slab_page - The first struct page allocated for a slab
128 * A convenience wrapper for converting slab to the first struct page of the
129 * underlying folio, to communicate with code not yet converted to folio or
/* Expands to page index 0 of the folio obtained from @s via slab_folio(). */
132 #define slab_page(s) folio_page(slab_folio(s), 0)
135 * If network-based swap is enabled, sl*b must keep track of whether pages
136 * were allocated from pfmemalloc reserves.
138 static inline bool slab_test_pfmemalloc(const struct slab *slab)
140 return folio_test_active((struct folio *)slab_folio(slab));
143 static inline void slab_set_pfmemalloc(struct slab *slab)
145 folio_set_active(slab_folio(slab));
148 static inline void slab_clear_pfmemalloc(struct slab *slab)
150 folio_clear_active(slab_folio(slab));
153 static inline void __slab_clear_pfmemalloc(struct slab *slab)
155 __folio_clear_active(slab_folio(slab));
158 static inline void *slab_address(const struct slab *slab)
160 return folio_address(slab_folio(slab));
163 static inline int slab_nid(const struct slab *slab)
165 return folio_nid(slab_folio(slab));
168 static inline pg_data_t *slab_pgdat(const struct slab *slab)
170 return folio_pgdat(slab_folio(slab));
173 static inline struct slab *virt_to_slab(const void *addr)
175 struct folio *folio = virt_to_folio(addr);
177 if (!folio_test_slab(folio))
180 return folio_slab(folio);
183 static inline int slab_order(const struct slab *slab)
185 return folio_order((struct folio *)slab_folio(slab));
188 static inline size_t slab_size(const struct slab *slab)
190 return PAGE_SIZE << slab_order(slab);
195 * Common fields provided in kmem_cache by all slab allocators
196 * This struct is either used directly by the allocator (SLOB)
197 * or the allocator must include definitions for all fields
198 * provided in kmem_cache_common in their definition of kmem_cache.
200 * Once we can do anonymous structs (C11 standard) we could put an
201 * anonymous struct definition in these allocators so that the
202 * separate allocations in the kmem_cache structure of SLAB and
203 * SLUB is no longer needed.
206 unsigned int object_size;/* The original size of the object */
207 unsigned int size; /* The aligned/padded/added on size */
208 unsigned int align; /* Alignment as calculated */
209 slab_flags_t flags; /* Active flags on the slab */
210 unsigned int useroffset;/* Usercopy region offset */
211 unsigned int usersize; /* Usercopy region size */
212 const char *name; /* Slab name for sysfs */
213 int refcount; /* Use counter */
214 void (*ctor)(void *); /* Called on object slot creation */
215 struct list_head list; /* List of all slab caches on the system */
218 #endif /* CONFIG_SLOB */
221 #include <linux/slab_def.h>
225 #include <linux/slub_def.h>
228 #include <linux/memcontrol.h>
229 #include <linux/fault-inject.h>
230 #include <linux/kasan.h>
231 #include <linux/kmemleak.h>
232 #include <linux/random.h>
233 #include <linux/sched/mm.h>
236 * State of the slab allocator.
238 * This is used to describe the states of the allocator during bootup.
239 * Allocators use this to gradually bootstrap themselves. Most allocators
240 * have the problem that the structures used for managing slab caches are
241 * allocated from slab caches themselves.
244 DOWN, /* No slab functionality yet */
245 PARTIAL, /* SLUB: kmem_cache_node available */
246 PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
247 UP, /* Slab caches usable but not all extras yet */
248 FULL /* Everything is working */
251 extern enum slab_state slab_state;
253 /* The slab cache mutex protects the management structures during changes */
254 extern struct mutex slab_mutex;
256 /* The list of all slab caches on the system */
257 extern struct list_head slab_caches;
259 /* The slab cache that manages slab cache information */
260 extern struct kmem_cache *kmem_cache;
262 /* A table of kmalloc cache names and sizes */
263 extern const struct kmalloc_info_struct {
264 const char *name[NR_KMALLOC_TYPES];
269 /* Kmalloc array related functions */
270 void setup_kmalloc_cache_index_table(void);
271 void create_kmalloc_caches(slab_flags_t);
273 /* Find the kmalloc slab corresponding to a certain size */
274 struct kmem_cache *kmalloc_slab(size_t, gfp_t);
277 gfp_t kmalloc_fix_flags(gfp_t flags);
279 /* Functions provided by the slab allocators */
280 int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
282 struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
283 slab_flags_t flags, unsigned int useroffset,
284 unsigned int usersize);
285 extern void create_boot_cache(struct kmem_cache *, const char *name,
286 unsigned int size, slab_flags_t flags,
287 unsigned int useroffset, unsigned int usersize);
289 int slab_unmergeable(struct kmem_cache *s);
290 struct kmem_cache *find_mergeable(unsigned size, unsigned align,
291 slab_flags_t flags, const char *name, void (*ctor)(void *));
294 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
295 slab_flags_t flags, void (*ctor)(void *));
297 slab_flags_t kmem_cache_flags(unsigned int object_size,
298 slab_flags_t flags, const char *name);
300 static inline struct kmem_cache *
301 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
302 slab_flags_t flags, void (*ctor)(void *))
305 static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
306 slab_flags_t flags, const char *name)
313 /* Legal flag mask for kmem_cache_create(), for various configurations */
314 #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
315 SLAB_CACHE_DMA32 | SLAB_PANIC | \
316 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
318 #if defined(CONFIG_DEBUG_SLAB)
319 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
320 #elif defined(CONFIG_SLUB_DEBUG)
321 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
322 SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
324 #define SLAB_DEBUG_FLAGS (0)
327 #if defined(CONFIG_SLAB)
328 #define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
329 SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
331 #elif defined(CONFIG_SLUB)
332 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
333 SLAB_TEMPORARY | SLAB_ACCOUNT)
335 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
338 /* Common flags available with current configuration */
339 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
341 /* Common flags permitted for kmem_cache_create */
342 #define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | \
347 SLAB_CONSISTENCY_CHECKS | \
350 SLAB_RECLAIM_ACCOUNT | \
354 bool __kmem_cache_empty(struct kmem_cache *);
355 int __kmem_cache_shutdown(struct kmem_cache *);
356 void __kmem_cache_release(struct kmem_cache *);
357 int __kmem_cache_shrink(struct kmem_cache *);
358 void slab_kmem_cache_release(struct kmem_cache *);
364 unsigned long active_objs;
365 unsigned long num_objs;
366 unsigned long active_slabs;
367 unsigned long num_slabs;
368 unsigned long shared_avail;
370 unsigned int batchcount;
372 unsigned int objects_per_slab;
373 unsigned int cache_order;
376 void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
377 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
378 ssize_t slabinfo_write(struct file *file, const char __user *buffer,
379 size_t count, loff_t *ppos);
382 * Generic implementation of bulk operations
383 * These are useful for situations in which the allocator cannot
384 * perform optimizations. In that case segments of the object listed
385 * may be allocated or freed using these operations.
387 void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
388 int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
390 static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
392 return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
393 NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
396 #ifdef CONFIG_SLUB_DEBUG
397 #ifdef CONFIG_SLUB_DEBUG_ON
398 DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
400 DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
402 extern void print_tracking(struct kmem_cache *s, void *object);
403 long validate_slab_cache(struct kmem_cache *s);
404 static inline bool __slub_debug_enabled(void)
406 return static_branch_unlikely(&slub_debug_enabled);
409 static inline void print_tracking(struct kmem_cache *s, void *object)
412 static inline bool __slub_debug_enabled(void)
419 * Returns true if any of the specified slub_debug flags is enabled for the
420 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
423 static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
425 if (IS_ENABLED(CONFIG_SLUB_DEBUG))
426 VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
427 if (__slub_debug_enabled())
428 return s->flags & flags;
432 #ifdef CONFIG_MEMCG_KMEM
434 * slab_objcgs - get the object cgroups vector associated with a slab
435 * @slab: a pointer to the slab struct
437 * Returns a pointer to the object cgroups vector associated with the slab,
438 * or NULL if no such vector has been associated yet.
440 static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
442 unsigned long memcg_data = READ_ONCE(slab->memcg_data);
444 VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
446 VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
448 return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
451 int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
452 gfp_t gfp, bool new_slab);
453 void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
454 enum node_stat_item idx, int nr);
456 static inline void memcg_free_slab_cgroups(struct slab *slab)
458 kfree(slab_objcgs(slab));
459 slab->memcg_data = 0;
462 static inline size_t obj_full_size(struct kmem_cache *s)
465 * For each accounted object there is an extra space which is used
466 * to store obj_cgroup membership. Charge it too.
468 return s->size + sizeof(struct obj_cgroup *);
472 * Returns false if the allocation should fail.
474 static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
475 struct obj_cgroup **objcgp,
476 size_t objects, gfp_t flags)
478 struct obj_cgroup *objcg;
480 if (!memcg_kmem_enabled())
483 if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
486 objcg = get_obj_cgroup_from_current();
490 if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
491 obj_cgroup_put(objcg);
499 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
500 struct obj_cgroup *objcg,
501 gfp_t flags, size_t size,
508 if (!memcg_kmem_enabled() || !objcg)
511 for (i = 0; i < size; i++) {
513 slab = virt_to_slab(p[i]);
515 if (!slab_objcgs(slab) &&
516 memcg_alloc_slab_cgroups(slab, s, flags,
518 obj_cgroup_uncharge(objcg, obj_full_size(s));
522 off = obj_to_index(s, slab, p[i]);
523 obj_cgroup_get(objcg);
524 slab_objcgs(slab)[off] = objcg;
525 mod_objcg_state(objcg, slab_pgdat(slab),
526 cache_vmstat_idx(s), obj_full_size(s));
528 obj_cgroup_uncharge(objcg, obj_full_size(s));
531 obj_cgroup_put(objcg);
534 static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
535 void **p, int objects)
537 struct kmem_cache *s;
538 struct obj_cgroup **objcgs;
539 struct obj_cgroup *objcg;
544 if (!memcg_kmem_enabled())
547 for (i = 0; i < objects; i++) {
551 slab = virt_to_slab(p[i]);
552 /* we could be given a kmalloc_large() object, skip those */
556 objcgs = slab_objcgs(slab);
561 s = slab->slab_cache;
565 off = obj_to_index(s, slab, p[i]);
571 obj_cgroup_uncharge(objcg, obj_full_size(s));
572 mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
574 obj_cgroup_put(objcg);
578 #else /* CONFIG_MEMCG_KMEM */
579 static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
584 static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
589 static inline int memcg_alloc_slab_cgroups(struct slab *slab,
590 struct kmem_cache *s, gfp_t gfp,
596 static inline void memcg_free_slab_cgroups(struct slab *slab)
600 static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
601 struct obj_cgroup **objcgp,
602 size_t objects, gfp_t flags)
607 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
608 struct obj_cgroup *objcg,
609 gfp_t flags, size_t size,
614 static inline void memcg_slab_free_hook(struct kmem_cache *s,
615 void **p, int objects)
618 #endif /* CONFIG_MEMCG_KMEM */
621 static inline struct kmem_cache *virt_to_cache(const void *obj)
625 slab = virt_to_slab(obj);
626 if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
629 return slab->slab_cache;
632 static __always_inline void account_slab(struct slab *slab, int order,
633 struct kmem_cache *s, gfp_t gfp)
635 if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
636 memcg_alloc_slab_cgroups(slab, s, gfp, true);
638 mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
642 static __always_inline void unaccount_slab(struct slab *slab, int order,
643 struct kmem_cache *s)
645 if (memcg_kmem_enabled())
646 memcg_free_slab_cgroups(slab);
648 mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
649 -(PAGE_SIZE << order));
652 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
654 struct kmem_cache *cachep;
656 if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
657 !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
660 cachep = virt_to_cache(x);
661 if (WARN(cachep && cachep != s,
662 "%s: Wrong slab cache. %s but object is from %s\n",
663 __func__, s->name, cachep->name))
664 print_tracking(cachep, x);
667 #endif /* CONFIG_SLOB */
669 static inline size_t slab_ksize(const struct kmem_cache *s)
672 return s->object_size;
674 #else /* CONFIG_SLUB */
675 # ifdef CONFIG_SLUB_DEBUG
677 * Debugging requires use of the padding between object
678 * and whatever may come after it.
680 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
681 return s->object_size;
683 if (s->flags & SLAB_KASAN)
684 return s->object_size;
686 * If we have the need to store the freelist pointer
687 * back there or track user information then we can
688 * only use the space before that information.
690 if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
693 * Else we can use all the padding etc for the allocation
699 static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
700 struct obj_cgroup **objcgp,
701 size_t size, gfp_t flags)
703 flags &= gfp_allowed_mask;
707 if (should_failslab(s, flags))
710 if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
716 static inline void slab_post_alloc_hook(struct kmem_cache *s,
717 struct obj_cgroup *objcg, gfp_t flags,
718 size_t size, void **p, bool init)
722 flags &= gfp_allowed_mask;
725 * As memory initialization might be integrated into KASAN,
726 * kasan_slab_alloc and initialization memset must be
727 * kept together to avoid discrepancies in behavior.
729 * As p[i] might get tagged, memset and kmemleak hook come after KASAN.
731 for (i = 0; i < size; i++) {
732 p[i] = kasan_slab_alloc(s, p[i], flags, init);
733 if (p[i] && init && !kasan_has_integrated_init())
734 memset(p[i], 0, s->object_size);
735 kmemleak_alloc_recursive(p[i], s->object_size, 1,
739 memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
744 * The slab lists for all objects.
746 struct kmem_cache_node {
747 spinlock_t list_lock;
750 struct list_head slabs_partial; /* partial list first, better asm code */
751 struct list_head slabs_full;
752 struct list_head slabs_free;
753 unsigned long total_slabs; /* length of all slab lists */
754 unsigned long free_slabs; /* length of free slab list only */
755 unsigned long free_objects;
756 unsigned int free_limit;
757 unsigned int colour_next; /* Per-node cache coloring */
758 struct array_cache *shared; /* shared per node */
759 struct alien_cache **alien; /* on other nodes */
760 unsigned long next_reap; /* updated without locking */
761 int free_touched; /* updated without locking */
765 unsigned long nr_partial;
766 struct list_head partial;
767 #ifdef CONFIG_SLUB_DEBUG
768 atomic_long_t nr_slabs;
769 atomic_long_t total_objects;
770 struct list_head full;
776 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
778 return s->node[node];
782 * Iterator over all nodes. The body will be executed for each node that has
783 * a kmem_cache_node structure allocated (which is true for all online nodes)
/*
 * for_each_kmem_cache_node - iterate over the per-node structures of a cache
 * @__s:    cache passed to get_node()
 * @__node: integer lvalue used as the node index; runs from 0 up to
 *          nr_node_ids - 1
 * @__n:    lvalue that receives get_node(@__s, @__node) for every index
 *
 * The statement following the macro is executed only for indexes where
 * get_node() returned a non-NULL pointer.
 *
 * The test is written as "if (!...) {} else" rather than a bare "if (...)" so
 * that an "else" a caller writes after the loop statement cannot pair with
 * the macro's internal test. All arguments are parenthesized so that
 * expression arguments expand as a unit.
 */
#define for_each_kmem_cache_node(__s, __node, __n)			\
	for ((__node) = 0; (__node) < nr_node_ids; (__node)++)		\
		if (!((__n) = get_node((__s), (__node)))) {} else
791 #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
792 void dump_unreclaimable_slab(void);
794 static inline void dump_unreclaimable_slab(void)
799 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);
801 #ifdef CONFIG_SLAB_FREELIST_RANDOM
802 int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
804 void cache_random_seq_destroy(struct kmem_cache *cachep);
806 static inline int cache_random_seq_create(struct kmem_cache *cachep,
807 unsigned int count, gfp_t gfp)
811 static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
812 #endif /* CONFIG_SLAB_FREELIST_RANDOM */
814 static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
816 if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
820 if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
821 return flags & __GFP_ZERO;
824 return flags & __GFP_ZERO;
827 static inline bool slab_want_init_on_free(struct kmem_cache *c)
829 if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
832 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
836 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
837 void debugfs_slab_release(struct kmem_cache *);
839 static inline void debugfs_slab_release(struct kmem_cache *s) { }
843 #define KS_ADDRS_COUNT 16
844 struct kmem_obj_info {
846 struct slab *kp_slab;
848 unsigned long kp_data_offset;
849 struct kmem_cache *kp_slab_cache;
851 void *kp_stack[KS_ADDRS_COUNT];
852 void *kp_free_stack[KS_ADDRS_COUNT];
854 void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
857 #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
858 void __check_heap_object(const void *ptr, unsigned long n,
859 const struct slab *slab, bool to_user);
862 void __check_heap_object(const void *ptr, unsigned long n,
863 const struct slab *slab, bool to_user)
868 #endif /* MM_SLAB_H */