X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fslub.c;h=2258ed82880a063eee70893f26664e635d7d04a0;hb=9df53b154ac712c87db1170057aa6df05eb7bdbd;hp=8c691fa1cf3c78a91fa301bcd0cd323df1b28a1e;hpb=6eca954e25e8a67e48334e668b9ac7da21fcbc6f;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/mm/slub.c b/mm/slub.c index 8c691fa..2258ed8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,6 +16,7 @@ #include #include #include +#include "slab.h" #include #include #include @@ -33,15 +34,17 @@ #include +#include "internal.h" + /* * Lock order: - * 1. slub_lock (Global Semaphore) + * 1. slab_mutex (Global Mutex) * 2. node->list_lock * 3. slab_lock(page) (Only on some arches and for debugging) * - * slub_lock + * slab_mutex * - * The role of the slub_lock is to protect the list of all the slabs + * The role of the slab_mutex is to protect the list of all the slabs * and to synchronize major metadata changes to slab cache structures. * * The slab_lock is only used for debugging and on arches that do not @@ -182,17 +185,6 @@ static int kmem_size = sizeof(struct kmem_cache); static struct notifier_block slab_notifier; #endif -static enum { - DOWN, /* No slab functionality available */ - PARTIAL, /* Kmem_cache_node works */ - UP, /* Everything works but does not show up in sysfs */ - SYSFS /* Sysfs up */ -} slab_state = DOWN; - -/* A list of all slab caches on the system */ -static DECLARE_RWSEM(slub_lock); -static LIST_HEAD(slab_caches); - /* * Tracking user of a slab. */ @@ -218,11 +210,7 @@ static void sysfs_slab_remove(struct kmem_cache *); static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) { return 0; } -static inline void sysfs_slab_remove(struct kmem_cache *s) -{ - kfree(s->name); - kfree(s); -} +static inline void sysfs_slab_remove(struct kmem_cache *s) { } #endif @@ -237,11 +225,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) * Core slab cache functions *******************************************************************/ -int slab_is_available(void) -{ - return slab_state >= UP; -} - static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) { return s->node[node]; @@ -311,7 +294,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) * and whatever may come after it. */ if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) - return s->objsize; + return s->object_size; #endif /* @@ -609,11 +592,11 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (p > addr + 16) print_section("Bytes b4 ", p - 16, 16); - print_section("Object ", p, min_t(unsigned long, s->objsize, + print_section("Object ", p, min_t(unsigned long, s->object_size, PAGE_SIZE)); if (s->flags & SLAB_RED_ZONE) - print_section("Redzone ", p + s->objsize, - s->inuse - s->objsize); + print_section("Redzone ", p + s->object_size, + s->inuse - s->object_size); if (s->offset) off = s->offset + sizeof(void *); @@ -637,7 +620,7 @@ static void object_err(struct kmem_cache *s, struct page *page, print_trailer(s, page, object); } -static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) +static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...) { va_list args; char buf[100]; @@ -655,12 +638,12 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) u8 *p = object; if (s->flags & __OBJECT_POISON) { - memset(p, POISON_FREE, s->objsize - 1); - p[s->objsize - 1] = POISON_END; + memset(p, POISON_FREE, s->object_size - 1); + p[s->object_size - 1] = POISON_END; } if (s->flags & SLAB_RED_ZONE) - memset(p + s->objsize, val, s->inuse - s->objsize); + memset(p + s->object_size, val, s->inuse - s->object_size); } static void restore_bytes(struct kmem_cache *s, char *message, u8 data, @@ -705,10 +688,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * Poisoning uses 0x6b (POISON_FREE) and the last byte is * 0xa5 (POISON_END) * - * object + s->objsize + * object + s->object_size * Padding to reach word boundary. This is also used for Redzoning. * Padding is extended by another word if Redzoning is enabled and - * objsize == inuse. + * object_size == inuse. * * We fill with 0xbb (RED_INACTIVE) for inactive objects and with * 0xcc (RED_ACTIVE) for objects in use. @@ -727,7 +710,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * object + s->size * Nothing is used beyond s->size. * - * If slabcaches are merged then the objsize and inuse boundaries are mostly + * If slabcaches are merged then the object_size and inuse boundaries are mostly * ignored. And therefore no slab options that rely on these boundaries * may be used with merged slabcaches. */ @@ -787,25 +770,25 @@ static int check_object(struct kmem_cache *s, struct page *page, void *object, u8 val) { u8 *p = object; - u8 *endobject = object + s->objsize; + u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { if (!check_bytes_and_report(s, page, object, "Redzone", - endobject, val, s->inuse - s->objsize)) + endobject, val, s->inuse - s->object_size)) return 0; } else { - if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { + if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { check_bytes_and_report(s, page, p, "Alignment padding", - endobject, POISON_INUSE, s->inuse - s->objsize); + endobject, POISON_INUSE, s->inuse - s->object_size); } } if (s->flags & SLAB_POISON) { if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && (!check_bytes_and_report(s, page, p, "Poison", p, - POISON_FREE, s->objsize - 1) || + POISON_FREE, s->object_size - 1) || !check_bytes_and_report(s, page, p, "Poison", - p + s->objsize - 1, POISON_END, 1))) + p + s->object_size - 1, POISON_END, 1))) return 0; /* * check_pad_bytes cleans up on its own. @@ -926,7 +909,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, page->freelist); if (!alloc) - print_section("Object ", (void *)object, s->objsize); + print_section("Object ", (void *)object, s->object_size); dump_stack(); } @@ -942,14 +925,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) lockdep_trace_alloc(flags); might_sleep_if(flags & __GFP_WAIT); - return should_failslab(s->objsize, flags, s->flags); + return should_failslab(s->object_size, flags, s->flags); } static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) { flags &= gfp_allowed_mask; kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); - kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); + kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); } static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -966,13 +949,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) unsigned long flags; local_irq_save(flags); - kmemcheck_slab_free(s, x, s->objsize); - debug_check_no_locks_freed(x, s->objsize); + kmemcheck_slab_free(s, x, s->object_size); + debug_check_no_locks_freed(x, s->object_size); local_irq_restore(flags); } #endif if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(x, s->objsize); + debug_check_no_obj_freed(x, s->object_size); } /* @@ -1082,13 +1065,13 @@ bad: return 0; } -static noinline int free_debug_processing(struct kmem_cache *s, - struct page *page, void *object, unsigned long addr) +static noinline struct kmem_cache_node *free_debug_processing( + struct kmem_cache *s, struct page *page, void *object, + unsigned long addr, unsigned long *flags) { - unsigned long flags; - int rc = 0; + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - local_irq_save(flags); + spin_lock_irqsave(&n->list_lock, *flags); slab_lock(page); if (!check_slab(s, page)) @@ -1126,15 +1109,19 @@ static noinline int free_debug_processing(struct kmem_cache *s, set_track(s, object, TRACK_FREE, addr); trace(s, page, object, 0); init_object(s, object, SLUB_RED_INACTIVE); - rc = 1; out: slab_unlock(page); - local_irq_restore(flags); - return rc; + /* + * Keep node_lock to preserve integrity + * until the object is actually freed + */ + return n; fail: + slab_unlock(page); + spin_unlock_irqrestore(&n->list_lock, *flags); slab_fix(s, "Object at 0x%p not freed", object); - goto out; + return NULL; } static int __init setup_slub_debug(char *str) @@ -1207,7 +1194,7 @@ out: __setup("slub_debug", setup_slub_debug); -static unsigned long kmem_cache_flags(unsigned long objsize, +static unsigned long kmem_cache_flags(unsigned long object_size, unsigned long flags, const char *name, void (*ctor)(void *)) { @@ -1227,8 +1214,9 @@ static inline void setup_object_debug(struct kmem_cache *s, static inline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { return 0; } -static inline int free_debug_processing(struct kmem_cache *s, - struct page *page, void *object, unsigned long addr) { return 0; } +static inline struct kmem_cache_node *free_debug_processing( + struct kmem_cache *s, struct page *page, void *object, + unsigned long addr, unsigned long *flags) { return NULL; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } @@ -1237,7 +1225,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) {} static inline void remove_full(struct kmem_cache *s, struct page *page) {} -static inline unsigned long kmem_cache_flags(unsigned long objsize, +static inline unsigned long kmem_cache_flags(unsigned long object_size, unsigned long flags, const char *name, void (*ctor)(void *)) { @@ -1314,13 +1302,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) stat(s, ORDER_FALLBACK); } - if (flags & __GFP_WAIT) - local_irq_disable(); - - if (!page) - return NULL; - - if (kmemcheck_enabled + if (kmemcheck_enabled && page && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { int pages = 1 << oo_order(oo); @@ -1336,6 +1318,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) kmemcheck_mark_unallocated_pages(page, pages); } + if (flags & __GFP_WAIT) + local_irq_disable(); + if (!page) + return NULL; + page->objects = oo_objects(oo); mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? @@ -1370,6 +1357,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) inc_slabs_node(s, page_to_nid(page), page->objects); page->slab = s; __SetPageSlab(page); + if (page->pfmemalloc) + SetPageSlabPfmemalloc(page); start = page_address(page); @@ -1413,6 +1402,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -pages); + __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); reset_page_mapcount(page); if (current->reclaim_state) @@ -1490,12 +1480,12 @@ static inline void remove_partial(struct kmem_cache_node *n, } /* - * Lock slab, remove from the partial list and put the object into the - * per cpu freelist. + * Remove slab from the partial list, freeze it and + * return the pointer to the freelist. * * Returns a list of objects or NULL if it fails. * - * Must hold list_lock. + * Must hold list_lock since we modify the partial list. */ static inline void *acquire_slab(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page, @@ -1510,26 +1500,27 @@ static inline void *acquire_slab(struct kmem_cache *s, * The old freelist is the list of objects for the * per cpu allocation list. */ - do { - freelist = page->freelist; - counters = page->counters; - new.counters = counters; - if (mode) { - new.inuse = page->objects; - new.freelist = NULL; - } else { - new.freelist = freelist; - } + freelist = page->freelist; + counters = page->counters; + new.counters = counters; + if (mode) { + new.inuse = page->objects; + new.freelist = NULL; + } else { + new.freelist = freelist; + } - VM_BUG_ON(new.frozen); - new.frozen = 1; + VM_BUG_ON(new.frozen); + new.frozen = 1; - } while (!__cmpxchg_double_slab(s, page, + if (!__cmpxchg_double_slab(s, page, freelist, counters, new.freelist, new.counters, - "lock and freeze")); + "acquire_slab")) + return NULL; remove_partial(n, page); + WARN_ON(!freelist); return freelist; } @@ -1563,7 +1554,6 @@ static void *get_partial_node(struct kmem_cache *s, if (!object) { c->page = page; - c->node = page_to_nid(page); stat(s, ALLOC_FROM_PARTIAL); object = t; available = page->objects - page->inuse; @@ -1617,7 +1607,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, do { cpuset_mems_cookie = get_mems_allowed(); - zonelist = node_zonelist(slab_node(current->mempolicy), flags); + zonelist = node_zonelist(slab_node(), flags); for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; @@ -1731,14 +1721,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s) /* * Remove the cpu slab */ -static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) +static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist) { enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; - struct page *page = c->page; struct kmem_cache_node *n = get_node(s, page_to_nid(page)); int lock = 0; enum slab_modes l = M_NONE, m = M_NONE; - void *freelist; void *nextfree; int tail = DEACTIVATE_TO_HEAD; struct page new; @@ -1749,11 +1737,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) tail = DEACTIVATE_TO_TAIL; } - c->tid = next_tid(c->tid); - c->page = NULL; - freelist = c->freelist; - c->freelist = NULL; - /* * Stage one: Free all available per cpu objects back * to the page freelist while it is still frozen. Leave the @@ -1879,21 +1862,31 @@ redo: } } -/* Unfreeze all the cpu partial slabs */ +/* + * Unfreeze all the cpu partial slabs. + * + * This function must be called with interrupt disabled. + */ static void unfreeze_partials(struct kmem_cache *s) { - struct kmem_cache_node *n = NULL; + struct kmem_cache_node *n = NULL, *n2 = NULL; struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); struct page *page, *discard_page = NULL; while ((page = c->partial)) { - enum slab_modes { M_PARTIAL, M_FREE }; - enum slab_modes l, m; struct page new; struct page old; c->partial = page->next; - l = M_FREE; + + n2 = get_node(s, page_to_nid(page)); + if (n != n2) { + if (n) + spin_unlock(&n->list_lock); + + n = n2; + spin_lock(&n->list_lock); + } do { @@ -1906,43 +1899,17 @@ static void unfreeze_partials(struct kmem_cache *s) new.frozen = 0; - if (!new.inuse && (!n || n->nr_partial > s->min_partial)) - m = M_FREE; - else { - struct kmem_cache_node *n2 = get_node(s, - page_to_nid(page)); - - m = M_PARTIAL; - if (n != n2) { - if (n) - spin_unlock(&n->list_lock); - - n = n2; - spin_lock(&n->list_lock); - } - } - - if (l != m) { - if (l == M_PARTIAL) { - remove_partial(n, page); - stat(s, FREE_REMOVE_PARTIAL); - } else { - add_partial(n, page, - DEACTIVATE_TO_TAIL); - stat(s, FREE_ADD_PARTIAL); - } - - l = m; - } - - } while (!cmpxchg_double_slab(s, page, + } while (!__cmpxchg_double_slab(s, page, old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")); - if (m == M_FREE) { + if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) { page->next = discard_page; discard_page = page; + } else { + add_partial(n, page, DEACTIVATE_TO_TAIL); + stat(s, FREE_ADD_PARTIAL); } } @@ -1991,6 +1958,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) local_irq_save(flags); unfreeze_partials(s); local_irq_restore(flags); + oldpage = NULL; pobjects = 0; pages = 0; stat(s, CPU_PARTIAL_DRAIN); @@ -2011,7 +1979,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { stat(s, CPUSLAB_FLUSH); - deactivate_slab(s, c); + deactivate_slab(s, c->page, c->freelist); + + c->tid = next_tid(c->tid); + c->page = NULL; + c->freelist = NULL; } /* @@ -2055,10 +2027,10 @@ static void flush_all(struct kmem_cache *s) * Check if the objects in a per cpu structure fit numa * locality expectations. */ -static inline int node_match(struct kmem_cache_cpu *c, int node) +static inline int node_match(struct page *page, int node) { #ifdef CONFIG_NUMA - if (node != NUMA_NO_NODE && c->node != node) + if (node != NUMA_NO_NODE && page_to_nid(page) != node) return 0; #endif return 1; @@ -2101,10 +2073,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", nid, gfpflags); printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " - "default order: %d, min order: %d\n", s->name, s->objsize, + "default order: %d, min order: %d\n", s->name, s->object_size, s->size, oo_order(s->oo), oo_order(s->min)); - if (oo_order(s->min) > get_order(s->objsize)) + if (oo_order(s->min) > get_order(s->object_size)) printk(KERN_WARNING " %s debugging increased min order, use " "slub_debug=O to disable.\n", s->name); @@ -2130,10 +2102,16 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, int node, struct kmem_cache_cpu **pc) { - void *object; - struct kmem_cache_cpu *c; - struct page *page = new_slab(s, flags, node); + void *freelist; + struct kmem_cache_cpu *c = *pc; + struct page *page; + + freelist = get_partial(s, flags, node, c); + + if (freelist) + return freelist; + page = new_slab(s, flags, node); if (page) { c = __this_cpu_ptr(s->cpu_slab); if (c->page) @@ -2143,17 +2121,24 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, * No other reference to the page yet so we can * muck around with it freely without cmpxchg */ - object = page->freelist; + freelist = page->freelist; page->freelist = NULL; stat(s, ALLOC_SLAB); - c->node = page_to_nid(page); c->page = page; *pc = c; } else - object = NULL; + freelist = NULL; - return object; + return freelist; +} + +static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) +{ + if (unlikely(PageSlabPfmemalloc(page))) + return gfp_pfmemalloc_allowed(gfpflags); + + return true; } /* @@ -2163,6 +2148,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, * The page is still frozen if the return value is not NULL. * * If this function returns NULL then the page has been unfrozen. + * + * This function must be called with interrupt disabled. */ static inline void *get_freelist(struct kmem_cache *s, struct page *page) { @@ -2173,13 +2160,14 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) do { freelist = page->freelist; counters = page->counters; + new.counters = counters; VM_BUG_ON(!new.frozen); new.inuse = page->objects; new.frozen = freelist != NULL; - } while (!cmpxchg_double_slab(s, page, + } while (!__cmpxchg_double_slab(s, page, freelist, counters, NULL, new.counters, "get_freelist")); @@ -2206,7 +2194,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, struct kmem_cache_cpu *c) { - void **object; + void *freelist; + struct page *page; unsigned long flags; local_irq_save(flags); @@ -2219,25 +2208,41 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, c = this_cpu_ptr(s->cpu_slab); #endif - if (!c->page) + page = c->page; + if (!page) goto new_slab; redo: - if (unlikely(!node_match(c, node))) { + + if (unlikely(!node_match(page, node))) { stat(s, ALLOC_NODE_MISMATCH); - deactivate_slab(s, c); + deactivate_slab(s, page, c->freelist); + c->page = NULL; + c->freelist = NULL; + goto new_slab; + } + + /* + * By rights, we should be searching for a slab page that was + * PFMEMALLOC but right now, we are losing the pfmemalloc + * information when the page leaves the per-cpu allocator + */ + if (unlikely(!pfmemalloc_match(page, gfpflags))) { + deactivate_slab(s, page, c->freelist); + c->page = NULL; + c->freelist = NULL; goto new_slab; } /* must check again c->freelist in case of cpu migration or IRQ */ - object = c->freelist; - if (object) + freelist = c->freelist; + if (freelist) goto load_freelist; stat(s, ALLOC_SLOWPATH); - object = get_freelist(s, c->page); + freelist = get_freelist(s, page); - if (!object) { + if (!freelist) { c->page = NULL; stat(s, DEACTIVATE_BYPASS); goto new_slab; @@ -2246,50 +2251,50 @@ redo: stat(s, ALLOC_REFILL); load_freelist: - c->freelist = get_freepointer(s, object); + /* + * freelist is pointing to the list of objects to be used. + * page is pointing to the page from which the objects are obtained. + * That page must be frozen for per cpu allocations to work. + */ + VM_BUG_ON(!c->page->frozen); + c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); local_irq_restore(flags); - return object; + return freelist; new_slab: if (c->partial) { - c->page = c->partial; - c->partial = c->page->next; - c->node = page_to_nid(c->page); + page = c->page = c->partial; + c->partial = page->next; stat(s, CPU_PARTIAL_ALLOC); c->freelist = NULL; goto redo; } - /* Then do expensive stuff like retrieving pages from the partial lists */ - object = get_partial(s, gfpflags, node, c); + freelist = new_slab_objects(s, gfpflags, node, &c); - if (unlikely(!object)) { + if (unlikely(!freelist)) { + if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) + slab_out_of_memory(s, gfpflags, node); - object = new_slab_objects(s, gfpflags, node, &c); - - if (unlikely(!object)) { - if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) - slab_out_of_memory(s, gfpflags, node); - - local_irq_restore(flags); - return NULL; - } + local_irq_restore(flags); + return NULL; } - if (likely(!kmem_cache_debug(s))) + page = c->page; + if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) goto load_freelist; /* Only entered in the debug case */ - if (!alloc_debug_processing(s, c->page, object, addr)) + if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr)) goto new_slab; /* Slab failed checks. Next slab needed */ - c->freelist = get_freepointer(s, object); - deactivate_slab(s, c); - c->node = NUMA_NO_NODE; + deactivate_slab(s, page, get_freepointer(s, freelist)); + c->page = NULL; + c->freelist = NULL; local_irq_restore(flags); - return object; + return freelist; } /* @@ -2307,6 +2312,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, { void **object; struct kmem_cache_cpu *c; + struct page *page; unsigned long tid; if (slab_pre_alloc_hook(s, gfpflags)) @@ -2332,8 +2338,8 @@ redo: barrier(); object = c->freelist; - if (unlikely(!object || !node_match(c, node))) - + page = c->page; + if (unlikely(!object || !node_match(page, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { @@ -2364,7 +2370,7 @@ redo: } if (unlikely(gfpflags & __GFP_ZERO) && object) - memset(object, 0, s->objsize); + memset(object, 0, s->object_size); slab_post_alloc_hook(s, gfpflags, object); @@ -2375,7 +2381,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); - trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); + trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags); return ret; } @@ -2405,7 +2411,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); trace_kmem_cache_alloc_node(_RET_IP_, ret, - s->objsize, s->size, gfpflags, node); + s->object_size, s->size, gfpflags, node); return ret; } @@ -2448,7 +2454,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, stat(s, FREE_SLOWPATH); - if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) + if (kmem_cache_debug(s) && + !(n = free_debug_processing(s, page, x, addr, &flags))) return; do { @@ -2603,6 +2610,13 @@ void kmem_cache_free(struct kmem_cache *s, void *x) page = virt_to_head_page(x); + if (kmem_cache_debug(s) && page->slab != s) { + pr_err("kmem_cache_free: Wrong slab cache. %s but object" + " is from %s\n", page->slab->name, s->name); + WARN_ON_ONCE(1); + return; + } + slab_free(s, page, x, _RET_IP_); trace_kmem_cache_free(_RET_IP_, x); @@ -2900,7 +2914,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min) static int calculate_sizes(struct kmem_cache *s, int forced_order) { unsigned long flags = s->flags; - unsigned long size = s->objsize; + unsigned long size = s->object_size; unsigned long align = s->align; int order; @@ -2929,7 +2943,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * end of the object and the free pointer. If not then add an * additional word to have some bytes to store Redzone information. */ - if ((flags & SLAB_RED_ZONE) && size == s->objsize) + if ((flags & SLAB_RED_ZONE) && size == s->object_size) size += sizeof(void *); #endif @@ -2977,7 +2991,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * user specified and the dynamic determination of cache line size * on bootup. */ - align = calculate_alignment(flags, align, s->objsize); + align = calculate_alignment(flags, align, s->object_size); s->align = align; /* @@ -3017,17 +3031,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) } -static int kmem_cache_open(struct kmem_cache *s, - const char *name, size_t size, - size_t align, unsigned long flags, - void (*ctor)(void *)) +static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) { - memset(s, 0, kmem_size); - s->name = name; - s->ctor = ctor; - s->objsize = size; - s->align = align; - s->flags = kmem_cache_flags(size, flags, name, ctor); + s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); s->reserved = 0; if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) @@ -3040,7 +3046,7 @@ static int kmem_cache_open(struct kmem_cache *s, * Disable debugging flags that store metadata if the min slab * order increased. */ - if (get_order(s->size) > get_order(s->objsize)) { + if (get_order(s->size) > get_order(s->object_size)) { s->flags &= ~DEBUG_METADATA_FLAGS; s->offset = 0; if (!calculate_sizes(s, -1)) @@ -3089,7 +3095,6 @@ static int kmem_cache_open(struct kmem_cache *s, else s->cpu_partial = 30; - s->refcount = 1; #ifdef CONFIG_NUMA s->remote_node_defrag_ratio = 1000; #endif @@ -3097,16 +3102,16 @@ static int kmem_cache_open(struct kmem_cache *s, goto error; if (alloc_kmem_cache_cpus(s)) - return 1; + return 0; free_kmem_cache_nodes(s); error: if (flags & SLAB_PANIC) panic("Cannot create slab %s size=%lu realsize=%u " "order=%u offset=%u flags=%lx\n", - s->name, (unsigned long)size, s->size, oo_order(s->oo), + s->name, (unsigned long)s->size, s->size, oo_order(s->oo), s->offset, flags); - return 0; + return -EINVAL; } /* @@ -3114,7 +3119,7 @@ error: */ unsigned int kmem_cache_size(struct kmem_cache *s) { - return s->objsize; + return s->object_size; } EXPORT_SYMBOL(kmem_cache_size); @@ -3128,7 +3133,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, sizeof(long), GFP_ATOMIC); if (!map) return; - slab_err(s, page, "%s", text); + slab_err(s, page, text, s->name); slab_lock(page); get_map(s, page, map); @@ -3160,7 +3165,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) discard_slab(s, page); } else { list_slab_objects(s, page, - "Objects remaining on kmem_cache_close()"); + "Objects remaining in %s on kmem_cache_close()"); } } } @@ -3173,7 +3178,6 @@ static inline int kmem_cache_close(struct kmem_cache *s) int node; flush_all(s); - free_percpu(s->cpu_slab); /* Attempt to free all objects */ for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -3182,33 +3186,20 @@ static inline int kmem_cache_close(struct kmem_cache *s) if (n->nr_partial || slabs_node(s, node)) return 1; } + free_percpu(s->cpu_slab); free_kmem_cache_nodes(s); return 0; } -/* - * Close a cache and release the kmem_cache structure - * (must be used for caches created using kmem_cache_create) - */ -void kmem_cache_destroy(struct kmem_cache *s) -{ - down_write(&slub_lock); - s->refcount--; - if (!s->refcount) { - list_del(&s->list); - up_write(&slub_lock); - if (kmem_cache_close(s)) { - printk(KERN_ERR "SLUB %s: %s called for cache that " - "still has objects.\n", s->name, __func__); - dump_stack(); - } - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); +int __kmem_cache_shutdown(struct kmem_cache *s) +{ + int rc = kmem_cache_close(s); + + if (!rc) sysfs_slab_remove(s); - } else - up_write(&slub_lock); + + return rc; } -EXPORT_SYMBOL(kmem_cache_destroy); /******************************************************************** * Kmalloc subsystem @@ -3217,8 +3208,6 @@ EXPORT_SYMBOL(kmem_cache_destroy); struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; EXPORT_SYMBOL(kmalloc_caches); -static struct kmem_cache *kmem_cache; - #ifdef CONFIG_ZONE_DMA static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; #endif @@ -3264,14 +3253,17 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name, { struct kmem_cache *s; - s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); + s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + + s->name = name; + s->size = s->object_size = size; + s->align = ARCH_KMALLOC_MINALIGN; /* * This function is called with IRQs disabled during early-boot on - * single CPU so there's no need to take slub_lock here. + * single CPU so there's no need to take slab_mutex here. */ - if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, - flags, NULL)) + if (kmem_cache_open(s, flags)) goto panic; list_add(&s->list, &slab_caches); @@ -3473,7 +3465,7 @@ void kfree(const void *x) if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); kmemleak_free(x); - put_page(page); + __free_pages(page, compound_order(page)); return; } slab_free(page->slab, page, object, _RET_IP_); @@ -3553,10 +3545,10 @@ static int slab_mem_going_offline_callback(void *arg) { struct kmem_cache *s; - down_read(&slub_lock); + mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) kmem_cache_shrink(s); - up_read(&slub_lock); + mutex_unlock(&slab_mutex); return 0; } @@ -3577,7 +3569,7 @@ static void slab_mem_offline_callback(void *arg) if (offline_node < 0) return; - down_read(&slub_lock); + mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { n = get_node(s, offline_node); if (n) { @@ -3593,7 +3585,7 @@ static void slab_mem_offline_callback(void *arg) kmem_cache_free(kmem_cache_node, n); } } - up_read(&slub_lock); + mutex_unlock(&slab_mutex); } static int slab_mem_going_online_callback(void *arg) @@ -3616,7 +3608,7 @@ static int slab_mem_going_online_callback(void *arg) * allocate a kmem_cache_node structure in order to bring the node * online. */ - down_read(&slub_lock); + mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { /* * XXX: kmem_cache_alloc_node will fallback to other nodes @@ -3632,7 +3624,7 @@ static int slab_mem_going_online_callback(void *arg) s->node[nid] = n; } out: - up_read(&slub_lock); + mutex_unlock(&slab_mutex); return ret; } @@ -3710,12 +3702,12 @@ void __init kmem_cache_init(void) slub_max_order = 0; kmem_size = offsetof(struct kmem_cache, node) + - nr_node_ids * sizeof(struct kmem_cache_node *); + nr_node_ids * sizeof(struct kmem_cache_node *); /* Allocate two kmem_caches from the page allocator */ kmalloc_size = ALIGN(kmem_size, cache_line_size()); order = get_order(2 * kmalloc_size); - kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); + kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order); /* * Must first have the slab cache available for the allocations of the @@ -3724,9 +3716,10 @@ void __init kmem_cache_init(void) */ kmem_cache_node = (void *)kmem_cache + kmalloc_size; - kmem_cache_open(kmem_cache_node, "kmem_cache_node", - sizeof(struct kmem_cache_node), - 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + kmem_cache_node->name = "kmem_cache_node"; + kmem_cache_node->size = kmem_cache_node->object_size = + sizeof(struct kmem_cache_node); + kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC); hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); @@ -3734,8 +3727,10 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; temp_kmem_cache = kmem_cache; - kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, - 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + kmem_cache->name = "kmem_cache"; + kmem_cache->size = kmem_cache->object_size = kmem_size; + kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC); + kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); memcpy(kmem_cache, temp_kmem_cache, kmem_size); @@ -3843,11 +3838,11 @@ void __init kmem_cache_init(void) if (s && s->size) { char *name = kasprintf(GFP_NOWAIT, - "dma-kmalloc-%d", s->objsize); + "dma-kmalloc-%d", s->object_size); BUG_ON(!name); kmalloc_dma_caches[i] = create_kmalloc_cache(name, - s->objsize, SLAB_CACHE_DMA); + s->object_size, SLAB_CACHE_DMA); } } #endif @@ -3924,16 +3919,11 @@ static struct kmem_cache *find_mergeable(size_t size, return NULL; } -struct kmem_cache *kmem_cache_create(const char *name, size_t size, +struct kmem_cache *__kmem_cache_alias(const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s; - char *n; - - if (WARN_ON(!name)) - return NULL; - down_write(&slub_lock); s = find_mergeable(size, align, flags, name, ctor); if (s) { s->refcount++; @@ -3941,49 +3931,35 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, * Adjust the object sizes so that we clear * the complete object on kzalloc. */ - s->objsize = max(s->objsize, (int)size); + s->object_size = max(s->object_size, (int)size); s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); if (sysfs_slab_alias(s, name)) { s->refcount--; - goto err; + s = NULL; } - up_write(&slub_lock); - return s; } - n = kstrdup(name, GFP_KERNEL); - if (!n) - goto err; + return s; +} - s = kmalloc(kmem_size, GFP_KERNEL); - if (s) { - if (kmem_cache_open(s, n, - size, align, flags, ctor)) { - list_add(&s->list, &slab_caches); - up_write(&slub_lock); - if (sysfs_slab_add(s)) { - down_write(&slub_lock); - list_del(&s->list); - kfree(n); - kfree(s); - goto err; - } - return s; - } - kfree(s); - } - kfree(n); -err: - up_write(&slub_lock); +int __kmem_cache_create(struct kmem_cache *s, unsigned long flags) +{ + int err; - if (flags & SLAB_PANIC) - panic("Cannot create slabcache %s\n", name); - else - s = NULL; - return s; + err = kmem_cache_open(s, flags); + if (err) + return err; + + mutex_unlock(&slab_mutex); + err = sysfs_slab_add(s); + mutex_lock(&slab_mutex); + + if (err) + kmem_cache_close(s); + + return err; } -EXPORT_SYMBOL(kmem_cache_create); #ifdef CONFIG_SMP /* @@ -4002,13 +3978,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - down_read(&slub_lock); + mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { local_irq_save(flags); __flush_cpu_slab(s, cpu); local_irq_restore(flags); } - up_read(&slub_lock); + mutex_unlock(&slab_mutex); break; default: break; @@ -4500,30 +4476,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s, for_each_possible_cpu(cpu) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - int node = ACCESS_ONCE(c->node); + int node; struct page *page; - if (node < 0) - continue; page = ACCESS_ONCE(c->page); - if (page) { - if (flags & SO_TOTAL) - x = page->objects; - else if (flags & SO_OBJECTS) - x = page->inuse; - else - x = 1; + if (!page) + continue; - total += x; - nodes[node] += x; - } - page = c->partial; + node = page_to_nid(page); + if (flags & SO_TOTAL) + x = page->objects; + else if (flags & SO_OBJECTS) + x = page->inuse; + else + x = 1; + + total += x; + nodes[node] += x; + page = ACCESS_ONCE(c->partial); if (page) { x = page->pobjects; total += x; nodes[node] += x; } + per_cpu[node]++; } } @@ -4623,7 +4600,7 @@ SLAB_ATTR_RO(align); static ssize_t object_size_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", s->objsize); + return sprintf(buf, "%d\n", s->object_size); } SLAB_ATTR_RO(object_size); @@ -5211,14 +5188,6 @@ static ssize_t slab_attr_store(struct kobject *kobj, return err; } -static void kmem_cache_release(struct kobject *kobj) -{ - struct kmem_cache *s = to_slab(kobj); - - kfree(s->name); - kfree(s); -} - static const struct sysfs_ops slab_sysfs_ops = { .show = slab_attr_show, .store = slab_attr_store, @@ -5226,7 +5195,6 @@ static const struct sysfs_ops slab_sysfs_ops = { static struct kobj_type slab_ktype = { .sysfs_ops = &slab_sysfs_ops, - .release = kmem_cache_release }; static int uevent_filter(struct kset *kset, struct kobject *kobj) @@ -5286,7 +5254,7 @@ static int sysfs_slab_add(struct kmem_cache *s) const char *name; int unmergeable; - if (slab_state < SYSFS) + if (slab_state < FULL) /* Defer until later */ return 0; @@ -5331,7 +5299,7 @@ static int sysfs_slab_add(struct kmem_cache *s) static void sysfs_slab_remove(struct kmem_cache *s) { - if (slab_state < SYSFS) + if (slab_state < FULL) /* * Sysfs has not been setup yet so no need to remove the * cache from sysfs. @@ -5359,7 +5327,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name) { struct saved_alias *al; - if (slab_state == SYSFS) { + if (slab_state == FULL) { /* * If we have a leftover link then remove it. */ @@ -5383,16 +5351,16 @@ static int __init slab_sysfs_init(void) struct kmem_cache *s; int err; - down_write(&slub_lock); + mutex_lock(&slab_mutex); slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); if (!slab_kset) { - up_write(&slub_lock); + mutex_unlock(&slab_mutex); printk(KERN_ERR "Cannot register slab subsystem.\n"); return -ENOSYS; } - slab_state = SYSFS; + slab_state = FULL; list_for_each_entry(s, &slab_caches, list) { err = sysfs_slab_add(s); @@ -5408,11 +5376,11 @@ static int __init slab_sysfs_init(void) err = sysfs_slab_alias(al->s, al->name); if (err) printk(KERN_ERR "SLUB: Unable to add boot slab alias" - " %s to sysfs\n", s->name); + " %s to sysfs\n", al->name); kfree(al); } - up_write(&slub_lock); + mutex_unlock(&slab_mutex); resiliency_test(); return 0; } @@ -5427,7 +5395,7 @@ __initcall(slab_sysfs_init); static void print_slabinfo_header(struct seq_file *m) { seq_puts(m, "slabinfo - version: 2.1\n"); - seq_puts(m, "# name " + seq_puts(m, "# name " " "); seq_puts(m, " : tunables "); seq_puts(m, " : slabdata "); @@ -5438,7 +5406,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) { loff_t n = *pos; - down_read(&slub_lock); + mutex_lock(&slab_mutex); if (!n) print_slabinfo_header(m); @@ -5452,7 +5420,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { - up_read(&slub_lock); + mutex_unlock(&slab_mutex); } static int s_show(struct seq_file *m, void *p)