X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fslab.c;h=11d9af5f9d2e5f19e682f2fe45c98ed7674702d5;hb=cce89f4f6911286500cf7be0363f46c9b0a12ce0;hp=e901a36e2520c2b7aa5123ddcd0553415b764397;hpb=a8822e2d570bc265c9fa0347710fbcc7bef8b327;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/mm/slab.c b/mm/slab.c index e901a36..11d9af5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -68,7 +68,7 @@ * Further notes from the original documentation: * * 11 April '97. Started multi-threading - markhe - * The global cache-chain is protected by the mutex 'cache_chain_mutex'. + * The global cache-chain is protected by the mutex 'slab_mutex'. * The sem is only needed when accessing/extending the cache-chain, which * can never happen inside an interrupt (kmem_cache_create(), * kmem_cache_shrink() and kmem_cache_reap()). @@ -87,6 +87,7 @@ */ #include +#include "slab.h" #include #include #include @@ -117,12 +118,16 @@ #include #include +#include + #include #include #include #include +#include "internal.h" + /* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. * 0 for faster, smaller code (especially in the critical paths). @@ -151,6 +156,12 @@ #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN #endif +/* + * true if a page was allocated from pfmemalloc reserves for network-based + * swap + */ +static bool pfmemalloc_active __read_mostly; + /* Legal flag mask for kmem_cache_create(). */ #if DEBUG # define CREATE_MASK (SLAB_RED_ZONE | \ @@ -256,9 +267,30 @@ struct array_cache { * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing * the entries. + * + * Entries should not be directly dereferenced as + * entries belonging to slabs marked pfmemalloc will + * have the lower bits set SLAB_OBJ_PFMEMALLOC */ }; +#define SLAB_OBJ_PFMEMALLOC 1 +static inline bool is_obj_pfmemalloc(void *objp) +{ + return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC; +} + +static inline void set_obj_pfmemalloc(void **objp) +{ + *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC); + return; +} + +static inline void clear_obj_pfmemalloc(void **objp) +{ + *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC); +} + /* * bootstrap: The caches do not work without cpuarrays anymore, but the * cpuarrays are allocated from the generic caches... @@ -424,8 +456,8 @@ static void kmem_list3_init(struct kmem_list3 *parent) * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: * redzone word. * cachep->obj_offset: The real object. - * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] - * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address + * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] + * cachep->size - 1* BYTES_PER_WORD: last caller address * [BYTES_PER_WORD long] */ static int obj_offset(struct kmem_cache *cachep) @@ -433,11 +465,6 @@ static int obj_offset(struct kmem_cache *cachep) return cachep->obj_offset; } -static int obj_size(struct kmem_cache *cachep) -{ - return cachep->obj_size; -} - static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); @@ -449,23 +476,22 @@ static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); if (cachep->flags & SLAB_STORE_USER) - return (unsigned long long *)(objp + cachep->buffer_size - + return (unsigned long long *)(objp + cachep->size - sizeof(unsigned long long) - REDZONE_ALIGN); - return (unsigned long long *) (objp + cachep->buffer_size - + return (unsigned long long *) (objp + cachep->size - sizeof(unsigned long long)); } static void **dbg_userword(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_STORE_USER)); - return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); + return (void **)(objp + cachep->size - BYTES_PER_WORD); } #else #define obj_offset(x) 0 -#define obj_size(cachep) (cachep->buffer_size) #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) @@ -475,7 +501,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #ifdef CONFIG_TRACING size_t slab_buffer_size(struct kmem_cache *cachep) { - return cachep->buffer_size; + return cachep->size; } EXPORT_SYMBOL(slab_buffer_size); #endif @@ -489,56 +515,30 @@ EXPORT_SYMBOL(slab_buffer_size); static int slab_max_order = SLAB_MAX_ORDER_LO; static bool slab_max_order_set __initdata; -/* - * Functions for storing/retrieving the cachep and or slab from the page - * allocator. These are used to find the slab an obj belongs to. With kfree(), - * these are used to find the cache which an obj belongs to. - */ -static inline void page_set_cache(struct page *page, struct kmem_cache *cache) -{ - page->lru.next = (struct list_head *)cache; -} - -static inline struct kmem_cache *page_get_cache(struct page *page) -{ - page = compound_head(page); - BUG_ON(!PageSlab(page)); - return (struct kmem_cache *)page->lru.next; -} - -static inline void page_set_slab(struct page *page, struct slab *slab) -{ - page->lru.prev = (struct list_head *)slab; -} - -static inline struct slab *page_get_slab(struct page *page) -{ - BUG_ON(!PageSlab(page)); - return (struct slab *)page->lru.prev; -} - static inline struct kmem_cache *virt_to_cache(const void *obj) { struct page *page = virt_to_head_page(obj); - return page_get_cache(page); + return page->slab_cache; } static inline struct slab *virt_to_slab(const void *obj) { struct page *page = virt_to_head_page(obj); - return page_get_slab(page); + + VM_BUG_ON(!PageSlab(page)); + return page->slab_page; } static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, unsigned int idx) { - return slab->s_mem + cache->buffer_size * idx; + return slab->s_mem + cache->size * idx; } /* - * We want to avoid an expensive divide : (offset / cache->buffer_size) - * Using the fact that buffer_size is a constant for a particular cache, - * we can replace (offset / cache->buffer_size) by + * We want to avoid an expensive divide : (offset / cache->size) + * Using the fact that size is a constant for a particular cache, + * we can replace (offset / cache->size) by * reciprocal_divide(offset, cache->reciprocal_buffer_size) */ static inline unsigned int obj_to_index(const struct kmem_cache *cache, @@ -578,39 +578,18 @@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ -static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES]; -static struct kmem_cache cache_cache = { - .nodelists = cache_cache_nodelists, +static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES]; +static struct kmem_cache kmem_cache_boot = { + .nodelists = kmem_cache_nodelists, .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, - .buffer_size = sizeof(struct kmem_cache), + .size = sizeof(struct kmem_cache), .name = "kmem_cache", }; #define BAD_ALIEN_MAGIC 0x01020304ul -/* - * chicken and egg problem: delay the per-cpu array allocation - * until the general caches are up. - */ -static enum { - NONE, - PARTIAL_AC, - PARTIAL_L3, - EARLY, - LATE, - FULL -} g_cpucache_up; - -/* - * used by boot code to determine if it can use slab based allocator - */ -int slab_is_available(void) -{ - return g_cpucache_up >= EARLY; -} - #ifdef CONFIG_LOCKDEP /* @@ -676,7 +655,7 @@ static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; - if (g_cpucache_up < LATE) + if (slab_state < UP) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { @@ -716,12 +695,6 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) } #endif -/* - * Guard access to the cache-chain. - */ -static DEFINE_MUTEX(cache_chain_mutex); -static struct list_head cache_chain; - static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) @@ -951,6 +924,124 @@ static struct array_cache *alloc_arraycache(int node, int entries, return nc; } +static inline bool is_slab_pfmemalloc(struct slab *slabp) +{ + struct page *page = virt_to_page(slabp->s_mem); + + return PageSlabPfmemalloc(page); +} + +/* Clears pfmemalloc_active if no slabs have pfmalloc set */ +static void recheck_pfmemalloc_active(struct kmem_cache *cachep, + struct array_cache *ac) +{ + struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; + struct slab *slabp; + unsigned long flags; + + if (!pfmemalloc_active) + return; + + spin_lock_irqsave(&l3->list_lock, flags); + list_for_each_entry(slabp, &l3->slabs_full, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + list_for_each_entry(slabp, &l3->slabs_partial, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + list_for_each_entry(slabp, &l3->slabs_free, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + pfmemalloc_active = false; +out: + spin_unlock_irqrestore(&l3->list_lock, flags); +} + +static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, + gfp_t flags, bool force_refill) +{ + int i; + void *objp = ac->entry[--ac->avail]; + + /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ + if (unlikely(is_obj_pfmemalloc(objp))) { + struct kmem_list3 *l3; + + if (gfp_pfmemalloc_allowed(flags)) { + clear_obj_pfmemalloc(&objp); + return objp; + } + + /* The caller cannot use PFMEMALLOC objects, find another one */ + for (i = 1; i < ac->avail; i++) { + /* If a !PFMEMALLOC object is found, swap them */ + if (!is_obj_pfmemalloc(ac->entry[i])) { + objp = ac->entry[i]; + ac->entry[i] = ac->entry[ac->avail]; + ac->entry[ac->avail] = objp; + return objp; + } + } + + /* + * If there are empty slabs on the slabs_free list and we are + * being forced to refill the cache, mark this one !pfmemalloc. + */ + l3 = cachep->nodelists[numa_mem_id()]; + if (!list_empty(&l3->slabs_free) && force_refill) { + struct slab *slabp = virt_to_slab(objp); + ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem)); + clear_obj_pfmemalloc(&objp); + recheck_pfmemalloc_active(cachep, ac); + return objp; + } + + /* No !PFMEMALLOC objects available */ + ac->avail++; + objp = NULL; + } + + return objp; +} + +static inline void *ac_get_obj(struct kmem_cache *cachep, + struct array_cache *ac, gfp_t flags, bool force_refill) +{ + void *objp; + + if (unlikely(sk_memalloc_socks())) + objp = __ac_get_obj(cachep, ac, flags, force_refill); + else + objp = ac->entry[--ac->avail]; + + return objp; +} + +static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, + void *objp) +{ + if (unlikely(pfmemalloc_active)) { + /* Some pfmemalloc slabs exist, check if this is one */ + struct page *page = virt_to_page(objp); + if (PageSlabPfmemalloc(page)) + set_obj_pfmemalloc(&objp); + } + + return objp; +} + +static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, + void *objp) +{ + if (unlikely(sk_memalloc_socks())) + objp = __ac_put_obj(cachep, ac, objp); + + ac->entry[ac->avail++] = objp; +} + /* * Transfer objects in one arraycache to another. * Locking must be handled by the caller. @@ -1127,7 +1218,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, alien, nodeid); } - alien->entry[alien->avail++] = objp; + ac_put_obj(cachep, alien, objp); spin_unlock(&alien->lock); } else { spin_lock(&(cachep->nodelists[nodeid])->list_lock); @@ -1145,7 +1236,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) * When hotplugging memory or a cpu, existing nodelists are not replaced if * already in use. * - * Must hold cache_chain_mutex. + * Must hold slab_mutex. */ static int init_cache_nodelists_node(int node) { @@ -1153,7 +1244,7 @@ static int init_cache_nodelists_node(int node) struct kmem_list3 *l3; const int memsize = sizeof(struct kmem_list3); - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { /* * Set up the size64 kmemlist for cpu before we can * begin anything. Make sure some other cpu on this @@ -1169,7 +1260,7 @@ static int init_cache_nodelists_node(int node) /* * The l3s don't come and go as CPUs come and - * go. cache_chain_mutex is sufficient + * go. slab_mutex is sufficient * protection here. */ cachep->nodelists[node] = l3; @@ -1191,7 +1282,7 @@ static void __cpuinit cpuup_canceled(long cpu) int node = cpu_to_mem(cpu); const struct cpumask *mask = cpumask_of_node(node); - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { struct array_cache *nc; struct array_cache *shared; struct array_cache **alien; @@ -1241,7 +1332,7 @@ free_array_cache: * the respective cache's slabs, now we can go ahead and * shrink each nodelist to its limit. */ - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { l3 = cachep->nodelists[node]; if (!l3) continue; @@ -1270,7 +1361,7 @@ static int __cpuinit cpuup_prepare(long cpu) * Now we can go ahead with allocating the shared arrays and * array caches */ - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { struct array_cache *nc; struct array_cache *shared = NULL; struct array_cache **alien = NULL; @@ -1338,9 +1429,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); err = cpuup_prepare(cpu); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -1350,7 +1441,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: /* - * Shutdown cache reaper. Note that the cache_chain_mutex is + * Shutdown cache reaper. Note that the slab_mutex is * held so that if cache_reap() is invoked it cannot do * anything expensive but will only modify reap_work * and reschedule the timer. @@ -1377,9 +1468,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, #endif case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); cpuup_canceled(cpu); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; } return notifier_from_errno(err); @@ -1395,14 +1486,14 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { * Returns -EBUSY if all objects cannot be drained so that the node is not * removed. * - * Must hold cache_chain_mutex. + * Must hold slab_mutex. */ static int __meminit drain_cache_nodelists_node(int node) { struct kmem_cache *cachep; int ret = 0; - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { struct kmem_list3 *l3; l3 = cachep->nodelists[node]; @@ -1433,14 +1524,14 @@ static int __meminit slab_memory_callback(struct notifier_block *self, switch (action) { case MEM_GOING_ONLINE: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); ret = init_cache_nodelists_node(nid); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; case MEM_GOING_OFFLINE: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); ret = drain_cache_nodelists_node(nid); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; case MEM_ONLINE: case MEM_OFFLINE: @@ -1503,15 +1594,17 @@ void __init kmem_cache_init(void) int order; int node; + kmem_cache = &kmem_cache_boot; + if (num_possible_nodes() == 1) use_alien_caches = 0; for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); if (i < MAX_NUMNODES) - cache_cache.nodelists[i] = NULL; + kmem_cache->nodelists[i] = NULL; } - set_up_list3s(&cache_cache, CACHE_CACHE); + set_up_list3s(kmem_cache, CACHE_CACHE); /* * Fragmentation resistance on low memory - only use bigger @@ -1523,9 +1616,9 @@ void __init kmem_cache_init(void) /* Bootstrap is tricky, because several objects are allocated * from caches that do not exist yet: - * 1) initialize the cache_cache cache: it contains the struct - * kmem_cache structures of all caches, except cache_cache itself: - * cache_cache is statically allocated. + * 1) initialize the kmem_cache cache: it contains the struct + * kmem_cache structures of all caches, except kmem_cache itself: + * kmem_cache is statically allocated. * Initially an __init data area is used for the head array and the * kmem_list3 structures, it's replaced with a kmalloc allocated * array at the end of the bootstrap. @@ -1534,45 +1627,43 @@ void __init kmem_cache_init(void) * An __init data area is used for the head array. * 3) Create the remaining kmalloc caches, with minimally sized * head arrays. - * 4) Replace the __init data head arrays for cache_cache and the first + * 4) Replace the __init data head arrays for kmem_cache and the first * kmalloc cache with kmalloc allocated arrays. - * 5) Replace the __init data for kmem_list3 for cache_cache and + * 5) Replace the __init data for kmem_list3 for kmem_cache and * the other cache's with kmalloc allocated memory. * 6) Resize the head arrays of the kmalloc caches to their final sizes. */ node = numa_mem_id(); - /* 1) create the cache_cache */ - INIT_LIST_HEAD(&cache_chain); - list_add(&cache_cache.next, &cache_chain); - cache_cache.colour_off = cache_line_size(); - cache_cache.array[smp_processor_id()] = &initarray_cache.cache; - cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; + /* 1) create the kmem_cache */ + INIT_LIST_HEAD(&slab_caches); + list_add(&kmem_cache->list, &slab_caches); + kmem_cache->colour_off = cache_line_size(); + kmem_cache->array[smp_processor_id()] = &initarray_cache.cache; + kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids */ - cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + + kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + nr_node_ids * sizeof(struct kmem_list3 *); -#if DEBUG - cache_cache.obj_size = cache_cache.buffer_size; -#endif - cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, + kmem_cache->object_size = kmem_cache->size; + kmem_cache->size = ALIGN(kmem_cache->object_size, cache_line_size()); - cache_cache.reciprocal_buffer_size = - reciprocal_value(cache_cache.buffer_size); + kmem_cache->reciprocal_buffer_size = + reciprocal_value(kmem_cache->size); for (order = 0; order < MAX_ORDER; order++) { - cache_estimate(order, cache_cache.buffer_size, - cache_line_size(), 0, &left_over, &cache_cache.num); - if (cache_cache.num) + cache_estimate(order, kmem_cache->size, + cache_line_size(), 0, &left_over, &kmem_cache->num); + if (kmem_cache->num) break; } - BUG_ON(!cache_cache.num); - cache_cache.gfporder = order; - cache_cache.colour = left_over / cache_cache.colour_off; - cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + + BUG_ON(!kmem_cache->num); + kmem_cache->gfporder = order; + kmem_cache->colour = left_over / kmem_cache->colour_off; + kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size()); /* 2+3) create the kmalloc caches */ @@ -1585,19 +1676,22 @@ void __init kmem_cache_init(void) * bug. */ - sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, - sizes[INDEX_AC].cs_size, - ARCH_KMALLOC_MINALIGN, - ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL); + sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name; + sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size; + sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size; + sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN; + __kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC); + list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches); if (INDEX_AC != INDEX_L3) { - sizes[INDEX_L3].cs_cachep = - kmem_cache_create(names[INDEX_L3].name, - sizes[INDEX_L3].cs_size, - ARCH_KMALLOC_MINALIGN, - ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL); + sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name; + sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size; + sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size; + sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN; + __kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC); + list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches); } slab_early_init = 0; @@ -1611,20 +1705,23 @@ void __init kmem_cache_init(void) * allow tighter packing of the smaller caches. */ if (!sizes->cs_cachep) { - sizes->cs_cachep = kmem_cache_create(names->name, - sizes->cs_size, - ARCH_KMALLOC_MINALIGN, - ARCH_KMALLOC_FLAGS|SLAB_PANIC, - NULL); + sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + sizes->cs_cachep->name = names->name; + sizes->cs_cachep->size = sizes->cs_size; + sizes->cs_cachep->object_size = sizes->cs_size; + sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN; + __kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC); + list_add(&sizes->cs_cachep->list, &slab_caches); } #ifdef CONFIG_ZONE_DMA - sizes->cs_dmacachep = kmem_cache_create( - names->name_dma, - sizes->cs_size, - ARCH_KMALLOC_MINALIGN, - ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| - SLAB_PANIC, - NULL); + sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + sizes->cs_dmacachep->name = names->name_dma; + sizes->cs_dmacachep->size = sizes->cs_size; + sizes->cs_dmacachep->object_size = sizes->cs_size; + sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN; + __kmem_cache_create(sizes->cs_dmacachep, + ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC); + list_add(&sizes->cs_dmacachep->list, &slab_caches); #endif sizes++; names++; @@ -1635,15 +1732,15 @@ void __init kmem_cache_init(void) ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); - memcpy(ptr, cpu_cache_get(&cache_cache), + BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache); + memcpy(ptr, cpu_cache_get(kmem_cache), sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - cache_cache.array[smp_processor_id()] = ptr; + kmem_cache->array[smp_processor_id()] = ptr; ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); @@ -1664,7 +1761,7 @@ void __init kmem_cache_init(void) int nid; for_each_online_node(nid) { - init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid); + init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid); init_list(malloc_sizes[INDEX_AC].cs_cachep, &initkmem_list3[SIZE_AC + nid], nid); @@ -1676,27 +1773,27 @@ void __init kmem_cache_init(void) } } - g_cpucache_up = EARLY; + slab_state = UP; } void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; - g_cpucache_up = LATE; + slab_state = UP; /* Annotate slab for lockdep -- annotate the malloc caches */ init_lock_keys(); /* 6) resize the head arrays to their final sizes */ - mutex_lock(&cache_chain_mutex); - list_for_each_entry(cachep, &cache_chain, next) + mutex_lock(&slab_mutex); + list_for_each_entry(cachep, &slab_caches, list) if (enable_cpucache(cachep, GFP_NOWAIT)) BUG(); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); /* Done! */ - g_cpucache_up = FULL; + slab_state = FULL; /* * Register a cpu startup notifier callback that initializes @@ -1727,6 +1824,9 @@ static int __init cpucache_init(void) */ for_each_online_cpu(cpu) start_cpu_timer(cpu); + + /* Done! */ + slab_state = FULL; return 0; } __initcall(cpucache_init); @@ -1743,7 +1843,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n", nodeid, gfpflags); printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", - cachep->name, cachep->buffer_size, cachep->gfporder); + cachep->name, cachep->size, cachep->gfporder); for_each_online_node(node) { unsigned long active_objs = 0, num_objs = 0, free_objects = 0; @@ -1798,7 +1898,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) flags |= __GFP_COMP; #endif - flags |= cachep->gfpflags; + flags |= cachep->allocflags; if (cachep->flags & SLAB_RECLAIM_ACCOUNT) flags |= __GFP_RECLAIMABLE; @@ -1809,6 +1909,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) return NULL; } + /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ + if (unlikely(page->pfmemalloc)) + pfmemalloc_active = true; + nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) add_zone_page_state(page_zone(page), @@ -1816,9 +1920,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) else add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_pages; i++) { __SetPageSlab(page + i); + if (page->pfmemalloc) + SetPageSlabPfmemalloc(page + i); + } + if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); @@ -1850,6 +1958,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); + __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); page++; } @@ -1874,7 +1983,7 @@ static void kmem_rcu_free(struct rcu_head *head) static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, unsigned long caller) { - int size = obj_size(cachep); + int size = cachep->object_size; addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; @@ -1906,7 +2015,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) { - int size = obj_size(cachep); + int size = cachep->object_size; addr = &((char *)addr)[obj_offset(cachep)]; memset(addr, val, size); @@ -1966,7 +2075,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) printk("\n"); } realobj = (char *)objp + obj_offset(cachep); - size = obj_size(cachep); + size = cachep->object_size; for (i = 0; i < size && lines; i += 16, lines--) { int limit; limit = 16; @@ -1983,7 +2092,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) int lines = 0; realobj = (char *)objp + obj_offset(cachep); - size = obj_size(cachep); + size = cachep->object_size; for (i = 0; i < size; i++) { char exp = POISON_FREE; @@ -2047,10 +2156,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if (cachep->buffer_size % PAGE_SIZE == 0 && + if (cachep->size % PAGE_SIZE == 0 && OFF_SLAB(cachep)) kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, 1); + cachep->size / PAGE_SIZE, 1); else check_poison_obj(cachep, objp); #else @@ -2101,27 +2210,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) } } -static void __kmem_cache_destroy(struct kmem_cache *cachep) -{ - int i; - struct kmem_list3 *l3; - - for_each_online_cpu(i) - kfree(cachep->array[i]); - - /* NUMA: free the list3 structures */ - for_each_online_node(i) { - l3 = cachep->nodelists[i]; - if (l3) { - kfree(l3->shared); - free_alien_cache(l3->alien); - kfree(l3); - } - } - kmem_cache_free(&cache_cache, cachep); -} - - /** * calculate_slab_order - calculate size (page order) of slabs * @cachep: pointer to the cache that is being created @@ -2194,10 +2282,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { - if (g_cpucache_up == FULL) + if (slab_state >= FULL) return enable_cpucache(cachep, gfp); - if (g_cpucache_up == NONE) { + if (slab_state == DOWN) { /* * Note: the first kmem_cache_create must create the cache * that's used by kmalloc(24), otherwise the creation of @@ -2212,16 +2300,16 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) */ set_up_list3s(cachep, SIZE_AC); if (INDEX_AC == INDEX_L3) - g_cpucache_up = PARTIAL_L3; + slab_state = PARTIAL_L3; else - g_cpucache_up = PARTIAL_AC; + slab_state = PARTIAL_ARRAYCACHE; } else { cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init), gfp); - if (g_cpucache_up == PARTIAL_AC) { + if (slab_state == PARTIAL_ARRAYCACHE) { set_up_list3s(cachep, SIZE_L3); - g_cpucache_up = PARTIAL_L3; + slab_state = PARTIAL_L3; } else { int node; for_each_online_node(node) { @@ -2247,7 +2335,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) } /** - * kmem_cache_create - Create a cache. + * __kmem_cache_create - Create a cache. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. @@ -2258,9 +2346,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) * Cannot be called within a int, but can be interrupted. * The @ctor is run when new pages are allocated by the cache. * - * @name must be valid until the cache is destroyed. This implies that - * the module calling this has to destroy the cache before getting unloaded. - * * The flags are * * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) @@ -2273,60 +2358,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) * cacheline. This can be beneficial if you're counting cycles as closely * as davem. */ -struct kmem_cache * -kmem_cache_create (const char *name, size_t size, size_t align, - unsigned long flags, void (*ctor)(void *)) +int +__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) { size_t left_over, slab_size, ralign; - struct kmem_cache *cachep = NULL, *pc; gfp_t gfp; - - /* - * Sanity checks... these are all serious usage bugs. - */ - if (!name || in_interrupt() || (size < BYTES_PER_WORD) || - size > KMALLOC_MAX_SIZE) { - printk(KERN_ERR "%s: Early error in slab %s\n", __func__, - name); - BUG(); - } - - /* - * We use cache_chain_mutex to ensure a consistent view of - * cpu_online_mask as well. Please see cpuup_callback - */ - if (slab_is_available()) { - get_online_cpus(); - mutex_lock(&cache_chain_mutex); - } - - list_for_each_entry(pc, &cache_chain, next) { - char tmp; - int res; - - /* - * This happens when the module gets unloaded and doesn't - * destroy its slab cache and no-one else reuses the vmalloc - * area of the module. Print a warning. - */ - res = probe_kernel_address(pc->name, tmp); - if (res) { - printk(KERN_ERR - "SLAB: cache with size %d has lost its name\n", - pc->buffer_size); - continue; - } - - if (!strcmp(pc->name, name)) { - printk(KERN_ERR - "kmem_cache_create: duplicate cache %s\n", name); - dump_stack(); - goto oops; - } - } + int err; + size_t size = cachep->size; #if DEBUG - WARN_ON(strchr(name, ' ')); /* It confuses parsers */ #if FORCED_DEBUG /* * Enable redzoning and last user accounting, except for caches with @@ -2396,8 +2436,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, ralign = ARCH_SLAB_MINALIGN; } /* 3) caller mandated alignment */ - if (ralign < align) { - ralign = align; + if (ralign < cachep->align) { + ralign = cachep->align; } /* disable debug if necessary */ if (ralign > __alignof__(unsigned long long)) @@ -2405,21 +2445,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* * 4) Store it. */ - align = ralign; + cachep->align = ralign; if (slab_is_available()) gfp = GFP_KERNEL; else gfp = GFP_NOWAIT; - /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, gfp); - if (!cachep) - goto oops; - cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; #if DEBUG - cachep->obj_size = size; /* * Both debugging options require word-alignment which is calculated @@ -2442,7 +2476,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) if (size >= malloc_sizes[INDEX_L3 + 1].cs_size - && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { + && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); size = PAGE_SIZE; } @@ -2463,19 +2497,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ flags |= CFLGS_OFF_SLAB; - size = ALIGN(size, align); + size = ALIGN(size, cachep->align); - left_over = calculate_slab_order(cachep, size, align, flags); + left_over = calculate_slab_order(cachep, size, cachep->align, flags); + + if (!cachep->num) + return -E2BIG; - if (!cachep->num) { - printk(KERN_ERR - "kmem_cache_create: couldn't create cache %s.\n", name); - kmem_cache_free(&cache_cache, cachep); - cachep = NULL; - goto oops; - } slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) - + sizeof(struct slab), align); + + sizeof(struct slab), cachep->align); /* * If the slab has been placed off-slab, and we have enough space then @@ -2503,15 +2533,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->colour_off = cache_line_size(); /* Offset must be a multiple of the alignment. */ - if (cachep->colour_off < align) - cachep->colour_off = align; + if (cachep->colour_off < cachep->align) + cachep->colour_off = cachep->align; cachep->colour = left_over / cachep->colour_off; cachep->slab_size = slab_size; cachep->flags = flags; - cachep->gfpflags = 0; + cachep->allocflags = 0; if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) - cachep->gfpflags |= GFP_DMA; - cachep->buffer_size = size; + cachep->allocflags |= GFP_DMA; + cachep->size = size; cachep->reciprocal_buffer_size = reciprocal_value(size); if (flags & CFLGS_OFF_SLAB) { @@ -2525,13 +2555,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); } - cachep->ctor = ctor; - cachep->name = name; - if (setup_cpu_cache(cachep, gfp)) { - __kmem_cache_destroy(cachep); - cachep = NULL; - goto oops; + err = setup_cpu_cache(cachep, gfp); + if (err) { + __kmem_cache_shutdown(cachep); + return err; } if (flags & SLAB_DEBUG_OBJECTS) { @@ -2544,19 +2572,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, slab_set_debugobj_lock_classes(cachep); } - /* cache setup completed, link it into the list */ - list_add(&cachep->next, &cache_chain); -oops: - if (!cachep && (flags & SLAB_PANIC)) - panic("kmem_cache_create(): failed to create slab `%s'\n", - name); - if (slab_is_available()) { - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); - } - return cachep; + return 0; } -EXPORT_SYMBOL(kmem_cache_create); #if DEBUG static void check_irq_off(void) @@ -2671,7 +2688,7 @@ out: return nr_freed; } -/* Called with cache_chain_mutex held to protect against cpu hotplug */ +/* Called with slab_mutex held to protect against cpu hotplug */ static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; @@ -2706,57 +2723,37 @@ int kmem_cache_shrink(struct kmem_cache *cachep) BUG_ON(!cachep || in_interrupt()); get_online_cpus(); - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); ret = __cache_shrink(cachep); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); put_online_cpus(); return ret; } EXPORT_SYMBOL(kmem_cache_shrink); -/** - * kmem_cache_destroy - delete a cache - * @cachep: the cache to destroy - * - * Remove a &struct kmem_cache object from the slab cache. - * - * It is expected this function will be called by a module when it is - * unloaded. This will remove the cache completely, and avoid a duplicate - * cache being allocated each time a module is loaded and unloaded, if the - * module doesn't have persistent in-kernel storage across loads and unloads. - * - * The cache must be empty before calling this function. - * - * The caller must guarantee that no one will allocate memory from the cache - * during the kmem_cache_destroy(). - */ -void kmem_cache_destroy(struct kmem_cache *cachep) +int __kmem_cache_shutdown(struct kmem_cache *cachep) { - BUG_ON(!cachep || in_interrupt()); + int i; + struct kmem_list3 *l3; + int rc = __cache_shrink(cachep); - /* Find the cache in the chain of caches. */ - get_online_cpus(); - mutex_lock(&cache_chain_mutex); - /* - * the chain is never empty, cache_cache is never destroyed - */ - list_del(&cachep->next); - if (__cache_shrink(cachep)) { - slab_error(cachep, "Can't free all objects"); - list_add(&cachep->next, &cache_chain); - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); - return; - } + if (rc) + return rc; - if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) - rcu_barrier(); + for_each_online_cpu(i) + kfree(cachep->array[i]); - __kmem_cache_destroy(cachep); - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); + /* NUMA: free the list3 structures */ + for_each_online_node(i) { + l3 = cachep->nodelists[i]; + if (l3) { + kfree(l3->shared); + free_alien_cache(l3->alien); + kfree(l3); + } + } + return 0; } -EXPORT_SYMBOL(kmem_cache_destroy); /* * Get the memory for a slab management obj. @@ -2840,10 +2837,10 @@ static void cache_init_objs(struct kmem_cache *cachep, slab_error(cachep, "constructor overwrote the" " start of an object"); } - if ((cachep->buffer_size % PAGE_SIZE) == 0 && + if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, 0); + cachep->size / PAGE_SIZE, 0); #else if (cachep->ctor) cachep->ctor(objp); @@ -2857,9 +2854,9 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) { if (CONFIG_ZONE_DMA_FLAG) { if (flags & GFP_DMA) - BUG_ON(!(cachep->gfpflags & GFP_DMA)); + BUG_ON(!(cachep->allocflags & GFP_DMA)); else - BUG_ON(cachep->gfpflags & GFP_DMA); + BUG_ON(cachep->allocflags & GFP_DMA); } } @@ -2918,8 +2915,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, nr_pages <<= cache->gfporder; do { - page_set_cache(page, cache); - page_set_slab(page, slab); + page->slab_cache = cache; + page->slab_page = slab; page++; } while (--nr_pages); } @@ -3057,7 +3054,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, kfree_debugcheck(objp); page = virt_to_head_page(objp); - slabp = page_get_slab(page); + slabp = page->slab_page; if (cachep->flags & SLAB_RED_ZONE) { verify_redzone_free(cachep, objp); @@ -3077,10 +3074,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, #endif if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { + if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { store_stackinfo(cachep, objp, (unsigned long)caller); kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, 0); + cachep->size / PAGE_SIZE, 0); } else { poison_obj(cachep, objp, POISON_FREE); } @@ -3120,16 +3117,19 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, + bool force_refill) { int batchcount; struct kmem_list3 *l3; struct array_cache *ac; int node; -retry: check_irq_off(); node = numa_mem_id(); + if (unlikely(force_refill)) + goto force_grow; +retry: ac = cpu_cache_get(cachep); batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { @@ -3179,8 +3179,8 @@ retry: STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - node); + ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, + node)); } check_slabp(cachep, slabp); @@ -3199,18 +3199,22 @@ alloc_done: if (unlikely(!ac->avail)) { int x; +force_grow: x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); /* cache_grow can reenable interrupts, then ac could change. */ ac = cpu_cache_get(cachep); - if (!x && ac->avail == 0) /* no objects in sight? abort */ + + /* no objects in sight? abort */ + if (!x && (ac->avail == 0 || force_refill)) return NULL; if (!ac->avail) /* objects refilled by interrupt? */ goto retry; } ac->touched = 1; - return ac->entry[--ac->avail]; + + return ac_get_obj(cachep, ac, flags, force_refill); } static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, @@ -3230,9 +3234,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, return objp; if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) + if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, 1); + cachep->size / PAGE_SIZE, 1); else check_poison_obj(cachep, objp); #else @@ -3261,8 +3265,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, struct slab *slabp; unsigned objnr; - slabp = page_get_slab(virt_to_head_page(objp)); - objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; + slabp = virt_to_head_page(objp)->slab_page; + objnr = (unsigned)(objp - slabp->s_mem) / cachep->size; slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; } #endif @@ -3282,33 +3286,45 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) { - if (cachep == &cache_cache) + if (cachep == kmem_cache) return false; - return should_failslab(obj_size(cachep), flags, cachep->flags); + return should_failslab(cachep->object_size, flags, cachep->flags); } static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; struct array_cache *ac; + bool force_refill = false; check_irq_off(); ac = cpu_cache_get(cachep); if (likely(ac->avail)) { - STATS_INC_ALLOCHIT(cachep); ac->touched = 1; - objp = ac->entry[--ac->avail]; - } else { - STATS_INC_ALLOCMISS(cachep); - objp = cache_alloc_refill(cachep, flags); + objp = ac_get_obj(cachep, ac, flags, false); + /* - * the 'ac' may be updated by cache_alloc_refill(), - * and kmemleak_erase() requires its correct value. + * Allow for the possibility all avail objects are not allowed + * by the current flags */ - ac = cpu_cache_get(cachep); + if (objp) { + STATS_INC_ALLOCHIT(cachep); + goto out; + } + force_refill = true; } + + STATS_INC_ALLOCMISS(cachep); + objp = cache_alloc_refill(cachep, flags, force_refill); + /* + * the 'ac' may be updated by cache_alloc_refill(), + * and kmemleak_erase() requires its correct value. + */ + ac = cpu_cache_get(cachep); + +out: /* * To avoid a false negative, if an object that is in one of the * per-CPU caches is leaked, we need to make sure kmemleak doesn't @@ -3336,7 +3352,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) nid_alloc = cpuset_slab_spread_node(); else if (current->mempolicy) - nid_alloc = slab_node(current->mempolicy); + nid_alloc = slab_node(); if (nid_alloc != nid_here) return ____cache_alloc_node(cachep, flags, nid_alloc); return NULL; @@ -3368,7 +3384,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) retry_cpuset: cpuset_mems_cookie = get_mems_allowed(); - zonelist = node_zonelist(slab_node(current->mempolicy), flags); + zonelist = node_zonelist(slab_node(), flags); retry: /* @@ -3545,14 +3561,14 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); - kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, + kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, flags); if (likely(ptr)) - kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); + kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size); if (unlikely((flags & __GFP_ZERO) && ptr)) - memset(ptr, 0, obj_size(cachep)); + memset(ptr, 0, cachep->object_size); return ptr; } @@ -3607,15 +3623,15 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) objp = __do_cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); - kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, + kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags, flags); prefetchw(objp); if (likely(objp)) - kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); + kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size); if (unlikely((flags & __GFP_ZERO) && objp)) - memset(objp, 0, obj_size(cachep)); + memset(objp, 0, cachep->object_size); return objp; } @@ -3630,9 +3646,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, struct kmem_list3 *l3; for (i = 0; i < nr_objects; i++) { - void *objp = objpp[i]; + void *objp; struct slab *slabp; + clear_obj_pfmemalloc(&objpp[i]); + objp = objpp[i]; + slabp = virt_to_slab(objp); l3 = cachep->nodelists[node]; list_del(&slabp->list); @@ -3731,7 +3750,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); - kmemcheck_slab_free(cachep, objp, obj_size(cachep)); + kmemcheck_slab_free(cachep, objp, cachep->object_size); /* * Skip calling cache_free_alien() when the platform is not numa. @@ -3750,7 +3769,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, cache_flusharray(cachep, ac); } - ac->entry[ac->avail++] = objp; + ac_put_obj(cachep, ac, objp); } /** @@ -3766,7 +3785,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); trace_kmem_cache_alloc(_RET_IP_, ret, - obj_size(cachep), cachep->buffer_size, flags); + cachep->object_size, cachep->size, flags); return ret; } @@ -3794,7 +3813,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) __builtin_return_address(0)); trace_kmem_cache_alloc_node(_RET_IP_, ret, - obj_size(cachep), cachep->buffer_size, + cachep->object_size, cachep->size, flags, nodeid); return ret; @@ -3876,7 +3895,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, ret = __cache_alloc(cachep, flags, caller); trace_kmalloc((unsigned long) caller, ret, - size, cachep->buffer_size, flags); + size, cachep->size, flags); return ret; } @@ -3916,9 +3935,9 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) unsigned long flags; local_irq_save(flags); - debug_check_no_locks_freed(objp, obj_size(cachep)); + debug_check_no_locks_freed(objp, cachep->object_size); if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(objp, obj_size(cachep)); + debug_check_no_obj_freed(objp, cachep->object_size); __cache_free(cachep, objp, __builtin_return_address(0)); local_irq_restore(flags); @@ -3947,8 +3966,9 @@ void kfree(const void *objp) local_irq_save(flags); kfree_debugcheck(objp); c = virt_to_cache(objp); - debug_check_no_locks_freed(objp, obj_size(c)); - debug_check_no_obj_freed(objp, obj_size(c)); + debug_check_no_locks_freed(objp, c->object_size); + + debug_check_no_obj_freed(objp, c->object_size); __cache_free(c, (void *)objp, __builtin_return_address(0)); local_irq_restore(flags); } @@ -3956,7 +3976,7 @@ EXPORT_SYMBOL(kfree); unsigned int kmem_cache_size(struct kmem_cache *cachep) { - return obj_size(cachep); + return cachep->object_size; } EXPORT_SYMBOL(kmem_cache_size); @@ -4030,7 +4050,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) return 0; fail: - if (!cachep->next.next) { + if (!cachep->list.next) { /* Cache is not active yet. Roll back what we did */ node--; while (node >= 0) { @@ -4065,7 +4085,7 @@ static void do_ccupdate_local(void *info) new->new[smp_processor_id()] = old; } -/* Always called with the cache_chain_mutex held */ +/* Always called with the slab_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared, gfp_t gfp) { @@ -4109,7 +4129,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, return alloc_kmemlist(cachep, gfp); } -/* Called with cache_chain_mutex held always */ +/* Called with slab_mutex held always */ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) { int err; @@ -4124,13 +4144,13 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) * The numbers are guessed, we should auto-tune as described by * Bonwick. */ - if (cachep->buffer_size > 131072) + if (cachep->size > 131072) limit = 1; - else if (cachep->buffer_size > PAGE_SIZE) + else if (cachep->size > PAGE_SIZE) limit = 8; - else if (cachep->buffer_size > 1024) + else if (cachep->size > 1024) limit = 24; - else if (cachep->buffer_size > 256) + else if (cachep->size > 256) limit = 54; else limit = 120; @@ -4145,7 +4165,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) * to a larger limit. Thus disabled by default. */ shared = 0; - if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1) + if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1) shared = 8; #if DEBUG @@ -4211,11 +4231,11 @@ static void cache_reap(struct work_struct *w) int node = numa_mem_id(); struct delayed_work *work = to_delayed_work(w); - if (!mutex_trylock(&cache_chain_mutex)) + if (!mutex_trylock(&slab_mutex)) /* Give up. Setup the next iteration. */ goto out; - list_for_each_entry(searchp, &cache_chain, next) { + list_for_each_entry(searchp, &slab_caches, list) { check_irq_on(); /* @@ -4253,7 +4273,7 @@ next: cond_resched(); } check_irq_on(); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); next_reap_node(); out: /* Set up the next iteration */ @@ -4289,26 +4309,26 @@ static void *s_start(struct seq_file *m, loff_t *pos) { loff_t n = *pos; - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); if (!n) print_slabinfo_header(m); - return seq_list_start(&cache_chain, *pos); + return seq_list_start(&slab_caches, *pos); } static void *s_next(struct seq_file *m, void *p, loff_t *pos) { - return seq_list_next(p, &cache_chain, pos); + return seq_list_next(p, &slab_caches, pos); } static void s_stop(struct seq_file *m, void *p) { - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); } static int s_show(struct seq_file *m, void *p) { - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); struct slab *slabp; unsigned long active_objs; unsigned long num_objs; @@ -4364,7 +4384,7 @@ static int s_show(struct seq_file *m, void *p) printk(KERN_ERR "slab: cache %s error: %s\n", name, error); seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", - name, active_objs, num_objs, cachep->buffer_size, + name, active_objs, num_objs, cachep->size, cachep->num, (1 << cachep->gfporder)); seq_printf(m, " : tunables %4u %4u %4u", cachep->limit, cachep->batchcount, cachep->shared); @@ -4454,9 +4474,9 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer, return -EINVAL; /* Find the cache in the chain of caches. */ - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); res = -EINVAL; - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { if (!strcmp(cachep->name, kbuf)) { if (limit < 1 || batchcount < 1 || batchcount > limit || shared < 0) { @@ -4469,7 +4489,7 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer, break; } } - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); if (res >= 0) res = count; return res; @@ -4492,8 +4512,8 @@ static const struct file_operations proc_slabinfo_operations = { static void *leaks_start(struct seq_file *m, loff_t *pos) { - mutex_lock(&cache_chain_mutex); - return seq_list_start(&cache_chain, *pos); + mutex_lock(&slab_mutex); + return seq_list_start(&slab_caches, *pos); } static inline int add_caller(unsigned long *n, unsigned long v) @@ -4532,7 +4552,7 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) int i; if (n[0] == n[1]) return; - for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { + for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) continue; if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) @@ -4558,7 +4578,7 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); struct slab *slabp; struct kmem_list3 *l3; const char *name; @@ -4592,17 +4612,17 @@ static int leaks_show(struct seq_file *m, void *p) name = cachep->name; if (n[0] == n[1]) { /* Increase the buffer size */ - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); if (!m->private) { /* Too bad, we are really out */ m->private = n; - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); return -ENOMEM; } *(unsigned long *)m->private = n[0] * 2; kfree(n); - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); /* Now make sure this entry will be retried */ m->count = m->size; return 0; @@ -4677,6 +4697,6 @@ size_t ksize(const void *objp) if (unlikely(objp == ZERO_SIZE_PTR)) return 0; - return obj_size(virt_to_cache(objp)); + return virt_to_cache(objp)->object_size; } EXPORT_SYMBOL(ksize);