X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=mm%2Fslab.c;h=de961b48a6a4a82935112d4b0a065eb566b50d48;hb=12c3667fb780e20360ad0bde32dfb3591ef609ad;hp=1fcf3ac94b6ccd3e085c5a0c6ba35a59fe553e8e;hpb=7d3d09b01a028e9dd1282149fdcd2a6e0edd73e4;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git diff --git a/mm/slab.c b/mm/slab.c index 1fcf3ac..de961b4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -118,12 +118,16 @@ #include #include +#include + #include #include #include #include +#include "internal.h" + /* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. * 0 for faster, smaller code (especially in the critical paths). @@ -152,6 +156,12 @@ #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN #endif +/* + * true if a page was allocated from pfmemalloc reserves for network-based + * swap + */ +static bool pfmemalloc_active __read_mostly; + /* Legal flag mask for kmem_cache_create(). */ #if DEBUG # define CREATE_MASK (SLAB_RED_ZONE | \ @@ -257,9 +267,30 @@ struct array_cache { * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing * the entries. + * + * Entries should not be directly dereferenced as + * entries belonging to slabs marked pfmemalloc will + * have the lower bits set SLAB_OBJ_PFMEMALLOC */ }; +#define SLAB_OBJ_PFMEMALLOC 1 +static inline bool is_obj_pfmemalloc(void *objp) +{ + return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC; +} + +static inline void set_obj_pfmemalloc(void **objp) +{ + *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC); + return; +} + +static inline void clear_obj_pfmemalloc(void **objp) +{ + *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC); +} + /* * bootstrap: The caches do not work without cpuarrays anymore, but the * cpuarrays are allocated from the generic caches... @@ -484,13 +515,6 @@ EXPORT_SYMBOL(slab_buffer_size); static int slab_max_order = SLAB_MAX_ORDER_LO; static bool slab_max_order_set __initdata; -static inline struct kmem_cache *page_get_cache(struct page *page) -{ - page = compound_head(page); - BUG_ON(!PageSlab(page)); - return page->slab_cache; -} - static inline struct kmem_cache *virt_to_cache(const void *obj) { struct page *page = virt_to_head_page(obj); @@ -554,9 +578,9 @@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ -static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES]; -static struct kmem_cache cache_cache = { - .nodelists = cache_cache_nodelists, +static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES]; +static struct kmem_cache kmem_cache_boot = { + .nodelists = kmem_cache_nodelists, .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, @@ -900,6 +924,124 @@ static struct array_cache *alloc_arraycache(int node, int entries, return nc; } +static inline bool is_slab_pfmemalloc(struct slab *slabp) +{ + struct page *page = virt_to_page(slabp->s_mem); + + return PageSlabPfmemalloc(page); +} + +/* Clears pfmemalloc_active if no slabs have pfmalloc set */ +static void recheck_pfmemalloc_active(struct kmem_cache *cachep, + struct array_cache *ac) +{ + struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; + struct slab *slabp; + unsigned long flags; + + if (!pfmemalloc_active) + return; + + spin_lock_irqsave(&l3->list_lock, flags); + list_for_each_entry(slabp, &l3->slabs_full, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + list_for_each_entry(slabp, &l3->slabs_partial, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + list_for_each_entry(slabp, &l3->slabs_free, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + pfmemalloc_active = false; +out: + spin_unlock_irqrestore(&l3->list_lock, flags); +} + +static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, + gfp_t flags, bool force_refill) +{ + int i; + void *objp = ac->entry[--ac->avail]; + + /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ + if (unlikely(is_obj_pfmemalloc(objp))) { + struct kmem_list3 *l3; + + if (gfp_pfmemalloc_allowed(flags)) { + clear_obj_pfmemalloc(&objp); + return objp; + } + + /* The caller cannot use PFMEMALLOC objects, find another one */ + for (i = 1; i < ac->avail; i++) { + /* If a !PFMEMALLOC object is found, swap them */ + if (!is_obj_pfmemalloc(ac->entry[i])) { + objp = ac->entry[i]; + ac->entry[i] = ac->entry[ac->avail]; + ac->entry[ac->avail] = objp; + return objp; + } + } + + /* + * If there are empty slabs on the slabs_free list and we are + * being forced to refill the cache, mark this one !pfmemalloc. + */ + l3 = cachep->nodelists[numa_mem_id()]; + if (!list_empty(&l3->slabs_free) && force_refill) { + struct slab *slabp = virt_to_slab(objp); + ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem)); + clear_obj_pfmemalloc(&objp); + recheck_pfmemalloc_active(cachep, ac); + return objp; + } + + /* No !PFMEMALLOC objects available */ + ac->avail++; + objp = NULL; + } + + return objp; +} + +static inline void *ac_get_obj(struct kmem_cache *cachep, + struct array_cache *ac, gfp_t flags, bool force_refill) +{ + void *objp; + + if (unlikely(sk_memalloc_socks())) + objp = __ac_get_obj(cachep, ac, flags, force_refill); + else + objp = ac->entry[--ac->avail]; + + return objp; +} + +static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, + void *objp) +{ + if (unlikely(pfmemalloc_active)) { + /* Some pfmemalloc slabs exist, check if this is one */ + struct page *page = virt_to_page(objp); + if (PageSlabPfmemalloc(page)) + set_obj_pfmemalloc(&objp); + } + + return objp; +} + +static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, + void *objp) +{ + if (unlikely(sk_memalloc_socks())) + objp = __ac_put_obj(cachep, ac, objp); + + ac->entry[ac->avail++] = objp; +} + /* * Transfer objects in one arraycache to another. * Locking must be handled by the caller. @@ -1076,7 +1218,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, alien, nodeid); } - alien->entry[alien->avail++] = objp; + ac_put_obj(cachep, alien, objp); spin_unlock(&alien->lock); } else { spin_lock(&(cachep->nodelists[nodeid])->list_lock); @@ -1452,15 +1594,17 @@ void __init kmem_cache_init(void) int order; int node; + kmem_cache = &kmem_cache_boot; + if (num_possible_nodes() == 1) use_alien_caches = 0; for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); if (i < MAX_NUMNODES) - cache_cache.nodelists[i] = NULL; + kmem_cache->nodelists[i] = NULL; } - set_up_list3s(&cache_cache, CACHE_CACHE); + set_up_list3s(kmem_cache, CACHE_CACHE); /* * Fragmentation resistance on low memory - only use bigger @@ -1472,9 +1616,9 @@ void __init kmem_cache_init(void) /* Bootstrap is tricky, because several objects are allocated * from caches that do not exist yet: - * 1) initialize the cache_cache cache: it contains the struct - * kmem_cache structures of all caches, except cache_cache itself: - * cache_cache is statically allocated. + * 1) initialize the kmem_cache cache: it contains the struct + * kmem_cache structures of all caches, except kmem_cache itself: + * kmem_cache is statically allocated. * Initially an __init data area is used for the head array and the * kmem_list3 structures, it's replaced with a kmalloc allocated * array at the end of the bootstrap. @@ -1483,43 +1627,43 @@ void __init kmem_cache_init(void) * An __init data area is used for the head array. * 3) Create the remaining kmalloc caches, with minimally sized * head arrays. - * 4) Replace the __init data head arrays for cache_cache and the first + * 4) Replace the __init data head arrays for kmem_cache and the first * kmalloc cache with kmalloc allocated arrays. - * 5) Replace the __init data for kmem_list3 for cache_cache and + * 5) Replace the __init data for kmem_list3 for kmem_cache and * the other cache's with kmalloc allocated memory. * 6) Resize the head arrays of the kmalloc caches to their final sizes. */ node = numa_mem_id(); - /* 1) create the cache_cache */ + /* 1) create the kmem_cache */ INIT_LIST_HEAD(&slab_caches); - list_add(&cache_cache.list, &slab_caches); - cache_cache.colour_off = cache_line_size(); - cache_cache.array[smp_processor_id()] = &initarray_cache.cache; - cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; + list_add(&kmem_cache->list, &slab_caches); + kmem_cache->colour_off = cache_line_size(); + kmem_cache->array[smp_processor_id()] = &initarray_cache.cache; + kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids */ - cache_cache.size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + + kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + nr_node_ids * sizeof(struct kmem_list3 *); - cache_cache.object_size = cache_cache.size; - cache_cache.size = ALIGN(cache_cache.size, + kmem_cache->object_size = kmem_cache->size; + kmem_cache->size = ALIGN(kmem_cache->object_size, cache_line_size()); - cache_cache.reciprocal_buffer_size = - reciprocal_value(cache_cache.size); + kmem_cache->reciprocal_buffer_size = + reciprocal_value(kmem_cache->size); for (order = 0; order < MAX_ORDER; order++) { - cache_estimate(order, cache_cache.size, - cache_line_size(), 0, &left_over, &cache_cache.num); - if (cache_cache.num) + cache_estimate(order, kmem_cache->size, + cache_line_size(), 0, &left_over, &kmem_cache->num); + if (kmem_cache->num) break; } - BUG_ON(!cache_cache.num); - cache_cache.gfporder = order; - cache_cache.colour = left_over / cache_cache.colour_off; - cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + + BUG_ON(!kmem_cache->num); + kmem_cache->gfporder = order; + kmem_cache->colour = left_over / kmem_cache->colour_off; + kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size()); /* 2+3) create the kmalloc caches */ @@ -1538,6 +1682,7 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL); + list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches); if (INDEX_AC != INDEX_L3) { sizes[INDEX_L3].cs_cachep = __kmem_cache_create(names[INDEX_L3].name, @@ -1545,6 +1690,7 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL); + list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches); } slab_early_init = 0; @@ -1563,6 +1709,7 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL); + list_add(&sizes->cs_cachep->list, &slab_caches); } #ifdef CONFIG_ZONE_DMA sizes->cs_dmacachep = __kmem_cache_create( @@ -1572,6 +1719,7 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC, NULL); + list_add(&sizes->cs_dmacachep->list, &slab_caches); #endif sizes++; names++; @@ -1582,15 +1730,15 @@ void __init kmem_cache_init(void) ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); - memcpy(ptr, cpu_cache_get(&cache_cache), + BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache); + memcpy(ptr, cpu_cache_get(kmem_cache), sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - cache_cache.array[smp_processor_id()] = ptr; + kmem_cache->array[smp_processor_id()] = ptr; ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); @@ -1611,7 +1759,7 @@ void __init kmem_cache_init(void) int nid; for_each_online_node(nid) { - init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid); + init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid); init_list(malloc_sizes[INDEX_AC].cs_cachep, &initkmem_list3[SIZE_AC + nid], nid); @@ -1759,6 +1907,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) return NULL; } + /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ + if (unlikely(page->pfmemalloc)) + pfmemalloc_active = true; + nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) add_zone_page_state(page_zone(page), @@ -1766,9 +1918,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) else add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_pages; i++) { __SetPageSlab(page + i); + if (page->pfmemalloc) + SetPageSlabPfmemalloc(page + i); + } + if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); @@ -1800,6 +1956,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); + __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); page++; } @@ -2051,27 +2208,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) } } -static void __kmem_cache_destroy(struct kmem_cache *cachep) -{ - int i; - struct kmem_list3 *l3; - - for_each_online_cpu(i) - kfree(cachep->array[i]); - - /* NUMA: free the list3 structures */ - for_each_online_node(i) { - l3 = cachep->nodelists[i]; - if (l3) { - kfree(l3->shared); - free_alien_cache(l3->alien); - kfree(l3); - } - } - kmem_cache_free(&cache_cache, cachep); -} - - /** * calculate_slab_order - calculate size (page order) of slabs * @cachep: pointer to the cache that is being created @@ -2208,9 +2344,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) * Cannot be called within a int, but can be interrupted. * The @ctor is run when new pages are allocated by the cache. * - * @name must be valid until the cache is destroyed. This implies that - * the module calling this has to destroy the cache before getting unloaded. - * * The flags are * * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) @@ -2318,7 +2451,7 @@ __kmem_cache_create (const char *name, size_t size, size_t align, gfp = GFP_NOWAIT; /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, gfp); + cachep = kmem_cache_zalloc(kmem_cache, gfp); if (!cachep) return NULL; @@ -2376,7 +2509,7 @@ __kmem_cache_create (const char *name, size_t size, size_t align, if (!cachep->num) { printk(KERN_ERR "kmem_cache_create: couldn't create cache %s.\n", name); - kmem_cache_free(&cache_cache, cachep); + kmem_cache_free(kmem_cache, cachep); return NULL; } slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) @@ -2432,9 +2565,10 @@ __kmem_cache_create (const char *name, size_t size, size_t align, } cachep->ctor = ctor; cachep->name = name; + cachep->refcount = 1; if (setup_cpu_cache(cachep, gfp)) { - __kmem_cache_destroy(cachep); + __kmem_cache_shutdown(cachep); return NULL; } @@ -2448,8 +2582,6 @@ __kmem_cache_create (const char *name, size_t size, size_t align, slab_set_debugobj_lock_classes(cachep); } - /* cache setup completed, link it into the list */ - list_add(&cachep->list, &slab_caches); return cachep; } @@ -2609,49 +2741,29 @@ int kmem_cache_shrink(struct kmem_cache *cachep) } EXPORT_SYMBOL(kmem_cache_shrink); -/** - * kmem_cache_destroy - delete a cache - * @cachep: the cache to destroy - * - * Remove a &struct kmem_cache object from the slab cache. - * - * It is expected this function will be called by a module when it is - * unloaded. This will remove the cache completely, and avoid a duplicate - * cache being allocated each time a module is loaded and unloaded, if the - * module doesn't have persistent in-kernel storage across loads and unloads. - * - * The cache must be empty before calling this function. - * - * The caller must guarantee that no one will allocate memory from the cache - * during the kmem_cache_destroy(). - */ -void kmem_cache_destroy(struct kmem_cache *cachep) +int __kmem_cache_shutdown(struct kmem_cache *cachep) { - BUG_ON(!cachep || in_interrupt()); + int i; + struct kmem_list3 *l3; + int rc = __cache_shrink(cachep); - /* Find the cache in the chain of caches. */ - get_online_cpus(); - mutex_lock(&slab_mutex); - /* - * the chain is never empty, cache_cache is never destroyed - */ - list_del(&cachep->list); - if (__cache_shrink(cachep)) { - slab_error(cachep, "Can't free all objects"); - list_add(&cachep->list, &slab_caches); - mutex_unlock(&slab_mutex); - put_online_cpus(); - return; - } + if (rc) + return rc; - if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) - rcu_barrier(); + for_each_online_cpu(i) + kfree(cachep->array[i]); - __kmem_cache_destroy(cachep); - mutex_unlock(&slab_mutex); - put_online_cpus(); + /* NUMA: free the list3 structures */ + for_each_online_node(i) { + l3 = cachep->nodelists[i]; + if (l3) { + kfree(l3->shared); + free_alien_cache(l3->alien); + kfree(l3); + } + } + return 0; } -EXPORT_SYMBOL(kmem_cache_destroy); /* * Get the memory for a slab management obj. @@ -3015,16 +3127,19 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, + bool force_refill) { int batchcount; struct kmem_list3 *l3; struct array_cache *ac; int node; -retry: check_irq_off(); node = numa_mem_id(); + if (unlikely(force_refill)) + goto force_grow; +retry: ac = cpu_cache_get(cachep); batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { @@ -3074,8 +3189,8 @@ retry: STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - node); + ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, + node)); } check_slabp(cachep, slabp); @@ -3094,18 +3209,22 @@ alloc_done: if (unlikely(!ac->avail)) { int x; +force_grow: x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); /* cache_grow can reenable interrupts, then ac could change. */ ac = cpu_cache_get(cachep); - if (!x && ac->avail == 0) /* no objects in sight? abort */ + + /* no objects in sight? abort */ + if (!x && (ac->avail == 0 || force_refill)) return NULL; if (!ac->avail) /* objects refilled by interrupt? */ goto retry; } ac->touched = 1; - return ac->entry[--ac->avail]; + + return ac_get_obj(cachep, ac, flags, force_refill); } static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, @@ -3177,7 +3296,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) { - if (cachep == &cache_cache) + if (cachep == kmem_cache) return false; return should_failslab(cachep->object_size, flags, cachep->flags); @@ -3187,23 +3306,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; struct array_cache *ac; + bool force_refill = false; check_irq_off(); ac = cpu_cache_get(cachep); if (likely(ac->avail)) { - STATS_INC_ALLOCHIT(cachep); ac->touched = 1; - objp = ac->entry[--ac->avail]; - } else { - STATS_INC_ALLOCMISS(cachep); - objp = cache_alloc_refill(cachep, flags); + objp = ac_get_obj(cachep, ac, flags, false); + /* - * the 'ac' may be updated by cache_alloc_refill(), - * and kmemleak_erase() requires its correct value. + * Allow for the possibility all avail objects are not allowed + * by the current flags */ - ac = cpu_cache_get(cachep); + if (objp) { + STATS_INC_ALLOCHIT(cachep); + goto out; + } + force_refill = true; } + + STATS_INC_ALLOCMISS(cachep); + objp = cache_alloc_refill(cachep, flags, force_refill); + /* + * the 'ac' may be updated by cache_alloc_refill(), + * and kmemleak_erase() requires its correct value. + */ + ac = cpu_cache_get(cachep); + +out: /* * To avoid a false negative, if an object that is in one of the * per-CPU caches is leaked, we need to make sure kmemleak doesn't @@ -3525,9 +3656,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, struct kmem_list3 *l3; for (i = 0; i < nr_objects; i++) { - void *objp = objpp[i]; + void *objp; struct slab *slabp; + clear_obj_pfmemalloc(&objpp[i]); + objp = objpp[i]; + slabp = virt_to_slab(objp); l3 = cachep->nodelists[node]; list_del(&slabp->list); @@ -3645,7 +3779,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, cache_flusharray(cachep, ac); } - ac->entry[ac->avail++] = objp; + ac_put_obj(cachep, ac, objp); } /**