*/
#include <linux/slab.h>
-#include "slab.h"
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include "internal.h"
+#include "slab.h"
+
/*
* DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
* 0 for faster, smaller code (especially in the critical paths).
#undef CACHE
};
-static struct arraycache_init initarray_cache __initdata =
- { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
/* internal cache of cache description objs */
-static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
static struct kmem_cache kmem_cache_boot = {
- .nodelists = kmem_cache_nodelists,
.batchcount = 1,
.limit = BOOT_CPUCACHE_ENTRIES,
.shared = 1,
}
}
+static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
+{
+ struct kmem_list3 *l3;
+ l3 = cachep->nodelists[q];
+ if (!l3)
+ return;
+
+ slab_set_lock_classes(cachep, &on_slab_l3_key,
+ &on_slab_alc_key, q);
+}
+
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+ int node;
+
+ VM_BUG_ON(OFF_SLAB(cachep));
+ for_each_node(node)
+ on_slab_lock_classes_node(cachep, node);
+}
+
static inline void init_lock_keys(void)
{
int node;
{
}
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+}
+
+static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
}
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
slab_set_debugobj_lock_classes_node(cachep, node);
+ else if (!OFF_SLAB(cachep) &&
+ !(cachep->flags & SLAB_DESTROY_BY_RCU))
+ on_slab_lock_classes_node(cachep, node);
}
init_node_lock_keys(node);
}
/*
+ * The memory after the last cpu cache pointer is used for the
+ * the nodelists pointer.
+ */
+static void setup_nodelists_pointer(struct kmem_cache *cachep)
+{
+ cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+}
+
+/*
* Initialisation. Called after the page allocator have been initialised and
* before smp_init().
*/
void __init kmem_cache_init(void)
{
- size_t left_over;
struct cache_sizes *sizes;
struct cache_names *names;
int i;
- int order;
- int node;
kmem_cache = &kmem_cache_boot;
+ setup_nodelists_pointer(kmem_cache);
if (num_possible_nodes() == 1)
use_alien_caches = 0;
- for (i = 0; i < NUM_INIT_LISTS; i++) {
+ for (i = 0; i < NUM_INIT_LISTS; i++)
kmem_list3_init(&initkmem_list3[i]);
- if (i < MAX_NUMNODES)
- kmem_cache->nodelists[i] = NULL;
- }
+
set_up_list3s(kmem_cache, CACHE_CACHE);
/*
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/
- node = numa_mem_id();
-
/* 1) create the kmem_cache */
- INIT_LIST_HEAD(&slab_caches);
- list_add(&kmem_cache->list, &slab_caches);
- kmem_cache->colour_off = cache_line_size();
- kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
- kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
/*
* struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
*/
- kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
- nr_node_ids * sizeof(struct kmem_list3 *);
- kmem_cache->object_size = kmem_cache->size;
- kmem_cache->size = ALIGN(kmem_cache->object_size,
- cache_line_size());
- kmem_cache->reciprocal_buffer_size =
- reciprocal_value(kmem_cache->size);
-
- for (order = 0; order < MAX_ORDER; order++) {
- cache_estimate(order, kmem_cache->size,
- cache_line_size(), 0, &left_over, &kmem_cache->num);
- if (kmem_cache->num)
- break;
- }
- BUG_ON(!kmem_cache->num);
- kmem_cache->gfporder = order;
- kmem_cache->colour = left_over / kmem_cache->colour_off;
- kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
- sizeof(struct slab), cache_line_size());
+ create_boot_cache(kmem_cache, "kmem_cache",
+ offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+ nr_node_ids * sizeof(struct kmem_list3 *),
+ SLAB_HWCACHE_ALIGN);
+ list_add(&kmem_cache->list, &slab_caches);
/* 2+3) create the kmalloc caches */
sizes = malloc_sizes;
* bug.
*/
- sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
- sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name;
- sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size;
- sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size;
- sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
- __kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
- list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches);
-
- if (INDEX_AC != INDEX_L3) {
- sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
- sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name;
- sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size;
- sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size;
- sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
- __kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
- list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches);
- }
+ sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name,
+ sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS);
+
+ if (INDEX_AC != INDEX_L3)
+ sizes[INDEX_L3].cs_cachep =
+ create_kmalloc_cache(names[INDEX_L3].name,
+ sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS);
slab_early_init = 0;
* Note for systems short on memory removing the alignment will
* allow tighter packing of the smaller caches.
*/
- if (!sizes->cs_cachep) {
- sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
- sizes->cs_cachep->name = names->name;
- sizes->cs_cachep->size = sizes->cs_size;
- sizes->cs_cachep->object_size = sizes->cs_size;
- sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN;
- __kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
- list_add(&sizes->cs_cachep->list, &slab_caches);
- }
+ if (!sizes->cs_cachep)
+ sizes->cs_cachep = create_kmalloc_cache(names->name,
+ sizes->cs_size, ARCH_KMALLOC_FLAGS);
+
#ifdef CONFIG_ZONE_DMA
- sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
- sizes->cs_dmacachep->name = names->name_dma;
- sizes->cs_dmacachep->size = sizes->cs_size;
- sizes->cs_dmacachep->object_size = sizes->cs_size;
- sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN;
- __kmem_cache_create(sizes->cs_dmacachep,
- ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC);
- list_add(&sizes->cs_dmacachep->list, &slab_caches);
+ sizes->cs_dmacachep = create_kmalloc_cache(
+ names->name_dma, sizes->cs_size,
+ SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);
#endif
sizes++;
names++;
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
- BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
memcpy(ptr, cpu_cache_get(kmem_cache),
sizeof(struct arraycache_init));
/*
if (page->pfmemalloc)
SetPageSlabPfmemalloc(page + i);
}
+ memcg_bind_pages(cachep, cachep->gfporder);
if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
__ClearPageSlab(page);
page++;
}
+
+ memcg_release_pages(cachep, cachep->gfporder);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_pages((unsigned long)addr, cachep->gfporder);
+ free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
}
static void kmem_rcu_free(struct rcu_head *head)
if (slab_state == DOWN) {
/*
- * Note: the first kmem_cache_create must create the cache
+ * Note: Creation of first cache (kmem_cache).
+ * The setup_list3s is taken care
+ * of by the caller of __kmem_cache_create
+ */
+ cachep->array[smp_processor_id()] = &initarray_generic.cache;
+ slab_state = PARTIAL;
+ } else if (slab_state == PARTIAL) {
+ /*
+ * Note: the second kmem_cache_create must create the cache
* that's used by kmalloc(24), otherwise the creation of
* further caches will BUG().
*/
/*
* If the cache that's used by kmalloc(sizeof(kmem_list3)) is
- * the first cache, then we need to set up all its list3s,
+ * the second cache, then we need to set up all its list3s,
* otherwise the creation of further caches will BUG().
*/
set_up_list3s(cachep, SIZE_AC);
else
slab_state = PARTIAL_ARRAYCACHE;
} else {
+ /* Remaining boot caches */
cachep->array[smp_processor_id()] =
kmalloc(sizeof(struct arraycache_init), gfp);
size &= ~(BYTES_PER_WORD - 1);
}
- /* calculate the final buffer alignment: */
-
- /* 1) arch recommendation: can be overridden for debug */
- if (flags & SLAB_HWCACHE_ALIGN) {
- /*
- * Default alignment: as specified by the arch code. Except if
- * an object is really small, then squeeze multiple objects into
- * one cacheline.
- */
- ralign = cache_line_size();
- while (size <= ralign / 2)
- ralign /= 2;
- } else {
- ralign = BYTES_PER_WORD;
- }
-
/*
* Redzoning and user store require word alignment or possibly larger.
* Note this will be overridden by architecture or caller mandated
size &= ~(REDZONE_ALIGN - 1);
}
- /* 2) arch mandated alignment */
- if (ralign < ARCH_SLAB_MINALIGN) {
- ralign = ARCH_SLAB_MINALIGN;
- }
/* 3) caller mandated alignment */
if (ralign < cachep->align) {
ralign = cachep->align;
else
gfp = GFP_NOWAIT;
- cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+ setup_nodelists_pointer(cachep);
#if DEBUG
/*
WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
slab_set_debugobj_lock_classes(cachep);
- }
+ } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
+ on_slab_lock_classes(cachep);
return 0;
}
if (slab_should_failslab(cachep, flags))
return NULL;
+ cachep = memcg_kmem_get_cache(cachep, flags);
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
if (slab_should_failslab(cachep, flags))
return NULL;
+ cachep = memcg_kmem_get_cache(cachep, flags);
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
objp = __do_cache_alloc(cachep, flags);
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
+ cachep = cache_from_obj(cachep, objp);
+ if (!cachep)
+ return;
local_irq_save(flags);
debug_check_no_locks_freed(objp, cachep->object_size);
}
/* Always called with the slab_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
{
struct ccupdate_struct *new;
return alloc_kmemlist(cachep, gfp);
}
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+ int batchcount, int shared, gfp_t gfp)
+{
+ int ret;
+ struct kmem_cache *c = NULL;
+ int i = 0;
+
+ ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+ if (slab_state < FULL)
+ return ret;
+
+ if ((ret < 0) || !is_root_cache(cachep))
+ return ret;
+
+ VM_BUG_ON(!mutex_is_locked(&slab_mutex));
+ for_each_memcg_cache_index(i) {
+ c = cache_from_memcg(cachep, i);
+ if (c)
+ /* return value determined by the parent cache only */
+ __do_tune_cpucache(c, limit, batchcount, shared, gfp);
+ }
+
+ return ret;
+}
+
/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
int err;
- int limit, shared;
+ int limit = 0;
+ int shared = 0;
+ int batchcount = 0;
+
+ if (!is_root_cache(cachep)) {
+ struct kmem_cache *root = memcg_root_cache(cachep);
+ limit = root->limit;
+ shared = root->shared;
+ batchcount = root->batchcount;
+ }
+ if (limit && shared && batchcount)
+ goto skip_setup;
/*
* The head array serves three purposes:
* - create a LIFO ordering, i.e. return objects that are cache-warm
if (limit > 32)
limit = 32;
#endif
- err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+ batchcount = (limit + 1) / 2;
+skip_setup:
+ err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
if (err)
printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
cachep->name, -err);