#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
+#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmemcheck.h>
#include <trace/events/kmem.h>
+#include "internal.h"
+
/*
* Lock order:
- * 1. slub_lock (Global Semaphore)
+ * 1. slab_mutex (Global Mutex)
* 2. node->list_lock
* 3. slab_lock(page) (Only on some arches and for debugging)
*
- * slub_lock
+ * slab_mutex
*
- * The role of the slub_lock is to protect the list of all the slabs
+ * The role of the slab_mutex is to protect the list of all the slabs
* and to synchronize major metadata changes to slab cache structures.
*
* The slab_lock is only used for debugging and on arches that do not
static struct notifier_block slab_notifier;
#endif
-static enum {
- DOWN, /* No slab functionality available */
- PARTIAL, /* Kmem_cache_node works */
- UP, /* Everything works but does not show up in sysfs */
- SYSFS /* Sysfs up */
-} slab_state = DOWN;
-
-/* A list of all slab caches on the system */
-static DECLARE_RWSEM(slub_lock);
-static LIST_HEAD(slab_caches);
-
/*
* Tracking user of a slab.
*/
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s)
-{
- kfree(s->name);
- kfree(s);
-}
+static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif
* Core slab cache functions
*******************************************************************/
-int slab_is_available(void)
-{
- return slab_state >= UP;
-}
-
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
return s->node[node];
* and whatever may come after it.
*/
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
- return s->objsize;
+ return s->object_size;
#endif
/*
if (p > addr + 16)
print_section("Bytes b4 ", p - 16, 16);
- print_section("Object ", p, min_t(unsigned long, s->objsize,
+ print_section("Object ", p, min_t(unsigned long, s->object_size,
PAGE_SIZE));
if (s->flags & SLAB_RED_ZONE)
- print_section("Redzone ", p + s->objsize,
- s->inuse - s->objsize);
+ print_section("Redzone ", p + s->object_size,
+ s->inuse - s->object_size);
if (s->offset)
off = s->offset + sizeof(void *);
print_trailer(s, page, object);
}
-static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
+static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
{
va_list args;
char buf[100];
u8 *p = object;
if (s->flags & __OBJECT_POISON) {
- memset(p, POISON_FREE, s->objsize - 1);
- p[s->objsize - 1] = POISON_END;
+ memset(p, POISON_FREE, s->object_size - 1);
+ p[s->object_size - 1] = POISON_END;
}
if (s->flags & SLAB_RED_ZONE)
- memset(p + s->objsize, val, s->inuse - s->objsize);
+ memset(p + s->object_size, val, s->inuse - s->object_size);
}
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
* Poisoning uses 0x6b (POISON_FREE) and the last byte is
* 0xa5 (POISON_END)
*
- * object + s->objsize
+ * object + s->object_size
* Padding to reach word boundary. This is also used for Redzoning.
* Padding is extended by another word if Redzoning is enabled and
- * objsize == inuse.
+ * object_size == inuse.
*
* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
* 0xcc (RED_ACTIVE) for objects in use.
* object + s->size
* Nothing is used beyond s->size.
*
- * If slabcaches are merged then the objsize and inuse boundaries are mostly
+ * If slabcaches are merged then the object_size and inuse boundaries are mostly
* ignored. And therefore no slab options that rely on these boundaries
* may be used with merged slabcaches.
*/
void *object, u8 val)
{
u8 *p = object;
- u8 *endobject = object + s->objsize;
+ u8 *endobject = object + s->object_size;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, page, object, "Redzone",
- endobject, val, s->inuse - s->objsize))
+ endobject, val, s->inuse - s->object_size))
return 0;
} else {
- if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
+ if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, page, p, "Alignment padding",
- endobject, POISON_INUSE, s->inuse - s->objsize);
+ endobject, POISON_INUSE, s->inuse - s->object_size);
}
}
if (s->flags & SLAB_POISON) {
if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
(!check_bytes_and_report(s, page, p, "Poison", p,
- POISON_FREE, s->objsize - 1) ||
+ POISON_FREE, s->object_size - 1) ||
!check_bytes_and_report(s, page, p, "Poison",
- p + s->objsize - 1, POISON_END, 1)))
+ p + s->object_size - 1, POISON_END, 1)))
return 0;
/*
* check_pad_bytes cleans up on its own.
page->freelist);
if (!alloc)
- print_section("Object ", (void *)object, s->objsize);
+ print_section("Object ", (void *)object, s->object_size);
dump_stack();
}
lockdep_trace_alloc(flags);
might_sleep_if(flags & __GFP_WAIT);
- return should_failslab(s->objsize, flags, s->flags);
+ return should_failslab(s->object_size, flags, s->flags);
}
static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
{
flags &= gfp_allowed_mask;
kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
- kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
+ kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
}
static inline void slab_free_hook(struct kmem_cache *s, void *x)
unsigned long flags;
local_irq_save(flags);
- kmemcheck_slab_free(s, x, s->objsize);
- debug_check_no_locks_freed(x, s->objsize);
+ kmemcheck_slab_free(s, x, s->object_size);
+ debug_check_no_locks_freed(x, s->object_size);
local_irq_restore(flags);
}
#endif
if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(x, s->objsize);
+ debug_check_no_obj_freed(x, s->object_size);
}
/*
return 0;
}
-static noinline int free_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, unsigned long addr)
+static noinline struct kmem_cache_node *free_debug_processing(
+ struct kmem_cache *s, struct page *page, void *object,
+ unsigned long addr, unsigned long *flags)
{
- unsigned long flags;
- int rc = 0;
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
- local_irq_save(flags);
+ spin_lock_irqsave(&n->list_lock, *flags);
slab_lock(page);
if (!check_slab(s, page))
set_track(s, object, TRACK_FREE, addr);
trace(s, page, object, 0);
init_object(s, object, SLUB_RED_INACTIVE);
- rc = 1;
out:
slab_unlock(page);
- local_irq_restore(flags);
- return rc;
+ /*
+ * Keep n->list_lock held to preserve integrity
+ * until the object is actually freed
+ */
+ return n;
fail:
+ slab_unlock(page);
+ spin_unlock_irqrestore(&n->list_lock, *flags);
slab_fix(s, "Object at 0x%p not freed", object);
- goto out;
+ return NULL;
}
static int __init setup_slub_debug(char *str)
__setup("slub_debug", setup_slub_debug);
-static unsigned long kmem_cache_flags(unsigned long objsize,
+static unsigned long kmem_cache_flags(unsigned long object_size,
unsigned long flags, const char *name,
void (*ctor)(void *))
{
static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
-static inline int free_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, unsigned long addr) { return 0; }
+static inline struct kmem_cache_node *free_debug_processing(
+ struct kmem_cache *s, struct page *page, void *object,
+ unsigned long addr, unsigned long *flags) { return NULL; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
{ return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
static inline void remove_full(struct kmem_cache *s, struct page *page) {}
-static inline unsigned long kmem_cache_flags(unsigned long objsize,
+static inline unsigned long kmem_cache_flags(unsigned long object_size,
unsigned long flags, const char *name,
void (*ctor)(void *))
{
stat(s, ORDER_FALLBACK);
}
- if (flags & __GFP_WAIT)
- local_irq_disable();
-
- if (!page)
- return NULL;
-
- if (kmemcheck_enabled
+ if (kmemcheck_enabled && page
&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
int pages = 1 << oo_order(oo);
kmemcheck_mark_unallocated_pages(page, pages);
}
+ if (flags & __GFP_WAIT)
+ local_irq_disable();
+ if (!page)
+ return NULL;
+
page->objects = oo_objects(oo);
mod_zone_page_state(page_zone(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ?
inc_slabs_node(s, page_to_nid(page), page->objects);
page->slab = s;
__SetPageSlab(page);
+ if (page->pfmemalloc)
+ SetPageSlabPfmemalloc(page);
start = page_address(page);
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages);
+ __ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
reset_page_mapcount(page);
if (current->reclaim_state)
}
/*
- * Lock slab, remove from the partial list and put the object into the
- * per cpu freelist.
+ * Remove slab from the partial list, freeze it and
+ * return the pointer to the freelist.
*
* Returns a list of objects or NULL if it fails.
*
- * Must hold list_lock.
+ * Must hold list_lock since we modify the partial list.
*/
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page,
* The old freelist is the list of objects for the
* per cpu allocation list.
*/
- do {
- freelist = page->freelist;
- counters = page->counters;
- new.counters = counters;
- if (mode) {
- new.inuse = page->objects;
- new.freelist = NULL;
- } else {
- new.freelist = freelist;
- }
+ freelist = page->freelist;
+ counters = page->counters;
+ new.counters = counters;
+ if (mode) {
+ new.inuse = page->objects;
+ new.freelist = NULL;
+ } else {
+ new.freelist = freelist;
+ }
- VM_BUG_ON(new.frozen);
- new.frozen = 1;
+ VM_BUG_ON(new.frozen);
+ new.frozen = 1;
- } while (!__cmpxchg_double_slab(s, page,
+ if (!__cmpxchg_double_slab(s, page,
freelist, counters,
new.freelist, new.counters,
- "lock and freeze"));
+ "acquire_slab"))
+ return NULL;
remove_partial(n, page);
+ WARN_ON(!freelist);
return freelist;
}
if (!object) {
c->page = page;
- c->node = page_to_nid(page);
stat(s, ALLOC_FROM_PARTIAL);
object = t;
available = page->objects - page->inuse;
do {
cpuset_mems_cookie = get_mems_allowed();
- zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+ zonelist = node_zonelist(slab_node(), flags);
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct kmem_cache_node *n;
/*
* Remove the cpu slab
*/
-static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
- struct page *page = c->page;
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
int lock = 0;
enum slab_modes l = M_NONE, m = M_NONE;
- void *freelist;
void *nextfree;
int tail = DEACTIVATE_TO_HEAD;
struct page new;
tail = DEACTIVATE_TO_TAIL;
}
- c->tid = next_tid(c->tid);
- c->page = NULL;
- freelist = c->freelist;
- c->freelist = NULL;
-
/*
* Stage one: Free all available per cpu objects back
* to the page freelist while it is still frozen. Leave the
}
}
-/* Unfreeze all the cpu partial slabs */
+/*
+ * Unfreeze all the cpu partial slabs.
+ *
+ * This function must be called with interrupts disabled.
+ */
static void unfreeze_partials(struct kmem_cache *s)
{
- struct kmem_cache_node *n = NULL;
+ struct kmem_cache_node *n = NULL, *n2 = NULL;
struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
struct page *page, *discard_page = NULL;
while ((page = c->partial)) {
- enum slab_modes { M_PARTIAL, M_FREE };
- enum slab_modes l, m;
struct page new;
struct page old;
c->partial = page->next;
- l = M_FREE;
+
+ n2 = get_node(s, page_to_nid(page));
+ if (n != n2) {
+ if (n)
+ spin_unlock(&n->list_lock);
+
+ n = n2;
+ spin_lock(&n->list_lock);
+ }
do {
new.frozen = 0;
- if (!new.inuse && (!n || n->nr_partial > s->min_partial))
- m = M_FREE;
- else {
- struct kmem_cache_node *n2 = get_node(s,
- page_to_nid(page));
-
- m = M_PARTIAL;
- if (n != n2) {
- if (n)
- spin_unlock(&n->list_lock);
-
- n = n2;
- spin_lock(&n->list_lock);
- }
- }
-
- if (l != m) {
- if (l == M_PARTIAL) {
- remove_partial(n, page);
- stat(s, FREE_REMOVE_PARTIAL);
- } else {
- add_partial(n, page,
- DEACTIVATE_TO_TAIL);
- stat(s, FREE_ADD_PARTIAL);
- }
-
- l = m;
- }
-
- } while (!cmpxchg_double_slab(s, page,
+ } while (!__cmpxchg_double_slab(s, page,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"));
- if (m == M_FREE) {
+ if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
page->next = discard_page;
discard_page = page;
+ } else {
+ add_partial(n, page, DEACTIVATE_TO_TAIL);
+ stat(s, FREE_ADD_PARTIAL);
}
}
local_irq_save(flags);
unfreeze_partials(s);
local_irq_restore(flags);
+ oldpage = NULL;
pobjects = 0;
pages = 0;
stat(s, CPU_PARTIAL_DRAIN);
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
stat(s, CPUSLAB_FLUSH);
- deactivate_slab(s, c);
+ deactivate_slab(s, c->page, c->freelist);
+ /*
+ * deactivate_slab() is handed the page and freelist directly and is
+ * not given the per-cpu structure, so advancing the tid and clearing
+ * c->page / c->freelist is done here by the caller.
+ */
+ c->tid = next_tid(c->tid);
+ c->page = NULL;
+ c->freelist = NULL;
}
/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
-static inline int node_match(struct kmem_cache_cpu *c, int node)
+static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
- if (node != NUMA_NO_NODE && c->node != node)
+ if (node != NUMA_NO_NODE && page_to_nid(page) != node)
return 0;
#endif
return 1;
"SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
nid, gfpflags);
printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
- "default order: %d, min order: %d\n", s->name, s->objsize,
+ "default order: %d, min order: %d\n", s->name, s->object_size,
s->size, oo_order(s->oo), oo_order(s->min));
- if (oo_order(s->min) > get_order(s->objsize))
+ if (oo_order(s->min) > get_order(s->object_size))
printk(KERN_WARNING " %s debugging increased min order, use "
"slub_debug=O to disable.\n", s->name);
static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
int node, struct kmem_cache_cpu **pc)
{
- void *object;
- struct kmem_cache_cpu *c;
- struct page *page = new_slab(s, flags, node);
+ void *freelist;
+ struct kmem_cache_cpu *c = *pc;
+ struct page *page;
+
+ freelist = get_partial(s, flags, node, c);
+
+ if (freelist)
+ return freelist;
+ page = new_slab(s, flags, node);
if (page) {
c = __this_cpu_ptr(s->cpu_slab);
if (c->page)
* No other reference to the page yet so we can
* muck around with it freely without cmpxchg
*/
- object = page->freelist;
+ freelist = page->freelist;
page->freelist = NULL;
stat(s, ALLOC_SLAB);
- c->node = page_to_nid(page);
c->page = page;
*pc = c;
} else
- object = NULL;
+ freelist = NULL;
- return object;
+ return freelist;
+}
+/*
+ * Tell whether an allocation with @gfpflags may be served from @page.
+ *
+ * A page flagged PageSlabPfmemalloc matches only when
+ * gfp_pfmemalloc_allowed(gfpflags) says so; any other page always matches.
+ */
+static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
+{
+ if (unlikely(PageSlabPfmemalloc(page)))
+ return gfp_pfmemalloc_allowed(gfpflags);
+
+ return true;
}
/*
* The page is still frozen if the return value is not NULL.
*
* If this function returns NULL then the page has been unfrozen.
+ *
+ * This function must be called with interrupts disabled.
*/
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
do {
freelist = page->freelist;
counters = page->counters;
+
new.counters = counters;
VM_BUG_ON(!new.frozen);
new.inuse = page->objects;
new.frozen = freelist != NULL;
- } while (!cmpxchg_double_slab(s, page,
+ } while (!__cmpxchg_double_slab(s, page,
freelist, counters,
NULL, new.counters,
"get_freelist"));
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
- void **object;
+ void *freelist;
+ struct page *page;
unsigned long flags;
local_irq_save(flags);
c = this_cpu_ptr(s->cpu_slab);
#endif
- if (!c->page)
+ page = c->page;
+ if (!page)
goto new_slab;
redo:
- if (unlikely(!node_match(c, node))) {
+
+ if (unlikely(!node_match(page, node))) {
stat(s, ALLOC_NODE_MISMATCH);
- deactivate_slab(s, c);
+ deactivate_slab(s, page, c->freelist);
+ c->page = NULL;
+ c->freelist = NULL;
+ goto new_slab;
+ }
+
+ /*
+ * By rights, we should be searching for a slab page that was
+ * PFMEMALLOC but right now, we are losing the pfmemalloc
+ * information when the page leaves the per-cpu allocator
+ */
+ if (unlikely(!pfmemalloc_match(page, gfpflags))) {
+ deactivate_slab(s, page, c->freelist);
+ c->page = NULL;
+ c->freelist = NULL;
goto new_slab;
}
/* must check again c->freelist in case of cpu migration or IRQ */
- object = c->freelist;
- if (object)
+ freelist = c->freelist;
+ if (freelist)
goto load_freelist;
stat(s, ALLOC_SLOWPATH);
- object = get_freelist(s, c->page);
+ freelist = get_freelist(s, page);
- if (!object) {
+ if (!freelist) {
c->page = NULL;
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
stat(s, ALLOC_REFILL);
load_freelist:
- c->freelist = get_freepointer(s, object);
+ /*
+ * freelist is pointing to the list of objects to be used.
+ * page is pointing to the page from which the objects are obtained.
+ * That page must be frozen for per cpu allocations to work.
+ */
+ VM_BUG_ON(!c->page->frozen);
+ c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
local_irq_restore(flags);
- return object;
+ return freelist;
new_slab:
if (c->partial) {
- c->page = c->partial;
- c->partial = c->page->next;
- c->node = page_to_nid(c->page);
+ page = c->page = c->partial;
+ c->partial = page->next;
stat(s, CPU_PARTIAL_ALLOC);
c->freelist = NULL;
goto redo;
}
- /* Then do expensive stuff like retrieving pages from the partial lists */
- object = get_partial(s, gfpflags, node, c);
+ freelist = new_slab_objects(s, gfpflags, node, &c);
- if (unlikely(!object)) {
+ if (unlikely(!freelist)) {
+ if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+ slab_out_of_memory(s, gfpflags, node);
- object = new_slab_objects(s, gfpflags, node, &c);
-
- if (unlikely(!object)) {
- if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
- slab_out_of_memory(s, gfpflags, node);
-
- local_irq_restore(flags);
- return NULL;
- }
+ local_irq_restore(flags);
+ return NULL;
}
- if (likely(!kmem_cache_debug(s)))
+ page = c->page;
+ if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
goto load_freelist;
/* Only entered in the debug case */
- if (!alloc_debug_processing(s, c->page, object, addr))
+ if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
goto new_slab; /* Slab failed checks. Next slab needed */
- c->freelist = get_freepointer(s, object);
- deactivate_slab(s, c);
- c->node = NUMA_NO_NODE;
+ deactivate_slab(s, page, get_freepointer(s, freelist));
+ c->page = NULL;
+ c->freelist = NULL;
local_irq_restore(flags);
- return object;
+ return freelist;
}
/*
{
void **object;
struct kmem_cache_cpu *c;
+ struct page *page;
unsigned long tid;
if (slab_pre_alloc_hook(s, gfpflags))
barrier();
object = c->freelist;
- if (unlikely(!object || !node_match(c, node)))
-
+ page = c->page;
+ if (unlikely(!object || !node_match(page, node)))
object = __slab_alloc(s, gfpflags, node, addr, c);
else {
}
if (unlikely(gfpflags & __GFP_ZERO) && object)
- memset(object, 0, s->objsize);
+ memset(object, 0, s->object_size);
slab_post_alloc_hook(s, gfpflags, object);
{
void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
- trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
+ trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
return ret;
}
void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
- s->objsize, s->size, gfpflags, node);
+ s->object_size, s->size, gfpflags, node);
return ret;
}
stat(s, FREE_SLOWPATH);
- if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
+ if (kmem_cache_debug(s) &&
+ !(n = free_debug_processing(s, page, x, addr, &flags)))
return;
do {
page = virt_to_head_page(x);
+ if (kmem_cache_debug(s) && page->slab != s) {
+ /*
+ * s is the cache the caller passed in, page->slab is the
+ * cache that owns the slab page the object lives on. Print
+ * them in the order the message text describes: the wrong
+ * cache first, then the cache the object is really from.
+ */
+ pr_err("kmem_cache_free: Wrong slab cache. %s but object"
+ " is from %s\n", s->name, page->slab->name);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
slab_free(s, page, x, _RET_IP_);
trace_kmem_cache_free(_RET_IP_, x);
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
unsigned long flags = s->flags;
- unsigned long size = s->objsize;
+ unsigned long size = s->object_size;
unsigned long align = s->align;
int order;
* end of the object and the free pointer. If not then add an
* additional word to have some bytes to store Redzone information.
*/
- if ((flags & SLAB_RED_ZONE) && size == s->objsize)
+ if ((flags & SLAB_RED_ZONE) && size == s->object_size)
size += sizeof(void *);
#endif
* user specified and the dynamic determination of cache line size
* on bootup.
*/
- align = calculate_alignment(flags, align, s->objsize);
+ align = calculate_alignment(flags, align, s->object_size);
s->align = align;
/*
}
-static int kmem_cache_open(struct kmem_cache *s,
- const char *name, size_t size,
- size_t align, unsigned long flags,
- void (*ctor)(void *))
+static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
- memset(s, 0, kmem_size);
- s->name = name;
- s->ctor = ctor;
- s->objsize = size;
- s->align = align;
- s->flags = kmem_cache_flags(size, flags, name, ctor);
+ s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
s->reserved = 0;
if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
* Disable debugging flags that store metadata if the min slab
* order increased.
*/
- if (get_order(s->size) > get_order(s->objsize)) {
+ if (get_order(s->size) > get_order(s->object_size)) {
s->flags &= ~DEBUG_METADATA_FLAGS;
s->offset = 0;
if (!calculate_sizes(s, -1))
else
s->cpu_partial = 30;
- s->refcount = 1;
#ifdef CONFIG_NUMA
s->remote_node_defrag_ratio = 1000;
#endif
goto error;
if (alloc_kmem_cache_cpus(s))
- return 1;
+ return 0;
free_kmem_cache_nodes(s);
error:
if (flags & SLAB_PANIC)
panic("Cannot create slab %s size=%lu realsize=%u "
"order=%u offset=%u flags=%lx\n",
- s->name, (unsigned long)size, s->size, oo_order(s->oo),
+ s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
s->offset, flags);
- return 0;
+ return -EINVAL;
}
/*
*/
unsigned int kmem_cache_size(struct kmem_cache *s)
{
- return s->objsize;
+ return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);
sizeof(long), GFP_ATOMIC);
if (!map)
return;
- slab_err(s, page, "%s", text);
+ slab_err(s, page, text, s->name);
slab_lock(page);
get_map(s, page, map);
discard_slab(s, page);
} else {
list_slab_objects(s, page,
- "Objects remaining on kmem_cache_close()");
+ "Objects remaining in %s on kmem_cache_close()");
}
}
}
int node;
flush_all(s);
- free_percpu(s->cpu_slab);
/* Attempt to free all objects */
for_each_node_state(node, N_NORMAL_MEMORY) {
struct kmem_cache_node *n = get_node(s, node);
if (n->nr_partial || slabs_node(s, node))
return 1;
}
+ free_percpu(s->cpu_slab);
free_kmem_cache_nodes(s);
return 0;
}
-/*
- * Close a cache and release the kmem_cache structure
- * (must be used for caches created using kmem_cache_create)
- */
-void kmem_cache_destroy(struct kmem_cache *s)
-{
- down_write(&slub_lock);
- s->refcount--;
- if (!s->refcount) {
- list_del(&s->list);
- up_write(&slub_lock);
- if (kmem_cache_close(s)) {
- printk(KERN_ERR "SLUB %s: %s called for cache that "
- "still has objects.\n", s->name, __func__);
- dump_stack();
- }
- if (s->flags & SLAB_DESTROY_BY_RCU)
- rcu_barrier();
+int __kmem_cache_shutdown(struct kmem_cache *s)
+{
+ int rc = kmem_cache_close(s);
+ /*
+ * kmem_cache_close() returns non-zero when a node still has slabs.
+ * Only remove the sysfs entry when the close succeeded; the result
+ * is handed back to the caller either way.
+ */
+ if (!rc)
sysfs_slab_remove(s);
- } else
- up_write(&slub_lock);
+
+ return rc;
}
-EXPORT_SYMBOL(kmem_cache_destroy);
/********************************************************************
* Kmalloc subsystem
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);
-static struct kmem_cache *kmem_cache;
-
#ifdef CONFIG_ZONE_DMA
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
{
struct kmem_cache *s;
- s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
+ s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+ s->name = name;
+ s->size = s->object_size = size;
+ s->align = ARCH_KMALLOC_MINALIGN;
/*
* This function is called with IRQs disabled during early-boot on
- * single CPU so there's no need to take slub_lock here.
+ * single CPU so there's no need to take slab_mutex here.
*/
- if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
- flags, NULL))
+ if (kmem_cache_open(s, flags))
goto panic;
list_add(&s->list, &slab_caches);
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kmemleak_free(x);
- put_page(page);
+ __free_pages(page, compound_order(page));
return;
}
slab_free(page->slab, page, object, _RET_IP_);
{
struct kmem_cache *s;
- down_read(&slub_lock);
+ mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list)
kmem_cache_shrink(s);
- up_read(&slub_lock);
+ mutex_unlock(&slab_mutex);
return 0;
}
if (offline_node < 0)
return;
- down_read(&slub_lock);
+ mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) {
n = get_node(s, offline_node);
if (n) {
kmem_cache_free(kmem_cache_node, n);
}
}
- up_read(&slub_lock);
+ mutex_unlock(&slab_mutex);
}
static int slab_mem_going_online_callback(void *arg)
* allocate a kmem_cache_node structure in order to bring the node
* online.
*/
- down_read(&slub_lock);
+ mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) {
/*
* XXX: kmem_cache_alloc_node will fallback to other nodes
s->node[nid] = n;
}
out:
- up_read(&slub_lock);
+ mutex_unlock(&slab_mutex);
return ret;
}
slub_max_order = 0;
kmem_size = offsetof(struct kmem_cache, node) +
- nr_node_ids * sizeof(struct kmem_cache_node *);
+ nr_node_ids * sizeof(struct kmem_cache_node *);
/* Allocate two kmem_caches from the page allocator */
kmalloc_size = ALIGN(kmem_size, cache_line_size());
order = get_order(2 * kmalloc_size);
- kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
+ kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order);
/*
* Must first have the slab cache available for the allocations of the
*/
kmem_cache_node = (void *)kmem_cache + kmalloc_size;
- kmem_cache_open(kmem_cache_node, "kmem_cache_node",
- sizeof(struct kmem_cache_node),
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ kmem_cache_node->name = "kmem_cache_node";
+ kmem_cache_node->size = kmem_cache_node->object_size =
+ sizeof(struct kmem_cache_node);
+ kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
slab_state = PARTIAL;
temp_kmem_cache = kmem_cache;
- kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ kmem_cache->name = "kmem_cache";
+ kmem_cache->size = kmem_cache->object_size = kmem_size;
+ kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+
kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
memcpy(kmem_cache, temp_kmem_cache, kmem_size);
if (s && s->size) {
char *name = kasprintf(GFP_NOWAIT,
- "dma-kmalloc-%d", s->objsize);
+ "dma-kmalloc-%d", s->object_size);
BUG_ON(!name);
kmalloc_dma_caches[i] = create_kmalloc_cache(name,
- s->objsize, SLAB_CACHE_DMA);
+ s->object_size, SLAB_CACHE_DMA);
}
}
#endif
return NULL;
}
-struct kmem_cache *kmem_cache_create(const char *name, size_t size,
+struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;
- char *n;
-
- if (WARN_ON(!name))
- return NULL;
- down_write(&slub_lock);
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
s->refcount++;
* Adjust the object sizes so that we clear
* the complete object on kzalloc.
*/
- s->objsize = max(s->objsize, (int)size);
+ s->object_size = max(s->object_size, (int)size);
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
if (sysfs_slab_alias(s, name)) {
s->refcount--;
- goto err;
+ s = NULL;
}
- up_write(&slub_lock);
- return s;
}
- n = kstrdup(name, GFP_KERNEL);
- if (!n)
- goto err;
+ return s;
+}
- s = kmalloc(kmem_size, GFP_KERNEL);
- if (s) {
- if (kmem_cache_open(s, n,
- size, align, flags, ctor)) {
- list_add(&s->list, &slab_caches);
- up_write(&slub_lock);
- if (sysfs_slab_add(s)) {
- down_write(&slub_lock);
- list_del(&s->list);
- kfree(n);
- kfree(s);
- goto err;
- }
- return s;
- }
- kfree(s);
- }
- kfree(n);
-err:
- up_write(&slub_lock);
+int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
+{
+ int err;
- if (flags & SLAB_PANIC)
- panic("Cannot create slabcache %s\n", name);
- else
- s = NULL;
- return s;
+ err = kmem_cache_open(s, flags);
+ if (err)
+ return err;
+ /*
+ * sysfs_slab_add() is called with slab_mutex released; the mutex is
+ * taken again before continuing.
+ * NOTE(review): this implies the caller holds slab_mutex on entry
+ * and expects it held on return - confirm against the caller.
+ */
+ mutex_unlock(&slab_mutex);
+ err = sysfs_slab_add(s);
+ mutex_lock(&slab_mutex);
+ /*
+ * sysfs registration failed: close the cache again and report the
+ * error to the caller.
+ */
+ if (err)
+ kmem_cache_close(s);
+
+ return err;
}
-EXPORT_SYMBOL(kmem_cache_create);
#ifdef CONFIG_SMP
/*
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- down_read(&slub_lock);
+ mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) {
local_irq_save(flags);
__flush_cpu_slab(s, cpu);
local_irq_restore(flags);
}
- up_read(&slub_lock);
+ mutex_unlock(&slab_mutex);
break;
default:
break;
for_each_possible_cpu(cpu) {
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
- int node = ACCESS_ONCE(c->node);
+ int node;
struct page *page;
- if (node < 0)
- continue;
page = ACCESS_ONCE(c->page);
- if (page) {
- if (flags & SO_TOTAL)
- x = page->objects;
- else if (flags & SO_OBJECTS)
- x = page->inuse;
- else
- x = 1;
+ if (!page)
+ continue;
- total += x;
- nodes[node] += x;
- }
- page = c->partial;
+ node = page_to_nid(page);
+ if (flags & SO_TOTAL)
+ x = page->objects;
+ else if (flags & SO_OBJECTS)
+ x = page->inuse;
+ else
+ x = 1;
+
+ total += x;
+ nodes[node] += x;
+ page = ACCESS_ONCE(c->partial);
if (page) {
x = page->pobjects;
total += x;
nodes[node] += x;
}
+
per_cpu[node]++;
}
}
static ssize_t object_size_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", s->objsize);
+ return sprintf(buf, "%d\n", s->object_size);
}
SLAB_ATTR_RO(object_size);
return err;
}
-static void kmem_cache_release(struct kobject *kobj)
-{
- struct kmem_cache *s = to_slab(kobj);
-
- kfree(s->name);
- kfree(s);
-}
-
static const struct sysfs_ops slab_sysfs_ops = {
.show = slab_attr_show,
.store = slab_attr_store,
static struct kobj_type slab_ktype = {
.sysfs_ops = &slab_sysfs_ops,
- .release = kmem_cache_release
};
static int uevent_filter(struct kset *kset, struct kobject *kobj)
const char *name;
int unmergeable;
- if (slab_state < SYSFS)
+ if (slab_state < FULL)
/* Defer until later */
return 0;
static void sysfs_slab_remove(struct kmem_cache *s)
{
- if (slab_state < SYSFS)
+ if (slab_state < FULL)
/*
* Sysfs has not been setup yet so no need to remove the
* cache from sysfs.
{
struct saved_alias *al;
- if (slab_state == SYSFS) {
+ if (slab_state == FULL) {
/*
* If we have a leftover link then remove it.
*/
struct kmem_cache *s;
int err;
- down_write(&slub_lock);
+ mutex_lock(&slab_mutex);
slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
if (!slab_kset) {
- up_write(&slub_lock);
+ mutex_unlock(&slab_mutex);
printk(KERN_ERR "Cannot register slab subsystem.\n");
return -ENOSYS;
}
- slab_state = SYSFS;
+ slab_state = FULL;
list_for_each_entry(s, &slab_caches, list) {
err = sysfs_slab_add(s);
err = sysfs_slab_alias(al->s, al->name);
if (err)
printk(KERN_ERR "SLUB: Unable to add boot slab alias"
- " %s to sysfs\n", s->name);
+ " %s to sysfs\n", al->name);
kfree(al);
}
- up_write(&slub_lock);
+ mutex_unlock(&slab_mutex);
resiliency_test();
return 0;
}
static void print_slabinfo_header(struct seq_file *m)
{
seq_puts(m, "slabinfo - version: 2.1\n");
- seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
+ seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
"<objperslab> <pagesperslab>");
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
{
loff_t n = *pos;
- down_read(&slub_lock);
+ mutex_lock(&slab_mutex);
if (!n)
print_slabinfo_header(m);
static void s_stop(struct seq_file *m, void *p)
{
- up_read(&slub_lock);
+ mutex_unlock(&slab_mutex);
}
static int s_show(struct seq_file *m, void *p)