mm/sl[aou]b: Get rid of __kmem_cache_destroy

[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / slab.c
diff --git a/mm/slab.c b/mm/slab.c

index 28a8f7d..de961b4 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -68,7 +68,7 @@
   * Further notes from the original documentation:
   *
   * 11 April '97.  Started multi-threading - markhe
- *     The global cache-chain is protected by the mutex 'cache_chain_mutex'.
+ *     The global cache-chain is protected by the mutex 'slab_mutex'.
   *     The sem is only needed when accessing/extending the cache-chain, which
   *     can never happen inside an interrupt (kmem_cache_create(),
   *     kmem_cache_shrink() and kmem_cache_reap()).
@@ -87,6 +87,7 @@
   */
  
  #include       <linux/slab.h>
+#include       "slab.h"
  #include       <linux/mm.h>
  #include       <linux/poison.h>
  #include       <linux/swap.h>
@@ -117,12 +118,16 @@
  #include       <linux/memory.h>
  #include       <linux/prefetch.h>
  
+#include       <net/sock.h>
+
  #include       <asm/cacheflush.h>
  #include       <asm/tlbflush.h>
  #include       <asm/page.h>
  
  #include <trace/events/kmem.h>
  
+#include       "internal.h"
+
  /*
   * DEBUG       - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
   *               0 for faster, smaller code (especially in the critical paths).
@@ -151,6 +156,12 @@
  #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
  #endif
  
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
  /* Legal flag mask for kmem_cache_create(). */
  #if DEBUG
  # define CREATE_MASK   (SLAB_RED_ZONE | \
@@ -256,9 +267,30 @@ struct array_cache {
                          * Must have this definition in here for the proper
                          * alignment of array_cache. Also simplifies accessing
                          * the entries.
+                        *
+                        * Entries should not be directly dereferenced, as
+                        * entries belonging to slabs marked pfmemalloc have
+                        * the low bit (SLAB_OBJ_PFMEMALLOC) set.
                          */
  };
  
+#define SLAB_OBJ_PFMEMALLOC    1
+/*
+ * is_obj_pfmemalloc - test whether the pointer value @objp carries the
+ * SLAB_OBJ_PFMEMALLOC tag bit. The pointer is only inspected, never
+ * dereferenced.
+ */
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+       unsigned long tagged = (unsigned long)objp;
+
+       return (tagged & SLAB_OBJ_PFMEMALLOC) != 0;
+}
+
+/*
+ * set_obj_pfmemalloc - OR the SLAB_OBJ_PFMEMALLOC tag into the pointer
+ * stored at @objp. Only the stored pointer value changes; the object it
+ * refers to is not accessed.
+ */
+static inline void set_obj_pfmemalloc(void **objp)
+{
+       unsigned long tagged = (unsigned long)*objp | SLAB_OBJ_PFMEMALLOC;
+
+       *objp = (void *)tagged;
+}
+
+/*
+ * clear_obj_pfmemalloc - mask the SLAB_OBJ_PFMEMALLOC tag out of the pointer
+ * stored at @objp, leaving every other bit of the pointer value as it was.
+ */
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+       unsigned long untagged = (unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC;
+
+       *objp = (void *)untagged;
+}
+
  /*
   * bootstrap: The caches do not work without cpuarrays anymore, but the
   * cpuarrays are allocated from the generic caches...
@@ -424,8 +456,8 @@ static void kmem_list3_init(struct kmem_list3 *parent)
   * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
   *             redzone word.
   * cachep->obj_offset: The real object.
- * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
- * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
+ * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
+ * cachep->size - 1* BYTES_PER_WORD: last caller address
   *                                     [BYTES_PER_WORD long]
   */
  static int obj_offset(struct kmem_cache *cachep)
@@ -433,11 +465,6 @@ static int obj_offset(struct kmem_cache *cachep)
         return cachep->obj_offset;
  }
  
-static int obj_size(struct kmem_cache *cachep)
-{
-       return cachep->obj_size;
-}
-
  static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
  {
         BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
@@ -449,23 +476,22 @@ static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
  {
         BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
         if (cachep->flags & SLAB_STORE_USER)
-               return (unsigned long long *)(objp + cachep->buffer_size -
+               return (unsigned long long *)(objp + cachep->size -
                                               sizeof(unsigned long long) -
                                               REDZONE_ALIGN);
-       return (unsigned long long *) (objp + cachep->buffer_size -
+       return (unsigned long long *) (objp + cachep->size -
                                        sizeof(unsigned long long));
  }
  
  static void **dbg_userword(struct kmem_cache *cachep, void *objp)
  {
         BUG_ON(!(cachep->flags & SLAB_STORE_USER));
-       return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
+       return (void **)(objp + cachep->size - BYTES_PER_WORD);
  }
  
  #else
  
  #define obj_offset(x)                  0
-#define obj_size(cachep)               (cachep->buffer_size)
  #define dbg_redzone1(cachep, objp)     ({BUG(); (unsigned long long *)NULL;})
  #define dbg_redzone2(cachep, objp)     ({BUG(); (unsigned long long *)NULL;})
  #define dbg_userword(cachep, objp)     ({BUG(); (void **)NULL;})
@@ -475,7 +501,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
  #ifdef CONFIG_TRACING
  size_t slab_buffer_size(struct kmem_cache *cachep)
  {
-       return cachep->buffer_size;
+       return cachep->size;
  }
  EXPORT_SYMBOL(slab_buffer_size);
  #endif
@@ -489,13 +515,6 @@ EXPORT_SYMBOL(slab_buffer_size);
  static int slab_max_order = SLAB_MAX_ORDER_LO;
  static bool slab_max_order_set __initdata;
  
-static inline struct kmem_cache *page_get_cache(struct page *page)
-{
-       page = compound_head(page);
-       BUG_ON(!PageSlab(page));
-       return page->slab_cache;
-}
-
  static inline struct kmem_cache *virt_to_cache(const void *obj)
  {
         struct page *page = virt_to_head_page(obj);
@@ -513,13 +532,13 @@ static inline struct slab *virt_to_slab(const void *obj)
  static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
                                  unsigned int idx)
  {
-       return slab->s_mem + cache->buffer_size * idx;
+       return slab->s_mem + cache->size * idx;
  }
  
  /*
- * We want to avoid an expensive divide : (offset / cache->buffer_size)
- *   Using the fact that buffer_size is a constant for a particular cache,
- *   we can replace (offset / cache->buffer_size) by
+ * We want to avoid an expensive divide : (offset / cache->size)
+ *   Using the fact that size is a constant for a particular cache,
+ *   we can replace (offset / cache->size) by
   *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
   */
  static inline unsigned int obj_to_index(const struct kmem_cache *cache,
@@ -559,39 +578,18 @@ static struct arraycache_init initarray_generic =
      { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
  
  /* internal cache of cache description objs */
-static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
-static struct kmem_cache cache_cache = {
-       .nodelists = cache_cache_nodelists,
+static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
+static struct kmem_cache kmem_cache_boot = {
+       .nodelists = kmem_cache_nodelists,
         .batchcount = 1,
         .limit = BOOT_CPUCACHE_ENTRIES,
         .shared = 1,
-       .buffer_size = sizeof(struct kmem_cache),
+       .size = sizeof(struct kmem_cache),
         .name = "kmem_cache",
  };
  
  #define BAD_ALIEN_MAGIC 0x01020304ul
  
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static enum {
-       NONE,
-       PARTIAL_AC,
-       PARTIAL_L3,
-       EARLY,
-       LATE,
-       FULL
-} g_cpucache_up;
-
-/*
- * used by boot code to determine if it can use slab based allocator
- */
-int slab_is_available(void)
-{
-       return g_cpucache_up >= EARLY;
-}
-
  #ifdef CONFIG_LOCKDEP
  
  /*
@@ -657,7 +655,7 @@ static void init_node_lock_keys(int q)
  {
         struct cache_sizes *s = malloc_sizes;
  
-       if (g_cpucache_up < LATE)
+       if (slab_state < UP)
                 return;
  
         for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -697,12 +695,6 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
  }
  #endif
  
-/*
- * Guard access to the cache-chain.
- */
-static DEFINE_MUTEX(cache_chain_mutex);
-static struct list_head cache_chain;
-
  static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
  
  static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -932,6 +924,124 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         return nc;
  }
  
+/*
+ * is_slab_pfmemalloc - report whether the page backing slabp->s_mem has its
+ * SlabPfmemalloc page flag set.
+ */
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+       return PageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+}
+
+/*
+ * recheck_pfmemalloc_active - clear pfmemalloc_active when no slab on the
+ * local node's lists is marked pfmemalloc.
+ *
+ * Walks the slabs_full, slabs_partial and slabs_free lists of @cachep for
+ * the node returned by numa_mem_id(), with that node's list_lock taken via
+ * spin_lock_irqsave(). The walk stops at the first pfmemalloc slab, in which
+ * case the flag is left set. Does nothing if the flag is already clear.
+ * @ac is not used.
+ */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+                                               struct array_cache *ac)
+{
+       struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+       struct list_head *lists[3];
+       struct slab *slab;
+       unsigned long irq_flags;
+       bool still_active = false;
+       int i;
+
+       if (!pfmemalloc_active)
+               return;
+
+       lists[0] = &l3->slabs_full;
+       lists[1] = &l3->slabs_partial;
+       lists[2] = &l3->slabs_free;
+
+       spin_lock_irqsave(&l3->list_lock, irq_flags);
+       for (i = 0; i < 3 && !still_active; i++) {
+               list_for_each_entry(slab, lists[i], list) {
+                       if (is_slab_pfmemalloc(slab)) {
+                               still_active = true;
+                               break;
+                       }
+               }
+       }
+
+       if (!still_active)
+               pfmemalloc_active = false;
+       spin_unlock_irqrestore(&l3->list_lock, irq_flags);
+}
+
+/*
+ * __ac_get_obj - take the top entry of @ac, subject to pfmemalloc checks.
+ * @cachep: cache that @ac belongs to; its nodelists are consulted
+ * @ac: array cache to take from (presumably non-empty, since ac->avail is
+ *      decremented unconditionally -- confirm against callers)
+ * @flags: allocation flags, passed to gfp_pfmemalloc_allowed()
+ * @force_refill: when true and the local node has slabs on slabs_free, a
+ *                pfmemalloc object may be handed out after clearing the
+ *                pfmemalloc marking of its slab page
+ *
+ * Returns the object with the SLAB_OBJ_PFMEMALLOC tag cleared, or NULL when
+ * only pfmemalloc objects are cached and the caller may not use them. In the
+ * NULL case ac->avail is restored, so the array cache is left unchanged.
+ */
+static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+                                               gfp_t flags, bool force_refill)
+{
+       int i;
+       void *objp = ac->entry[--ac->avail];
+
+       /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+       if (unlikely(is_obj_pfmemalloc(objp))) {
+               struct kmem_list3 *l3;
+
+               if (gfp_pfmemalloc_allowed(flags)) {
+                       clear_obj_pfmemalloc(&objp);
+                       return objp;
+               }
+
+               /*
+                * The caller cannot use PFMEMALLOC objects, find another one.
+                * After the decrement above the remaining entries occupy
+                * indices 0 .. ac->avail - 1, so the scan has to start at 0;
+                * starting at 1 would never consider entry[0].
+                */
+               for (i = 0; i < ac->avail; i++) {
+                       /* If a !PFMEMALLOC object is found, swap them */
+                       if (!is_obj_pfmemalloc(ac->entry[i])) {
+                               objp = ac->entry[i];
+                               ac->entry[i] = ac->entry[ac->avail];
+                               ac->entry[ac->avail] = objp;
+                               return objp;
+                       }
+               }
+
+               /*
+                * If there are empty slabs on the slabs_free list and we are
+                * being forced to refill the cache, mark this one !pfmemalloc.
+                */
+               l3 = cachep->nodelists[numa_mem_id()];
+               if (!list_empty(&l3->slabs_free) && force_refill) {
+                       struct slab *slabp = virt_to_slab(objp);
+                       ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+                       clear_obj_pfmemalloc(&objp);
+                       recheck_pfmemalloc_active(cachep, ac);
+                       return objp;
+               }
+
+               /* No !PFMEMALLOC objects available: put the entry back */
+               ac->avail++;
+               objp = NULL;
+       }
+
+       return objp;
+}
+
+/*
+ * ac_get_obj - take the top entry of @ac.
+ *
+ * When sk_memalloc_socks() reports true the request goes through
+ * __ac_get_obj(), which applies the pfmemalloc checks and may return NULL.
+ * Otherwise the top entry is returned exactly as stored and ac->avail is
+ * decremented.
+ */
+static inline void *ac_get_obj(struct kmem_cache *cachep,
+                       struct array_cache *ac, gfp_t flags, bool force_refill)
+{
+       if (unlikely(sk_memalloc_socks()))
+               return __ac_get_obj(cachep, ac, flags, force_refill);
+
+       return ac->entry[--ac->avail];
+}
+
+/*
+ * __ac_put_obj - compute the value to store in an array cache for @objp.
+ *
+ * While pfmemalloc_active is set, the page found via virt_to_page(@objp) is
+ * checked and, if it is marked SlabPfmemalloc, the returned pointer carries
+ * the SLAB_OBJ_PFMEMALLOC tag. Otherwise @objp is returned unchanged.
+ * @cachep and @ac are not used here.
+ */
+static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+                                                               void *objp)
+{
+       /* The page is only looked up when some pfmemalloc slabs exist */
+       if (unlikely(pfmemalloc_active) &&
+           PageSlabPfmemalloc(virt_to_page(objp)))
+               set_obj_pfmemalloc(&objp);
+
+       return objp;
+}
+
+/*
+ * ac_put_obj - append @objp to @ac and bump ac->avail.
+ *
+ * When sk_memalloc_socks() reports true the stored value first goes through
+ * __ac_put_obj(), which may tag it with SLAB_OBJ_PFMEMALLOC. No bounds check
+ * is made here against the capacity of ac->entry.
+ */
+static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+                                                               void *objp)
+{
+       void *entry = objp;
+
+       if (unlikely(sk_memalloc_socks()))
+               entry = __ac_put_obj(cachep, ac, objp);
+
+       ac->entry[ac->avail] = entry;
+       ac->avail++;
+}
+
  /*
   * Transfer objects in one arraycache to another.
   * Locking must be handled by the caller.
@@ -1108,7 +1218,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
                         STATS_INC_ACOVERFLOW(cachep);
                         __drain_alien_cache(cachep, alien, nodeid);
                 }
-               alien->entry[alien->avail++] = objp;
+               ac_put_obj(cachep, alien, objp);
                 spin_unlock(&alien->lock);
         } else {
                 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1126,7 +1236,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
   * When hotplugging memory or a cpu, existing nodelists are not replaced if
   * already in use.
   *
- * Must hold cache_chain_mutex.
+ * Must hold slab_mutex.
   */
  static int init_cache_nodelists_node(int node)
  {
@@ -1134,7 +1244,7 @@ static int init_cache_nodelists_node(int node)
         struct kmem_list3 *l3;
         const int memsize = sizeof(struct kmem_list3);
  
-       list_for_each_entry(cachep, &cache_chain, next) {
+       list_for_each_entry(cachep, &slab_caches, list) {
                 /*
                  * Set up the size64 kmemlist for cpu before we can
                  * begin anything. Make sure some other cpu on this
@@ -1150,7 +1260,7 @@ static int init_cache_nodelists_node(int node)
  
                         /*
                          * The l3s don't come and go as CPUs come and
-                        * go.  cache_chain_mutex is sufficient
+                        * go.  slab_mutex is sufficient
                          * protection here.
                          */
                         cachep->nodelists[node] = l3;
@@ -1172,7 +1282,7 @@ static void __cpuinit cpuup_canceled(long cpu)
         int node = cpu_to_mem(cpu);
         const struct cpumask *mask = cpumask_of_node(node);
  
-       list_for_each_entry(cachep, &cache_chain, next) {
+       list_for_each_entry(cachep, &slab_caches, list) {
                 struct array_cache *nc;
                 struct array_cache *shared;
                 struct array_cache **alien;
@@ -1222,7 +1332,7 @@ free_array_cache:
          * the respective cache's slabs,  now we can go ahead and
          * shrink each nodelist to its limit.
          */
-       list_for_each_entry(cachep, &cache_chain, next) {
+       list_for_each_entry(cachep, &slab_caches, list) {
                 l3 = cachep->nodelists[node];
                 if (!l3)
                         continue;
@@ -1251,7 +1361,7 @@ static int __cpuinit cpuup_prepare(long cpu)
          * Now we can go ahead with allocating the shared arrays and
          * array caches
          */
-       list_for_each_entry(cachep, &cache_chain, next) {
+       list_for_each_entry(cachep, &slab_caches, list) {
                 struct array_cache *nc;
                 struct array_cache *shared = NULL;
                 struct array_cache **alien = NULL;
@@ -1319,9 +1429,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
         switch (action) {
         case CPU_UP_PREPARE:
         case CPU_UP_PREPARE_FROZEN:
-               mutex_lock(&cache_chain_mutex);
+               mutex_lock(&slab_mutex);
                 err = cpuup_prepare(cpu);
-               mutex_unlock(&cache_chain_mutex);
+               mutex_unlock(&slab_mutex);
                 break;
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
@@ -1331,7 +1441,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
                 /*
-                * Shutdown cache reaper. Note that the cache_chain_mutex is
+                * Shutdown cache reaper. Note that the slab_mutex is
                  * held so that if cache_reap() is invoked it cannot do
                  * anything expensive but will only modify reap_work
                  * and reschedule the timer.
@@ -1358,9 +1468,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
  #endif
         case CPU_UP_CANCELED:
         case CPU_UP_CANCELED_FROZEN:
-               mutex_lock(&cache_chain_mutex);
+               mutex_lock(&slab_mutex);
                 cpuup_canceled(cpu);
-               mutex_unlock(&cache_chain_mutex);
+               mutex_unlock(&slab_mutex);
                 break;
         }
         return notifier_from_errno(err);
@@ -1376,14 +1486,14 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
   * Returns -EBUSY if all objects cannot be drained so that the node is not
   * removed.
   *
- * Must hold cache_chain_mutex.
+ * Must hold slab_mutex.
   */
  static int __meminit drain_cache_nodelists_node(int node)
  {
         struct kmem_cache *cachep;
         int ret = 0;
  
-       list_for_each_entry(cachep, &cache_chain, next) {
+       list_for_each_entry(cachep, &slab_caches, list) {
                 struct kmem_list3 *l3;
  
                 l3 = cachep->nodelists[node];
@@ -1414,14 +1524,14 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
  
         switch (action) {
         case MEM_GOING_ONLINE:
-               mutex_lock(&cache_chain_mutex);
+               mutex_lock(&slab_mutex);
                 ret = init_cache_nodelists_node(nid);
-               mutex_unlock(&cache_chain_mutex);
+               mutex_unlock(&slab_mutex);
                 break;
         case MEM_GOING_OFFLINE:
-               mutex_lock(&cache_chain_mutex);
+               mutex_lock(&slab_mutex);
                 ret = drain_cache_nodelists_node(nid);
-               mutex_unlock(&cache_chain_mutex);
+               mutex_unlock(&slab_mutex);
                 break;
         case MEM_ONLINE:
         case MEM_OFFLINE:
@@ -1484,15 +1594,17 @@ void __init kmem_cache_init(void)
         int order;
         int node;
  
+       kmem_cache = &kmem_cache_boot;
+
         if (num_possible_nodes() == 1)
                 use_alien_caches = 0;
  
         for (i = 0; i < NUM_INIT_LISTS; i++) {
                 kmem_list3_init(&initkmem_list3[i]);
                 if (i < MAX_NUMNODES)
-                       cache_cache.nodelists[i] = NULL;
+                       kmem_cache->nodelists[i] = NULL;
         }
-       set_up_list3s(&cache_cache, CACHE_CACHE);
+       set_up_list3s(kmem_cache, CACHE_CACHE);
  
         /*
          * Fragmentation resistance on low memory - only use bigger
@@ -1504,9 +1616,9 @@ void __init kmem_cache_init(void)
  
         /* Bootstrap is tricky, because several objects are allocated
          * from caches that do not exist yet:
-        * 1) initialize the cache_cache cache: it contains the struct
-        *    kmem_cache structures of all caches, except cache_cache itself:
-        *    cache_cache is statically allocated.
+        * 1) initialize the kmem_cache cache: it contains the struct
+        *    kmem_cache structures of all caches, except kmem_cache itself:
+        *    kmem_cache is statically allocated.
          *    Initially an __init data area is used for the head array and the
          *    kmem_list3 structures, it's replaced with a kmalloc allocated
          *    array at the end of the bootstrap.
@@ -1515,45 +1627,43 @@ void __init kmem_cache_init(void)
          *    An __init data area is used for the head array.
          * 3) Create the remaining kmalloc caches, with minimally sized
          *    head arrays.
-        * 4) Replace the __init data head arrays for cache_cache and the first
+        * 4) Replace the __init data head arrays for kmem_cache and the first
          *    kmalloc cache with kmalloc allocated arrays.
-        * 5) Replace the __init data for kmem_list3 for cache_cache and
+        * 5) Replace the __init data for kmem_list3 for kmem_cache and
          *    the other cache's with kmalloc allocated memory.
          * 6) Resize the head arrays of the kmalloc caches to their final sizes.
          */
  
         node = numa_mem_id();
  
-       /* 1) create the cache_cache */
-       INIT_LIST_HEAD(&cache_chain);
-       list_add(&cache_cache.next, &cache_chain);
-       cache_cache.colour_off = cache_line_size();
-       cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
-       cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
+       /* 1) create the kmem_cache */
+       INIT_LIST_HEAD(&slab_caches);
+       list_add(&kmem_cache->list, &slab_caches);
+       kmem_cache->colour_off = cache_line_size();
+       kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
+       kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
  
         /*
          * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
          */
-       cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+       kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
                                   nr_node_ids * sizeof(struct kmem_list3 *);
-#if DEBUG
-       cache_cache.obj_size = cache_cache.buffer_size;
-#endif
-       cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
+       kmem_cache->object_size = kmem_cache->size;
+       kmem_cache->size = ALIGN(kmem_cache->object_size,
                                         cache_line_size());
-       cache_cache.reciprocal_buffer_size =
-               reciprocal_value(cache_cache.buffer_size);
+       kmem_cache->reciprocal_buffer_size =
+               reciprocal_value(kmem_cache->size);
  
         for (order = 0; order < MAX_ORDER; order++) {
-               cache_estimate(order, cache_cache.buffer_size,
-                       cache_line_size(), 0, &left_over, &cache_cache.num);
-               if (cache_cache.num)
+               cache_estimate(order, kmem_cache->size,
+                       cache_line_size(), 0, &left_over, &kmem_cache->num);
+               if (kmem_cache->num)
                         break;
         }
-       BUG_ON(!cache_cache.num);
-       cache_cache.gfporder = order;
-       cache_cache.colour = left_over / cache_cache.colour_off;
-       cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
+       BUG_ON(!kmem_cache->num);
+       kmem_cache->gfporder = order;
+       kmem_cache->colour = left_over / kmem_cache->colour_off;
+       kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
                                       sizeof(struct slab), cache_line_size());
  
         /* 2+3) create the kmalloc caches */
@@ -1566,19 +1676,21 @@ void __init kmem_cache_init(void)
          * bug.
          */
  
-       sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
+       sizes[INDEX_AC].cs_cachep = __kmem_cache_create(names[INDEX_AC].name,
                                         sizes[INDEX_AC].cs_size,
                                         ARCH_KMALLOC_MINALIGN,
                                         ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                                         NULL);
  
+       list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches);
         if (INDEX_AC != INDEX_L3) {
                 sizes[INDEX_L3].cs_cachep =
-                       kmem_cache_create(names[INDEX_L3].name,
+                       __kmem_cache_create(names[INDEX_L3].name,
                                 sizes[INDEX_L3].cs_size,
                                 ARCH_KMALLOC_MINALIGN,
                                 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                                 NULL);
+               list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches);
         }
  
         slab_early_init = 0;
@@ -1592,20 +1704,22 @@ void __init kmem_cache_init(void)
                  * allow tighter packing of the smaller caches.
                  */
                 if (!sizes->cs_cachep) {
-                       sizes->cs_cachep = kmem_cache_create(names->name,
+                       sizes->cs_cachep = __kmem_cache_create(names->name,
                                         sizes->cs_size,
                                         ARCH_KMALLOC_MINALIGN,
                                         ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                                         NULL);
+                       list_add(&sizes->cs_cachep->list, &slab_caches);
                 }
  #ifdef CONFIG_ZONE_DMA
-               sizes->cs_dmacachep = kmem_cache_create(
+               sizes->cs_dmacachep = __kmem_cache_create(
                                         names->name_dma,
                                         sizes->cs_size,
                                         ARCH_KMALLOC_MINALIGN,
                                         ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
                                                 SLAB_PANIC,
                                         NULL);
+               list_add(&sizes->cs_dmacachep->list, &slab_caches);
  #endif
                 sizes++;
                 names++;
@@ -1616,15 +1730,15 @@ void __init kmem_cache_init(void)
  
                 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
  
-               BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
-               memcpy(ptr, cpu_cache_get(&cache_cache),
+               BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
+               memcpy(ptr, cpu_cache_get(kmem_cache),
                        sizeof(struct arraycache_init));
                 /*
                  * Do not assume that spinlocks can be initialized via memcpy:
                  */
                 spin_lock_init(&ptr->lock);
  
-               cache_cache.array[smp_processor_id()] = ptr;
+               kmem_cache->array[smp_processor_id()] = ptr;
  
                 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
  
@@ -1645,7 +1759,7 @@ void __init kmem_cache_init(void)
                 int nid;
  
                 for_each_online_node(nid) {
-                       init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
+                       init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
  
                         init_list(malloc_sizes[INDEX_AC].cs_cachep,
                                   &initkmem_list3[SIZE_AC + nid], nid);
@@ -1657,27 +1771,27 @@ void __init kmem_cache_init(void)
                 }
         }
  
-       g_cpucache_up = EARLY;
+       slab_state = UP;
  }
  
  void __init kmem_cache_init_late(void)
  {
         struct kmem_cache *cachep;
  
-       g_cpucache_up = LATE;
+       slab_state = UP;
  
         /* Annotate slab for lockdep -- annotate the malloc caches */
         init_lock_keys();
  
         /* 6) resize the head arrays to their final sizes */
-       mutex_lock(&cache_chain_mutex);
-       list_for_each_entry(cachep, &cache_chain, next)
+       mutex_lock(&slab_mutex);
+       list_for_each_entry(cachep, &slab_caches, list)
                 if (enable_cpucache(cachep, GFP_NOWAIT))
                         BUG();
-       mutex_unlock(&cache_chain_mutex);
+       mutex_unlock(&slab_mutex);
  
         /* Done! */
-       g_cpucache_up = FULL;
+       slab_state = FULL;
  
         /*
          * Register a cpu startup notifier callback that initializes
@@ -1708,6 +1822,9 @@ static int __init cpucache_init(void)
          */
         for_each_online_cpu(cpu)
                 start_cpu_timer(cpu);
+
+       /* Done! */
+       slab_state = FULL;
         return 0;
  }
  __initcall(cpucache_init);
@@ -1724,7 +1841,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
                 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
                 nodeid, gfpflags);
         printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
-               cachep->name, cachep->buffer_size, cachep->gfporder);
+               cachep->name, cachep->size, cachep->gfporder);
  
         for_each_online_node(node) {
                 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
@@ -1779,7 +1896,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
         flags |= __GFP_COMP;
  #endif
  
-       flags |= cachep->gfpflags;
+       flags |= cachep->allocflags;
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                 flags |= __GFP_RECLAIMABLE;
  
@@ -1790,6 +1907,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 return NULL;
         }
  
+       /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
+       if (unlikely(page->pfmemalloc))
+               pfmemalloc_active = true;
+
         nr_pages = (1 << cachep->gfporder);
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                 add_zone_page_state(page_zone(page),
@@ -1797,9 +1918,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
         else
                 add_zone_page_state(page_zone(page),
                         NR_SLAB_UNRECLAIMABLE, nr_pages);
-       for (i = 0; i < nr_pages; i++)
+       for (i = 0; i < nr_pages; i++) {
                 __SetPageSlab(page + i);
  
+               if (page->pfmemalloc)
+                       SetPageSlabPfmemalloc(page + i);
+       }
+
         if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
                 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
  
@@ -1831,6 +1956,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
                                 NR_SLAB_UNRECLAIMABLE, nr_freed);
         while (i--) {
                 BUG_ON(!PageSlab(page));
+               __ClearPageSlabPfmemalloc(page);
                 __ClearPageSlab(page);
                 page++;
         }
@@ -1855,7 +1981,7 @@ static void kmem_rcu_free(struct rcu_head *head)
  static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
                             unsigned long caller)
  {
-       int size = obj_size(cachep);
+       int size = cachep->object_size;
  
         addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
  
@@ -1887,7 +2013,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
  
  static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
  {
-       int size = obj_size(cachep);
+       int size = cachep->object_size;
         addr = &((char *)addr)[obj_offset(cachep)];
  
         memset(addr, val, size);
@@ -1947,7 +2073,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
                 printk("\n");
         }
         realobj = (char *)objp + obj_offset(cachep);
-       size = obj_size(cachep);
+       size = cachep->object_size;
         for (i = 0; i < size && lines; i += 16, lines--) {
                 int limit;
                 limit = 16;
@@ -1964,7 +2090,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
         int lines = 0;
  
         realobj = (char *)objp + obj_offset(cachep);
-       size = obj_size(cachep);
+       size = cachep->object_size;
  
         for (i = 0; i < size; i++) {
                 char exp = POISON_FREE;
@@ -2028,10 +2154,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
  
                 if (cachep->flags & SLAB_POISON) {
  #ifdef CONFIG_DEBUG_PAGEALLOC
-                       if (cachep->buffer_size % PAGE_SIZE == 0 &&
+                       if (cachep->size % PAGE_SIZE == 0 &&
                                         OFF_SLAB(cachep))
                                 kernel_map_pages(virt_to_page(objp),
-                                       cachep->buffer_size / PAGE_SIZE, 1);
+                                       cachep->size / PAGE_SIZE, 1);
                         else
                                 check_poison_obj(cachep, objp);
  #else
@@ -2082,27 +2208,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
         }
  }
  
-static void __kmem_cache_destroy(struct kmem_cache *cachep)
-{
-       int i;
-       struct kmem_list3 *l3;
-
-       for_each_online_cpu(i)
-           kfree(cachep->array[i]);
-
-       /* NUMA: free the list3 structures */
-       for_each_online_node(i) {
-               l3 = cachep->nodelists[i];
-               if (l3) {
-                       kfree(l3->shared);
-                       free_alien_cache(l3->alien);
-                       kfree(l3);
-               }
-       }
-       kmem_cache_free(&cache_cache, cachep);
-}
-
-
  /**
   * calculate_slab_order - calculate size (page order) of slabs
   * @cachep: pointer to the cache that is being created
@@ -2175,10 +2280,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
  
  static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  {
-       if (g_cpucache_up == FULL)
+       if (slab_state >= FULL)
                 return enable_cpucache(cachep, gfp);
  
-       if (g_cpucache_up == NONE) {
+       if (slab_state == DOWN) {
                 /*
                  * Note: the first kmem_cache_create must create the cache
                  * that's used by kmalloc(24), otherwise the creation of
@@ -2193,16 +2298,16 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
                  */
                 set_up_list3s(cachep, SIZE_AC);
                 if (INDEX_AC == INDEX_L3)
-                       g_cpucache_up = PARTIAL_L3;
+                       slab_state = PARTIAL_L3;
                 else
-                       g_cpucache_up = PARTIAL_AC;
+                       slab_state = PARTIAL_ARRAYCACHE;
         } else {
                 cachep->array[smp_processor_id()] =
                         kmalloc(sizeof(struct arraycache_init), gfp);
  
-               if (g_cpucache_up == PARTIAL_AC) {
+               if (slab_state == PARTIAL_ARRAYCACHE) {
                         set_up_list3s(cachep, SIZE_L3);
-                       g_cpucache_up = PARTIAL_L3;
+                       slab_state = PARTIAL_L3;
                 } else {
                         int node;
                         for_each_online_node(node) {
@@ -2228,7 +2333,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  }
  
  /**
- * kmem_cache_create - Create a cache.
+ * __kmem_cache_create - Create a cache.
   * @name: A string which is used in /proc/slabinfo to identify this cache.
   * @size: The size of objects to be created in this cache.
   * @align: The required alignment for the objects.
@@ -2239,9 +2344,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
   * Cannot be called within a int, but can be interrupted.
   * The @ctor is run when new pages are allocated by the cache.
   *
- * @name must be valid until the cache is destroyed. This implies that
- * the module calling this has to destroy the cache before getting unloaded.
- *
   * The flags are
   *
   * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
@@ -2255,59 +2357,14 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
   * as davem.
   */
  struct kmem_cache *
-kmem_cache_create (const char *name, size_t size, size_t align,
+__kmem_cache_create (const char *name, size_t size, size_t align,
         unsigned long flags, void (*ctor)(void *))
  {
         size_t left_over, slab_size, ralign;
-       struct kmem_cache *cachep = NULL, *pc;
+       struct kmem_cache *cachep = NULL;
         gfp_t gfp;
  
-       /*
-        * Sanity checks... these are all serious usage bugs.
-        */
-       if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
-           size > KMALLOC_MAX_SIZE) {
-               printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
-                               name);
-               BUG();
-       }
-
-       /*
-        * We use cache_chain_mutex to ensure a consistent view of
-        * cpu_online_mask as well.  Please see cpuup_callback
-        */
-       if (slab_is_available()) {
-               get_online_cpus();
-               mutex_lock(&cache_chain_mutex);
-       }
-
-       list_for_each_entry(pc, &cache_chain, next) {
-               char tmp;
-               int res;
-
-               /*
-                * This happens when the module gets unloaded and doesn't
-                * destroy its slab cache and no-one else reuses the vmalloc
-                * area of the module.  Print a warning.
-                */
-               res = probe_kernel_address(pc->name, tmp);
-               if (res) {
-                       printk(KERN_ERR
-                              "SLAB: cache with size %d has lost its name\n",
-                              pc->buffer_size);
-                       continue;
-               }
-
-               if (!strcmp(pc->name, name)) {
-                       printk(KERN_ERR
-                              "kmem_cache_create: duplicate cache %s\n", name);
-                       dump_stack();
-                       goto oops;
-               }
-       }
-
  #if DEBUG
-       WARN_ON(strchr(name, ' '));     /* It confuses parsers */
  #if FORCED_DEBUG
         /*
          * Enable redzoning and last user accounting, except for caches with
@@ -2394,13 +2451,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 gfp = GFP_NOWAIT;
  
         /* Get cache's description obj. */
-       cachep = kmem_cache_zalloc(&cache_cache, gfp);
+       cachep = kmem_cache_zalloc(kmem_cache, gfp);
         if (!cachep)
-               goto oops;
+               return NULL;
  
         cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+       cachep->object_size = size;
+       cachep->align = align;
  #if DEBUG
-       cachep->obj_size = size;
  
         /*
          * Both debugging options require word-alignment which is calculated
@@ -2423,7 +2481,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         }
  #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
         if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
-           && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
+           && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
                 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
                 size = PAGE_SIZE;
         }
@@ -2451,9 +2509,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         if (!cachep->num) {
                 printk(KERN_ERR
                        "kmem_cache_create: couldn't create cache %s.\n", name);
-               kmem_cache_free(&cache_cache, cachep);
-               cachep = NULL;
-               goto oops;
+               kmem_cache_free(kmem_cache, cachep);
+               return NULL;
         }
         slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
                           + sizeof(struct slab), align);
@@ -2489,10 +2546,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         cachep->colour = left_over / cachep->colour_off;
         cachep->slab_size = slab_size;
         cachep->flags = flags;
-       cachep->gfpflags = 0;
+       cachep->allocflags = 0;
         if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
-               cachep->gfpflags |= GFP_DMA;
-       cachep->buffer_size = size;
+               cachep->allocflags |= GFP_DMA;
+       cachep->size = size;
         cachep->reciprocal_buffer_size = reciprocal_value(size);
  
         if (flags & CFLGS_OFF_SLAB) {
@@ -2508,11 +2565,16 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         }
         cachep->ctor = ctor;
         cachep->name = name;
+       cachep->refcount = 1;
  
         if (setup_cpu_cache(cachep, gfp)) {
-               __kmem_cache_destroy(cachep);
-               cachep = NULL;
-               goto oops;
+               /*
+                * __kmem_cache_shutdown() frees only the per-cpu arrays and
+                * node lists, so release the cache descriptor here as well.
+                */
+               __kmem_cache_shutdown(cachep);
+               kmem_cache_free(kmem_cache, cachep);
+               return NULL;
         }
  
         if (flags & SLAB_DEBUG_OBJECTS) {
@@ -2525,19 +2582,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 slab_set_debugobj_lock_classes(cachep);
         }
  
-       /* cache setup completed, link it into the list */
-       list_add(&cachep->next, &cache_chain);
-oops:
-       if (!cachep && (flags & SLAB_PANIC))
-               panic("kmem_cache_create(): failed to create slab `%s'\n",
-                     name);
-       if (slab_is_available()) {
-               mutex_unlock(&cache_chain_mutex);
-               put_online_cpus();
-       }
         return cachep;
  }
-EXPORT_SYMBOL(kmem_cache_create);
  
  #if DEBUG
  static void check_irq_off(void)
@@ -2652,7 +2698,7 @@ out:
         return nr_freed;
  }
  
-/* Called with cache_chain_mutex held to protect against cpu hotplug */
+/* Called with slab_mutex held to protect against cpu hotplug */
  static int __cache_shrink(struct kmem_cache *cachep)
  {
         int ret = 0, i = 0;
@@ -2687,57 +2733,37 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
         BUG_ON(!cachep || in_interrupt());
  
         get_online_cpus();
-       mutex_lock(&cache_chain_mutex);
+       mutex_lock(&slab_mutex);
         ret = __cache_shrink(cachep);
-       mutex_unlock(&cache_chain_mutex);
+       mutex_unlock(&slab_mutex);
         put_online_cpus();
         return ret;
  }
  EXPORT_SYMBOL(kmem_cache_shrink);
  
-/**
- * kmem_cache_destroy - delete a cache
- * @cachep: the cache to destroy
- *
- * Remove a &struct kmem_cache object from the slab cache.
- *
- * It is expected this function will be called by a module when it is
- * unloaded.  This will remove the cache completely, and avoid a duplicate
- * cache being allocated each time a module is loaded and unloaded, if the
- * module doesn't have persistent in-kernel storage across loads and unloads.
- *
- * The cache must be empty before calling this function.
- *
- * The caller must guarantee that no one will allocate memory from the cache
- * during the kmem_cache_destroy().
- */
-void kmem_cache_destroy(struct kmem_cache *cachep)
+int __kmem_cache_shutdown(struct kmem_cache *cachep)
  {
-       BUG_ON(!cachep || in_interrupt());
+       int i;
+       struct kmem_list3 *l3;
+       int rc = __cache_shrink(cachep);
  
-       /* Find the cache in the chain of caches. */
-       get_online_cpus();
-       mutex_lock(&cache_chain_mutex);
-       /*
-        * the chain is never empty, cache_cache is never destroyed
-        */
-       list_del(&cachep->next);
-       if (__cache_shrink(cachep)) {
-               slab_error(cachep, "Can't free all objects");
-               list_add(&cachep->next, &cache_chain);
-               mutex_unlock(&cache_chain_mutex);
-               put_online_cpus();
-               return;
-       }
+       if (rc)
+               return rc;
  
-       if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
-               rcu_barrier();
+       for_each_online_cpu(i)
+           kfree(cachep->array[i]);
  
-       __kmem_cache_destroy(cachep);
-       mutex_unlock(&cache_chain_mutex);
-       put_online_cpus();
+       /* NUMA: free the list3 structures */
+       for_each_online_node(i) {
+               l3 = cachep->nodelists[i];
+               if (l3) {
+                       kfree(l3->shared);
+                       free_alien_cache(l3->alien);
+                       kfree(l3);
+               }
+       }
+       return 0;
  }
-EXPORT_SYMBOL(kmem_cache_destroy);
  
  /*
   * Get the memory for a slab management obj.
@@ -2821,10 +2847,10 @@ static void cache_init_objs(struct kmem_cache *cachep,
                                 slab_error(cachep, "constructor overwrote the"
                                            " start of an object");
                 }
-               if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
+               if ((cachep->size % PAGE_SIZE) == 0 &&
                             OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
                         kernel_map_pages(virt_to_page(objp),
-                                        cachep->buffer_size / PAGE_SIZE, 0);
+                                        cachep->size / PAGE_SIZE, 0);
  #else
                 if (cachep->ctor)
                         cachep->ctor(objp);
@@ -2838,9 +2864,9 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
  {
         if (CONFIG_ZONE_DMA_FLAG) {
                 if (flags & GFP_DMA)
-                       BUG_ON(!(cachep->gfpflags & GFP_DMA));
+                       BUG_ON(!(cachep->allocflags & GFP_DMA));
                 else
-                       BUG_ON(cachep->gfpflags & GFP_DMA);
+                       BUG_ON(cachep->allocflags & GFP_DMA);
         }
  }
  
@@ -3058,10 +3084,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
  #endif
         if (cachep->flags & SLAB_POISON) {
  #ifdef CONFIG_DEBUG_PAGEALLOC
-               if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
+               if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
                         store_stackinfo(cachep, objp, (unsigned long)caller);
                         kernel_map_pages(virt_to_page(objp),
-                                        cachep->buffer_size / PAGE_SIZE, 0);
+                                        cachep->size / PAGE_SIZE, 0);
                 } else {
                         poison_obj(cachep, objp, POISON_FREE);
                 }
@@ -3101,16 +3127,19 @@ bad:
  #define check_slabp(x,y) do { } while(0)
  #endif
  
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+                                                       bool force_refill)
  {
         int batchcount;
         struct kmem_list3 *l3;
         struct array_cache *ac;
         int node;
  
-retry:
         check_irq_off();
         node = numa_mem_id();
+       if (unlikely(force_refill))
+               goto force_grow;
+retry:
         ac = cpu_cache_get(cachep);
         batchcount = ac->batchcount;
         if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3160,8 +3189,8 @@ retry:
                         STATS_INC_ACTIVE(cachep);
                         STATS_SET_HIGH(cachep);
  
-                       ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-                                                           node);
+                       ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+                                                                       node));
                 }
                 check_slabp(cachep, slabp);
  
@@ -3180,18 +3209,22 @@ alloc_done:
  
         if (unlikely(!ac->avail)) {
                 int x;
+force_grow:
                 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
  
                 /* cache_grow can reenable interrupts, then ac could change. */
                 ac = cpu_cache_get(cachep);
-               if (!x && ac->avail == 0)       /* no objects in sight? abort */
+
+               /* no objects in sight? abort */
+               if (!x && (ac->avail == 0 || force_refill))
                         return NULL;
  
                 if (!ac->avail)         /* objects refilled by interrupt? */
                         goto retry;
         }
         ac->touched = 1;
-       return ac->entry[--ac->avail];
+
+       return ac_get_obj(cachep, ac, flags, force_refill);
  }
  
  static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3211,9 +3244,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                 return objp;
         if (cachep->flags & SLAB_POISON) {
  #ifdef CONFIG_DEBUG_PAGEALLOC
-               if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+               if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
                         kernel_map_pages(virt_to_page(objp),
-                                        cachep->buffer_size / PAGE_SIZE, 1);
+                                        cachep->size / PAGE_SIZE, 1);
                 else
                         check_poison_obj(cachep, objp);
  #else
@@ -3243,7 +3276,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                 unsigned objnr;
  
                 slabp = virt_to_head_page(objp)->slab_page;
-               objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
+               objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
                 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
         }
  #endif
@@ -3263,33 +3296,45 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
  
  static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
  {
-       if (cachep == &cache_cache)
+       if (cachep == kmem_cache)
                 return false;
  
-       return should_failslab(obj_size(cachep), flags, cachep->flags);
+       return should_failslab(cachep->object_size, flags, cachep->flags);
  }
  
  static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
  {
         void *objp;
         struct array_cache *ac;
+       bool force_refill = false;
  
         check_irq_off();
  
         ac = cpu_cache_get(cachep);
         if (likely(ac->avail)) {
-               STATS_INC_ALLOCHIT(cachep);
                 ac->touched = 1;
-               objp = ac->entry[--ac->avail];
-       } else {
-               STATS_INC_ALLOCMISS(cachep);
-               objp = cache_alloc_refill(cachep, flags);
+               objp = ac_get_obj(cachep, ac, flags, false);
+
                 /*
-                * the 'ac' may be updated by cache_alloc_refill(),
-                * and kmemleak_erase() requires its correct value.
+                * Allow for the possibility all avail objects are not allowed
+                * by the current flags
                  */
-               ac = cpu_cache_get(cachep);
+               if (objp) {
+                       STATS_INC_ALLOCHIT(cachep);
+                       goto out;
+               }
+               force_refill = true;
         }
+
+       STATS_INC_ALLOCMISS(cachep);
+       objp = cache_alloc_refill(cachep, flags, force_refill);
+       /*
+        * the 'ac' may be updated by cache_alloc_refill(),
+        * and kmemleak_erase() requires its correct value.
+        */
+       ac = cpu_cache_get(cachep);
+
+out:
         /*
          * To avoid a false negative, if an object that is in one of the
          * per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3317,7 +3362,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
         if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
                 nid_alloc = cpuset_slab_spread_node();
         else if (current->mempolicy)
-               nid_alloc = slab_node(current->mempolicy);
+               nid_alloc = slab_node();
         if (nid_alloc != nid_here)
                 return ____cache_alloc_node(cachep, flags, nid_alloc);
         return NULL;
@@ -3349,7 +3394,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
  
  retry_cpuset:
         cpuset_mems_cookie = get_mems_allowed();
-       zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+       zonelist = node_zonelist(slab_node(), flags);
  
  retry:
         /*
@@ -3526,14 +3571,14 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
    out:
         local_irq_restore(save_flags);
         ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
-       kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+       kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
                                  flags);
  
         if (likely(ptr))
-               kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
+               kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
  
         if (unlikely((flags & __GFP_ZERO) && ptr))
-               memset(ptr, 0, obj_size(cachep));
+               memset(ptr, 0, cachep->object_size);
  
         return ptr;
  }
@@ -3588,15 +3633,15 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
         objp = __do_cache_alloc(cachep, flags);
         local_irq_restore(save_flags);
         objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
-       kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+       kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
                                  flags);
         prefetchw(objp);
  
         if (likely(objp))
-               kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
+               kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
  
         if (unlikely((flags & __GFP_ZERO) && objp))
-               memset(objp, 0, obj_size(cachep));
+               memset(objp, 0, cachep->object_size);
  
         return objp;
  }
@@ -3611,9 +3656,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
         struct kmem_list3 *l3;
  
         for (i = 0; i < nr_objects; i++) {
-               void *objp = objpp[i];
+               void *objp;
                 struct slab *slabp;
  
+               clear_obj_pfmemalloc(&objpp[i]);
+               objp = objpp[i];
+
                 slabp = virt_to_slab(objp);
                 l3 = cachep->nodelists[node];
                 list_del(&slabp->list);
@@ -3712,7 +3760,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
         kmemleak_free_recursive(objp, cachep->flags);
         objp = cache_free_debugcheck(cachep, objp, caller);
  
-       kmemcheck_slab_free(cachep, objp, obj_size(cachep));
+       kmemcheck_slab_free(cachep, objp, cachep->object_size);
  
         /*
          * Skip calling cache_free_alien() when the platform is not numa.
@@ -3731,7 +3779,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
                 cache_flusharray(cachep, ac);
         }
  
-       ac->entry[ac->avail++] = objp;
+       ac_put_obj(cachep, ac, objp);
  }
  
  /**
@@ -3747,7 +3795,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
         void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
  
         trace_kmem_cache_alloc(_RET_IP_, ret,
-                              obj_size(cachep), cachep->buffer_size, flags);
+                              cachep->object_size, cachep->size, flags);
  
         return ret;
  }
@@ -3775,7 +3823,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                                        __builtin_return_address(0));
  
         trace_kmem_cache_alloc_node(_RET_IP_, ret,
-                                   obj_size(cachep), cachep->buffer_size,
+                                   cachep->object_size, cachep->size,
                                     flags, nodeid);
  
         return ret;
@@ -3857,7 +3905,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
         ret = __cache_alloc(cachep, flags, caller);
  
         trace_kmalloc((unsigned long) caller, ret,
-                     size, cachep->buffer_size, flags);
+                     size, cachep->size, flags);
  
         return ret;
  }
@@ -3897,9 +3945,9 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
         unsigned long flags;
  
         local_irq_save(flags);
-       debug_check_no_locks_freed(objp, obj_size(cachep));
+       debug_check_no_locks_freed(objp, cachep->object_size);
         if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
-               debug_check_no_obj_freed(objp, obj_size(cachep));
+               debug_check_no_obj_freed(objp, cachep->object_size);
         __cache_free(cachep, objp, __builtin_return_address(0));
         local_irq_restore(flags);
  
@@ -3928,8 +3976,9 @@ void kfree(const void *objp)
         local_irq_save(flags);
         kfree_debugcheck(objp);
         c = virt_to_cache(objp);
-       debug_check_no_locks_freed(objp, obj_size(c));
-       debug_check_no_obj_freed(objp, obj_size(c));
+       debug_check_no_locks_freed(objp, c->object_size);
+
+       debug_check_no_obj_freed(objp, c->object_size);
         __cache_free(c, (void *)objp, __builtin_return_address(0));
         local_irq_restore(flags);
  }
@@ -3937,7 +3986,7 @@ EXPORT_SYMBOL(kfree);
  
  unsigned int kmem_cache_size(struct kmem_cache *cachep)
  {
-       return obj_size(cachep);
+       return cachep->object_size;
  }
  EXPORT_SYMBOL(kmem_cache_size);
  
@@ -4011,7 +4060,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
         return 0;
  
  fail:
-       if (!cachep->next.next) {
+       if (!cachep->list.next) {
                 /* Cache is not active yet. Roll back what we did */
                 node--;
                 while (node >= 0) {
@@ -4046,7 +4095,7 @@ static void do_ccupdate_local(void *info)
         new->new[smp_processor_id()] = old;
  }
  
-/* Always called with the cache_chain_mutex held */
+/* Always called with the slab_mutex held */
  static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                                 int batchcount, int shared, gfp_t gfp)
  {
@@ -4090,7 +4139,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
         return alloc_kmemlist(cachep, gfp);
  }
  
-/* Called with cache_chain_mutex held always */
+/* Called with slab_mutex held always */
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
  {
         int err;
@@ -4105,13 +4154,13 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
          * The numbers are guessed, we should auto-tune as described by
          * Bonwick.
          */
-       if (cachep->buffer_size > 131072)
+       if (cachep->size > 131072)
                 limit = 1;
-       else if (cachep->buffer_size > PAGE_SIZE)
+       else if (cachep->size > PAGE_SIZE)
                 limit = 8;
-       else if (cachep->buffer_size > 1024)
+       else if (cachep->size > 1024)
                 limit = 24;
-       else if (cachep->buffer_size > 256)
+       else if (cachep->size > 256)
                 limit = 54;
         else
                 limit = 120;
@@ -4126,7 +4175,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
          * to a larger limit. Thus disabled by default.
          */
         shared = 0;
-       if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
+       if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
                 shared = 8;
  
  #if DEBUG
@@ -4192,11 +4241,11 @@ static void cache_reap(struct work_struct *w)
         int node = numa_mem_id();
         struct delayed_work *work = to_delayed_work(w);
  
-       if (!mutex_trylock(&cache_chain_mutex))
+       if (!mutex_trylock(&slab_mutex))
                 /* Give up. Setup the next iteration. */
                 goto out;
  
-       list_for_each_entry(searchp, &cache_chain, next) {
+       list_for_each_entry(searchp, &slab_caches, list) {
                 check_irq_on();
  
                 /*
@@ -4234,7 +4283,7 @@ next:
                 cond_resched();
         }
         check_irq_on();
-       mutex_unlock(&cache_chain_mutex);
+       mutex_unlock(&slab_mutex);
         next_reap_node();
  out:
         /* Set up the next iteration */
@@ -4270,26 +4319,26 @@ static void *s_start(struct seq_file *m, loff_t *pos)
  {
         loff_t n = *pos;
  
-       mutex_lock(&cache_chain_mutex);
+       mutex_lock(&slab_mutex);
         if (!n)
                 print_slabinfo_header(m);
  
-       return seq_list_start(&cache_chain, *pos);
+       return seq_list_start(&slab_caches, *pos);
  }
  
  static void *s_next(struct seq_file *m, void *p, loff_t *pos)
  {
-       return seq_list_next(p, &cache_chain, pos);
+       return seq_list_next(p, &slab_caches, pos);
  }
  
  static void s_stop(struct seq_file *m, void *p)
  {
-       mutex_unlock(&cache_chain_mutex);
+       mutex_unlock(&slab_mutex);
  }
  
  static int s_show(struct seq_file *m, void *p)
  {
-       struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
+       struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
         struct slab *slabp;
         unsigned long active_objs;
         unsigned long num_objs;
@@ -4345,7 +4394,7 @@ static int s_show(struct seq_file *m, void *p)
                 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
  
         seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
-                  name, active_objs, num_objs, cachep->buffer_size,
+                  name, active_objs, num_objs, cachep->size,
                    cachep->num, (1 << cachep->gfporder));
         seq_printf(m, " : tunables %4u %4u %4u",
                    cachep->limit, cachep->batchcount, cachep->shared);
@@ -4435,9 +4484,9 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
                 return -EINVAL;
  
         /* Find the cache in the chain of caches. */
-       mutex_lock(&cache_chain_mutex);
+       mutex_lock(&slab_mutex);
         res = -EINVAL;
-       list_for_each_entry(cachep, &cache_chain, next) {
+       list_for_each_entry(cachep, &slab_caches, list) {
                 if (!strcmp(cachep->name, kbuf)) {
                         if (limit < 1 || batchcount < 1 ||
                                         batchcount > limit || shared < 0) {
@@ -4450,7 +4499,7 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
                         break;
                 }
         }
-       mutex_unlock(&cache_chain_mutex);
+       mutex_unlock(&slab_mutex);
         if (res >= 0)
                 res = count;
         return res;
@@ -4473,8 +4522,8 @@ static const struct file_operations proc_slabinfo_operations = {
  
  static void *leaks_start(struct seq_file *m, loff_t *pos)
  {
-       mutex_lock(&cache_chain_mutex);
-       return seq_list_start(&cache_chain, *pos);
+       mutex_lock(&slab_mutex);
+       return seq_list_start(&slab_caches, *pos);
  }
  
  static inline int add_caller(unsigned long *n, unsigned long v)
@@ -4513,7 +4562,7 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
         int i;
         if (n[0] == n[1])
                 return;
-       for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
+       for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
                 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
                         continue;
                 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
@@ -4539,7 +4588,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
  
  static int leaks_show(struct seq_file *m, void *p)
  {
-       struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
+       struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
         struct slab *slabp;
         struct kmem_list3 *l3;
         const char *name;
@@ -4573,17 +4622,17 @@ static int leaks_show(struct seq_file *m, void *p)
         name = cachep->name;
         if (n[0] == n[1]) {
                 /* Increase the buffer size */
-               mutex_unlock(&cache_chain_mutex);
+               mutex_unlock(&slab_mutex);
                 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
                 if (!m->private) {
                         /* Too bad, we are really out */
                         m->private = n;
-                       mutex_lock(&cache_chain_mutex);
+                       mutex_lock(&slab_mutex);
                         return -ENOMEM;
                 }
                 *(unsigned long *)m->private = n[0] * 2;
                 kfree(n);
-               mutex_lock(&cache_chain_mutex);
+               mutex_lock(&slab_mutex);
                 /* Now make sure this entry will be retried */
                 m->count = m->size;
                 return 0;
@@ -4658,6 +4707,6 @@ size_t ksize(const void *objp)
         if (unlikely(objp == ZERO_SIZE_PTR))
                 return 0;
  
-       return obj_size(virt_to_cache(objp));
+       return virt_to_cache(objp)->object_size;
  }
  EXPORT_SYMBOL(ksize);