slab: Return NULL for oversized allocations

[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / slab_common.c
diff --git a/mm/slab_common.c b/mm/slab_common.c

index 069a24e..c5d352e 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -13,9 +13,12 @@
  #include <linux/module.h>
  #include <linux/cpu.h>
  #include <linux/uaccess.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
  #include <asm/page.h>
+#include <linux/memcontrol.h>
  
  #include "slab.h"
  
@@ -25,7 +28,8 @@ DEFINE_MUTEX(slab_mutex);
  struct kmem_cache *kmem_cache;
  
  #ifdef CONFIG_DEBUG_VM
-static int kmem_cache_sanity_check(const char *name, size_t size)
+static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
+                                  size_t size)
  {
         struct kmem_cache *s = NULL;
  
@@ -51,7 +55,13 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
                         continue;
                 }
  
-               if (!strcmp(s->name, name)) {
+               /*
+                * For simplicity, we won't check this in the list of memcg
+                * caches. We have control over memcg naming, and if there
+                * aren't duplicates in the global list, there won't be any
+                * duplicates in the memcg lists as well.
+                */
+               if (!memcg && !strcmp(s->name, name)) {
                         pr_err("%s (%s): Cache name already exists.\n",
                                __func__, name);
                         dump_stack();
@@ -64,12 +74,69 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
         return 0;
  }
  #else
-static inline int kmem_cache_sanity_check(const char *name, size_t size)
+static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
+                                         const char *name, size_t size)
  {
         return 0;
  }
  #endif
  
+#ifdef CONFIG_MEMCG_KMEM
+/*
+ * memcg_update_all_caches - propagate a new memcg count to every root cache.
+ * @num_memcgs: value passed to memcg_update_cache_size() for each root cache
+ *              and, once all of them succeeded, to memcg_update_array_size().
+ *
+ * The whole walk over slab_caches runs with slab_mutex held. Entries for
+ * which is_root_cache() is false are skipped.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * memcg_update_cache_size(); in that case the walk stops early and
+ * memcg_update_array_size() is not called.
+ */
+int memcg_update_all_caches(int num_memcgs)
+{
+       struct kmem_cache *s;
+       int ret = 0;
+       mutex_lock(&slab_mutex);
+
+       list_for_each_entry(s, &slab_caches, list) {
+               if (!is_root_cache(s))
+                       continue;
+
+               ret = memcg_update_cache_size(s, num_memcgs);
+               /*
+                * See comment in memcontrol.c, memcg_update_cache_size:
+                * Instead of freeing the memory, we'll just leave the caches
+                * up to this point in an updated state.
+                */
+               if (ret)
+                       goto out;
+       }
+
+       /* Only reached when every root cache was updated successfully. */
+       memcg_update_array_size(num_memcgs);
+out:
+       mutex_unlock(&slab_mutex);
+       return ret;
+}
+#endif
+
+/*
+ * Figure out what the alignment of the objects will be given a set of
+ * flags, a user specified alignment and the size of the objects.
+ *
+ * @flags: cache flags; only SLAB_HWCACHE_ALIGN is examined here.
+ * @align: alignment requested by the caller.
+ * @size:  object size.
+ *
+ * Returns the alignment to use: never below ARCH_SLAB_MINALIGN, and passed
+ * through ALIGN(..., sizeof(void *)) before being returned.
+ */
+unsigned long calculate_alignment(unsigned long flags,
+               unsigned long align, unsigned long size)
+{
+       /*
+        * If the user wants hardware cache aligned objects then follow that
+        * suggestion if the object is sufficiently large.
+        *
+        * The hardware cache alignment cannot override the specified
+        * alignment though. If that is greater then use it.
+        */
+       if (flags & SLAB_HWCACHE_ALIGN) {
+               unsigned long ralign = cache_line_size();
+               /*
+                * Halve the candidate for as long as the object still fits
+                * into half of it, so small objects do not each get a full
+                * cache line.
+                */
+               while (size <= ralign / 2)
+                       ralign /= 2;
+               align = max(align, ralign);
+       }
+
+       /* Enforce the architecture's minimum slab alignment. */
+       if (align < ARCH_SLAB_MINALIGN)
+               align = ARCH_SLAB_MINALIGN;
+
+       return ALIGN(align, sizeof(void *));
+}
+
+
  /*
   * kmem_cache_create - Create a cache.
   * @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -95,8 +162,10 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)
   * as davem.
   */
  
-struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
-               unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
+                       size_t align, unsigned long flags, void (*ctor)(void *),
+                       struct kmem_cache *parent_cache)
  {
         struct kmem_cache *s = NULL;
         int err = 0;
@@ -104,19 +173,33 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
         get_online_cpus();
         mutex_lock(&slab_mutex);
  
-       if (!kmem_cache_sanity_check(name, size) == 0)
+       if (!kmem_cache_sanity_check(memcg, name, size) == 0)
                 goto out_locked;
  
+       /*
+        * Some allocators will constrain the set of valid flags to a subset
+        * of all flags. We expect them to define CACHE_CREATE_MASK in this
+        * case, and we'll just provide them with a sanitized version of the
+        * passed flags.
+        */
+       flags &= CACHE_CREATE_MASK;
  
-       s = __kmem_cache_alias(name, size, align, flags, ctor);
+       s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
         if (s)
                 goto out_locked;
  
         s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
         if (s) {
                 s->object_size = s->size = size;
-               s->align = align;
+               s->align = calculate_alignment(flags, align, size);
                 s->ctor = ctor;
+
+               if (memcg_register_cache(memcg, s, parent_cache)) {
+                       kmem_cache_free(kmem_cache, s);
+                       err = -ENOMEM;
+                       goto out_locked;
+               }
+
                 s->name = kstrdup(name, GFP_KERNEL);
                 if (!s->name) {
                         kmem_cache_free(kmem_cache, s);
@@ -126,10 +209,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
  
                 err = __kmem_cache_create(s, flags);
                 if (!err) {
-
                         s->refcount = 1;
                         list_add(&s->list, &slab_caches);
-
+                       memcg_cache_list_add(memcg, s);
                 } else {
                         kfree(s->name);
                         kmem_cache_free(kmem_cache, s);
@@ -157,10 +239,20 @@ out_locked:
  
         return s;
  }
+
+/*
+ * kmem_cache_create - create a cache that is not tied to a memcg.
+ *
+ * Thin wrapper: forwards @name, @size, @align, @flags and @ctor unchanged to
+ * kmem_cache_create_memcg(), passing NULL for both the memcg and the parent
+ * cache, and returns whatever that call returns.
+ */
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+                 unsigned long flags, void (*ctor)(void *))
+{
+       return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
+}
  EXPORT_SYMBOL(kmem_cache_create);
  
  void kmem_cache_destroy(struct kmem_cache *s)
  {
+       /* Destroy all the children caches if we aren't a memcg cache */
+       kmem_cache_destroy_memcg_children(s);
+
         get_online_cpus();
         mutex_lock(&slab_mutex);
         s->refcount--;
@@ -172,6 +264,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
                         if (s->flags & SLAB_DESTROY_BY_RCU)
                                 rcu_barrier();
  
+                       memcg_release_cache(s);
                         kfree(s->name);
                         kmem_cache_free(kmem_cache, s);
                 } else {
@@ -192,3 +285,348 @@ int slab_is_available(void)
  {
         return slab_state >= UP;
  }
+
+#ifndef CONFIG_SLOB
+/*
+ * Create a cache during boot when no slab services are available yet.
+ *
+ * @s:     caller-provided kmem_cache structure to initialise.
+ * @name:  cache name; the pointer is stored as-is, the string is not copied.
+ * @size:  object size, stored in both s->size and s->object_size.
+ * @flags: cache flags, used for the alignment calculation and passed on to
+ *         __kmem_cache_create().
+ *
+ * The alignment is derived from ARCH_KMALLOC_MINALIGN via
+ * calculate_alignment(). A non-zero return from __kmem_cache_create() is
+ * fatal: the function panics instead of returning an error.
+ */
+void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
+               unsigned long flags)
+{
+       int err;
+
+       s->name = name;
+       s->size = s->object_size = size;
+       s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+       err = __kmem_cache_create(s, flags);
+
+       if (err)
+               panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
+                                       name, size, err);
+
+       s->refcount = -1;       /* Exempt from merging for now */
+}
+
+/*
+ * create_kmalloc_cache - allocate and set up one kmalloc cache at boot.
+ * @name:  cache name handed to create_boot_cache() (may be NULL; callers in
+ *         this file pass NULL and fill the name in later).
+ * @size:  object size for the cache.
+ * @flags: cache flags handed to create_boot_cache().
+ *
+ * The kmem_cache structure itself is allocated zeroed from kmem_cache with
+ * GFP_NOWAIT; allocation failure panics. The new cache is linked into
+ * slab_caches and returned.
+ */
+struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
+                               unsigned long flags)
+{
+       struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+       if (!s)
+               panic("Out of memory when creating slab %s\n", name);
+
+       create_boot_cache(s, name, size, flags);
+       list_add(&s->list, &slab_caches);
+       /* Replaces the -1 that create_boot_cache() left in refcount. */
+       s->refcount = 1;
+       return s;
+}
+
+/*
+ * The kmalloc cache array, KMALLOC_SHIFT_HIGH + 1 slots. kmalloc_slab()
+ * indexes it with a size_index[] entry for sizes up to 192 bytes and with
+ * fls(size - 1) for larger sizes.
+ */
+struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+EXPORT_SYMBOL(kmalloc_caches);
+
+#ifdef CONFIG_ZONE_DMA
+/*
+ * Same indexing as kmalloc_caches; kmalloc_slab() returns entries from this
+ * array when GFP_DMA is set in the allocation flags.
+ */
+struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
+EXPORT_SYMBOL(kmalloc_dma_caches);
+#endif
+
+/*
+ * Conversion table for small slabs sizes / 8 to the index in the
+ * kmalloc array. This is necessary for slabs < 192 since we have non power
+ * of two cache sizes there. The size of larger slabs can be determined using
+ * fls.
+ *
+ * Slot k (see size_index_elem()) covers request sizes 8 * k + 1 through
+ * 8 * k + 8; the trailing comment on each entry is the largest size served
+ * by that slot. Values 1 and 2 refer to the array slots that
+ * create_kmalloc_caches() fills with the 96 and 192 byte caches.
+ *
+ * The table is not const: create_kmalloc_caches() rewrites entries depending
+ * on KMALLOC_MIN_SIZE.
+ */
+static s8 size_index[24] = {
+       3,      /* 8 */
+       4,      /* 16 */
+       5,      /* 24 */
+       5,      /* 32 */
+       6,      /* 40 */
+       6,      /* 48 */
+       6,      /* 56 */
+       6,      /* 64 */
+       1,      /* 72 */
+       1,      /* 80 */
+       1,      /* 88 */
+       1,      /* 96 */
+       7,      /* 104 */
+       7,      /* 112 */
+       7,      /* 120 */
+       7,      /* 128 */
+       2,      /* 136 */
+       2,      /* 144 */
+       2,      /* 152 */
+       2,      /* 160 */
+       2,      /* 168 */
+       2,      /* 176 */
+       2,      /* 184 */
+       2       /* 192 */
+};
+
+/*
+ * Map a byte count to its slot in size_index[]: 1..8 -> 0, 9..16 -> 1, ...
+ *
+ * @bytes must be non-zero; with 0 the unsigned subtraction would wrap.
+ * The callers in this file only pass non-zero values.
+ */
+static inline int size_index_elem(size_t bytes)
+{
+       return (bytes - 1) / 8;
+}
+
+/*
+ * Find the kmem_cache structure that serves a given size of
+ * allocation
+ *
+ * @size:  requested allocation size in bytes.
+ * @flags: allocation flags; only GFP_DMA is examined, and only when
+ *         CONFIG_ZONE_DMA is enabled.
+ *
+ * Returns NULL (after a one-time warning) if @size exceeds KMALLOC_MAX_SIZE,
+ * ZERO_SIZE_PTR if @size is 0, and otherwise the matching entry of
+ * kmalloc_dma_caches[] or kmalloc_caches[].
+ */
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
+{
+       int index;
+
+       /* Oversized requests are rejected instead of indexing past the array. */
+       if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
+               return NULL;
+
+       if (size <= 192) {
+               if (!size)
+                       return ZERO_SIZE_PTR;
+
+               /* Small sizes include non power-of-two caches: use the table. */
+               index = size_index[size_index_elem(size)];
+       } else
+               index = fls(size - 1);
+
+#ifdef CONFIG_ZONE_DMA
+       if (unlikely((flags & GFP_DMA)))
+               return kmalloc_dma_caches[index];
+
+#endif
+       return kmalloc_caches[index];
+}
+
+/*
+ * Create the kmalloc array. Some of the regular kmalloc arrays
+ * may already have been created because they were needed to
+ * enable allocations for slab creation.
+ *
+ * @flags: cache flags passed to create_kmalloc_cache() for every cache
+ *         created here (OR'ed with SLAB_CACHE_DMA for the DMA caches).
+ *
+ * Steps, in order: adjust size_index[] for KMALLOC_MIN_SIZE, create the
+ * missing caches with a NULL name, set slab_state to UP, give every regular
+ * cache a "kmalloc-<size>" name, and finally (CONFIG_ZONE_DMA only) create a
+ * "dma-kmalloc-<size>" cache for every populated regular slot.
+ */
+void __init create_kmalloc_caches(unsigned long flags)
+{
+       int i;
+
+       /*
+        * Patch up the size_index table if we have strange large alignment
+        * requirements for the kmalloc array. This is only the case for
+        * MIPS it seems. The standard arches will not generate any code here.
+        *
+        * Largest permitted alignment is 256 bytes due to the way we
+        * handle the index determination for the smaller caches.
+        *
+        * Make sure that nothing crazy happens if someone starts tinkering
+        * around with ARCH_KMALLOC_MINALIGN
+        */
+       BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
+               (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
+
+       /* Sizes below KMALLOC_MIN_SIZE all map to the KMALLOC_SHIFT_LOW slot. */
+       for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
+               int elem = size_index_elem(i);
+
+               if (elem >= ARRAY_SIZE(size_index))
+                       break;
+               size_index[elem] = KMALLOC_SHIFT_LOW;
+       }
+
+       if (KMALLOC_MIN_SIZE >= 64) {
+               /*
+                * The 96 byte size cache is not used if the alignment
+                * is 64 byte.
+                */
+               for (i = 64 + 8; i <= 96; i += 8)
+                       size_index[size_index_elem(i)] = 7;
+
+       }
+
+       if (KMALLOC_MIN_SIZE >= 128) {
+               /*
+                * The 192 byte sized cache is not used if the alignment
+                * is 128 byte. Redirect kmalloc to use the 256 byte cache
+                * instead.
+                */
+               for (i = 128 + 8; i <= 192; i += 8)
+                       size_index[size_index_elem(i)] = 8;
+       }
+       /* Caches that are not of the two-to-the-power-of size */
+       if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1])
+               kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);
+
+       if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2])
+               kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
+
+       /* Power-of-two caches; slots that are already populated are kept. */
+       for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+               if (!kmalloc_caches[i])
+                       kmalloc_caches[i] = create_kmalloc_cache(NULL,
+                                                       1 << i, flags);
+
+       /* Kmalloc array is now usable */
+       slab_state = UP;
+
+       /*
+        * The names are allocated with kasprintf() only now, after slab_state
+        * has been set to UP; the caches above were created with a NULL name.
+        */
+       for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
+               struct kmem_cache *s = kmalloc_caches[i];
+               char *n;
+
+               if (s) {
+                       n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));
+
+                       BUG_ON(!n);
+                       s->name = n;
+               }
+       }
+
+#ifdef CONFIG_ZONE_DMA
+       /*
+        * Walk the regular array (not the DMA one) so that a DMA cache is
+        * created for exactly those slots that have a regular cache.
+        */
+       for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
+               struct kmem_cache *s = kmalloc_caches[i];
+
+               if (s) {
+                       int size = kmalloc_size(i);
+                       char *n = kasprintf(GFP_NOWAIT,
+                                "dma-kmalloc-%d", size);
+
+                       BUG_ON(!n);
+                       kmalloc_dma_caches[i] = create_kmalloc_cache(n,
+                               size, SLAB_CACHE_DMA | flags);
+               }
+       }
+#endif
+}
+#endif /* !CONFIG_SLOB */
+
+
+#ifdef CONFIG_SLABINFO
+/*
+ * print_slabinfo_header - write the slabinfo header to a seq_file.
+ * @m: seq_file to write to.
+ *
+ * Emits a version line followed by a single column-description line. With
+ * CONFIG_DEBUG_SLAB the version line carries a "(statistics)" suffix and
+ * the globalstat/cpustat column descriptions are appended.
+ */
+void print_slabinfo_header(struct seq_file *m)
+{
+       /*
+        * Output format version, so at least we can change it
+        * without _too_ many complaints.
+        */
+#ifdef CONFIG_DEBUG_SLAB
+       seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
+#else
+       seq_puts(m, "slabinfo - version: 2.1\n");
+#endif
+       seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
+                "<objperslab> <pagesperslab>");
+       seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+       seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+#ifdef CONFIG_DEBUG_SLAB
+       seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
+                "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
+       seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
+#endif
+       seq_putc(m, '\n');
+}
+
+/*
+ * seq_file .start callback for slabinfo_op.
+ *
+ * Takes slab_mutex (dropped again in s_stop()), prints the header when the
+ * requested position is 0, and returns the slab_caches entry at *pos as
+ * found by seq_list_start().
+ */
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+       loff_t n = *pos;
+
+       mutex_lock(&slab_mutex);
+       if (!n)
+               print_slabinfo_header(m);
+
+       return seq_list_start(&slab_caches, *pos);
+}
+
+/*
+ * seq_file .next callback for slabinfo_op: advance within slab_caches via
+ * seq_list_next().
+ */
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+       return seq_list_next(p, &slab_caches, pos);
+}
+
+/*
+ * seq_file .stop callback for slabinfo_op: release the slab_mutex taken in
+ * s_start().
+ */
+static void s_stop(struct seq_file *m, void *p)
+{
+       mutex_unlock(&slab_mutex);
+}
+
+/*
+ * memcg_accumulate_slabinfo - add per-memcg cache counters into @info.
+ * @s:    cache whose memcg caches should be accounted; nothing is done
+ *        unless is_root_cache(s) is true.
+ * @info: totals to update; the caller has already filled it in for @s.
+ *
+ * Only active_slabs, num_slabs, shared_avail, active_objs and num_objs are
+ * summed; all other fields of @info are left as the caller set them.
+ */
+static void
+memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
+{
+       struct kmem_cache *c;
+       struct slabinfo sinfo;
+       int i;
+
+       if (!is_root_cache(s))
+               return;
+
+       for_each_memcg_cache_index(i) {
+               /*
+                * NOTE(review): presumably the per-memcg cache derived from
+                * s at index i, NULL when there is none - confirm against
+                * cache_from_memcg().
+                */
+               c = cache_from_memcg(s, i);
+               if (!c)
+                       continue;
+
+               /* Start from a clean slate for each memcg cache. */
+               memset(&sinfo, 0, sizeof(sinfo));
+               get_slabinfo(c, &sinfo);
+
+               info->active_slabs += sinfo.active_slabs;
+               info->num_slabs += sinfo.num_slabs;
+               info->shared_avail += sinfo.shared_avail;
+               info->active_objs += sinfo.active_objs;
+               info->num_objs += sinfo.num_objs;
+       }
+}
+
+/*
+ * cache_show - write the slabinfo line for one cache.
+ * @s: cache to report.
+ * @m: seq_file to write to.
+ *
+ * Gathers the statistics of @s with get_slabinfo(), adds the counters of
+ * its memcg caches via memcg_accumulate_slabinfo(), and prints the name,
+ * object, tunables and slabdata columns, followed by whatever
+ * slabinfo_show_stats() emits and a newline.
+ *
+ * Always returns 0.
+ */
+int cache_show(struct kmem_cache *s, struct seq_file *m)
+{
+       struct slabinfo sinfo;
+
+       memset(&sinfo, 0, sizeof(sinfo));
+       get_slabinfo(s, &sinfo);
+
+       memcg_accumulate_slabinfo(s, &sinfo);
+
+       /* Pages per slab is printed as 2^cache_order. */
+       seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
+                  cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
+                  sinfo.objects_per_slab, (1 << sinfo.cache_order));
+
+       seq_printf(m, " : tunables %4u %4u %4u",
+                  sinfo.limit, sinfo.batchcount, sinfo.shared);
+       seq_printf(m, " : slabdata %6lu %6lu %6lu",
+                  sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
+       slabinfo_show_stats(m, s);
+       seq_putc(m, '\n');
+       return 0;
+}
+
+/*
+ * seq_file .show callback for slabinfo_op.
+ *
+ * @p is the list node of a kmem_cache on slab_caches. Caches for which
+ * is_root_cache() is false produce no output of their own (cache_show()
+ * folds their counters into the root cache's line); for the others the
+ * result of cache_show() is returned.
+ */
+static int s_show(struct seq_file *m, void *p)
+{
+       struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+
+       if (!is_root_cache(s))
+               return 0;
+       return cache_show(s, m);
+}
+
+/*
+ * slabinfo_op - iterator that generates /proc/slabinfo
+ *
+ * Output layout (one line per root cache, as printed by cache_show()):
+ * cache-name
+ * num-active-objs
+ * total-objs
+ * object size
+ * objects-per-slab
+ * num-pages-per-slab
+ * tunables: limit, batchcount, shared
+ * slabdata: num-active-slabs, total-slabs, shared-avail
+ * + whatever slabinfo_show_stats() appends
+ */
+static const struct seq_operations slabinfo_op = {
+       .start = s_start,
+       .next = s_next,
+       .stop = s_stop,
+       .show = s_show,
+};
+
+/*
+ * open handler for the slabinfo proc file: attach the slabinfo_op iterator
+ * to @file via seq_open() and return its result.
+ */
+static int slabinfo_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &slabinfo_op);
+}
+
+/*
+ * File operations for the slabinfo proc entry. Reading goes through the
+ * generic seq_file helpers; writes are handled by slabinfo_write(), which
+ * is not defined in this part of the file.
+ */
+static const struct file_operations proc_slabinfo_operations = {
+       .open           = slabinfo_open,
+       .read           = seq_read,
+       .write          = slabinfo_write,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+/*
+ * Register the "slabinfo" proc entry with mode S_IRUSR and
+ * proc_slabinfo_operations as its file operations.
+ *
+ * The return value of proc_create() is not checked; this initcall always
+ * returns 0.
+ */
+static int __init slab_proc_init(void)
+{
+       proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
+       return 0;
+}
+module_init(slab_proc_init);
+#endif /* CONFIG_SLABINFO */