mm: memcg: remove unused node/section info from pc->flags
mm/page_cgroup.c
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/bit_spinlock.h>
#include <linux/page_cgroup.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/cgroup.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>

static unsigned long total_usage;

#if !defined(CONFIG_SPARSEMEM)

void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
        pgdat->node_page_cgroup = NULL;
}

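/*
 * With flat memory, the per-node page_cgroup array covers the node's whole
 * pfn range, so the lookup is a simple offset from node_start_pfn.
 */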
struct page_cgroup *lookup_page_cgroup(struct page *page)
{
        unsigned long pfn = page_to_pfn(page);
        unsigned long offset;
        struct page_cgroup *base;

        base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
        if (unlikely(!base))
                return NULL;

        offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
        return base + offset;
}

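/*
 * Allocate one page_cgroup descriptor per page spanned by the node,
 * from bootmem on that node.
 */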
static int __init alloc_node_page_cgroup(int nid)
{
        struct page_cgroup *base;
        unsigned long table_size;
        unsigned long nr_pages;

        nr_pages = NODE_DATA(nid)->node_spanned_pages;
        if (!nr_pages)
                return 0;

        table_size = sizeof(struct page_cgroup) * nr_pages;

        base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
                        table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
        if (!base)
                return -ENOMEM;
        NODE_DATA(nid)->node_page_cgroup = base;
        total_usage += table_size;
        return 0;
}

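/* Set up page_cgroup arrays for all online nodes, or panic on failure. */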
void __init page_cgroup_init_flatmem(void)
{
        int nid, fail;

        if (mem_cgroup_disabled())
                return;

        for_each_online_node(nid) {
                fail = alloc_node_page_cgroup(nid);
                if (fail)
                        goto fail;
        }
        printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
        printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
        " don't want memory cgroups\n");
        return;
fail:
        printk(KERN_CRIT "allocation of page_cgroup failed.\n");
        printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
        panic("Out of memory");
}

#else /* CONFIG_SPARSEMEM */

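/*
 * With sparse memory, page_cgroup is kept per mem_section; the stored
 * pointer is pre-biased by the section's start pfn so that indexing by
 * the raw pfn works directly.
 */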
struct page_cgroup *lookup_page_cgroup(struct page *page)
{
        unsigned long pfn = page_to_pfn(page);
        struct mem_section *section = __pfn_to_section(pfn);

        if (!section->page_cgroup)
                return NULL;
        return section->page_cgroup + pfn;
}

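/*
 * Try to get contiguous pages on the requested node first; fall back to
 * vmalloc space (node-local if the node has memory) when that fails.
 */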
static void *__meminit alloc_page_cgroup(size_t size, int nid)
{
        gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
        void *addr = NULL;

        addr = alloc_pages_exact_nid(nid, size, flags);
        if (addr) {
                kmemleak_alloc(addr, size, 1, flags);
                return addr;
        }

        if (node_state(nid, N_HIGH_MEMORY))
                addr = vzalloc_node(size, nid);
        else
                addr = vzalloc(size);

        return addr;
}

#ifdef CONFIG_MEMORY_HOTPLUG
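/* Free a section's table, whichever allocator it came from. */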
static void free_page_cgroup(void *addr)
{
        if (is_vmalloc_addr(addr)) {
                vfree(addr);
        } else {
                struct page *page = virt_to_page(addr);
                size_t table_size =
                        sizeof(struct page_cgroup) * PAGES_PER_SECTION;

                BUG_ON(PageReserved(page));
                free_pages_exact(addr, table_size);
        }
}
#endif

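/*
 * Allocate and install the page_cgroup table for the section containing
 * @pfn, unless one is already present.
 */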
static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
{
        struct mem_section *section;
        struct page_cgroup *base;
        unsigned long table_size;

        section = __pfn_to_section(pfn);

        if (section->page_cgroup)
                return 0;

        table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
        base = alloc_page_cgroup(table_size, nid);

        /*
         * The value stored in section->page_cgroup is (base - pfn)
         * and it does not point to the memory block allocated above,
         * causing kmemleak false positives.
         */
        kmemleak_not_leak(base);

        if (!base) {
                printk(KERN_ERR "page cgroup allocation failure\n");
                return -ENOMEM;
        }

        /*
         * The passed "pfn" may not be aligned to SECTION.  For the calculation
         * we need to apply a mask.
         */
        pfn &= PAGE_SECTION_MASK;
        section->page_cgroup = base - pfn;
        total_usage += table_size;
        return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
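/* Tear down the page_cgroup table of the section containing @pfn. */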
void __free_page_cgroup(unsigned long pfn)
{
        struct mem_section *ms;
        struct page_cgroup *base;

        ms = __pfn_to_section(pfn);
        if (!ms || !ms->page_cgroup)
                return;
        base = ms->page_cgroup + pfn;
        free_page_cgroup(base);
        ms->page_cgroup = NULL;
}

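/*
 * Populate page_cgroup for every present section in the range being
 * onlined; on any failure, roll back what was set up and report -ENOMEM.
 */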
int __meminit online_page_cgroup(unsigned long start_pfn,
                        unsigned long nr_pages,
                        int nid)
{
        unsigned long start, end, pfn;
        int fail = 0;

        start = SECTION_ALIGN_DOWN(start_pfn);
        end = SECTION_ALIGN_UP(start_pfn + nr_pages);

        if (nid == -1) {
                /*
                 * In this case, "nid" already exists and contains valid memory.
                 * "start_pfn" passed to us is a pfn which is an arg for
                 * online_pages(), and start_pfn should exist.
                 */
                nid = pfn_to_nid(start_pfn);
                VM_BUG_ON(!node_state(nid, N_ONLINE));
        }

        for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
                if (!pfn_present(pfn))
                        continue;
                fail = init_section_page_cgroup(pfn, nid);
        }
        if (!fail)
                return 0;

        /* rollback */
        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
                __free_page_cgroup(pfn);

        return -ENOMEM;
}

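/* Free the page_cgroup tables for every section in the offlined range. */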
int __meminit offline_page_cgroup(unsigned long start_pfn,
                unsigned long nr_pages, int nid)
{
        unsigned long start, end, pfn;

        start = SECTION_ALIGN_DOWN(start_pfn);
        end = SECTION_ALIGN_UP(start_pfn + nr_pages);

        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
                __free_page_cgroup(pfn);
        return 0;
}

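/*
 * Memory hotplug notifier: allocate tables before a block goes online and
 * free them after it goes offline.
 */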
static int __meminit page_cgroup_callback(struct notifier_block *self,
                               unsigned long action, void *arg)
{
        struct memory_notify *mn = arg;
        int ret = 0;

        switch (action) {
        case MEM_GOING_ONLINE:
                ret = online_page_cgroup(mn->start_pfn,
                                   mn->nr_pages, mn->status_change_nid);
                break;
        case MEM_OFFLINE:
                offline_page_cgroup(mn->start_pfn,
                                mn->nr_pages, mn->status_change_nid);
                break;
        case MEM_CANCEL_ONLINE:
        case MEM_GOING_OFFLINE:
                break;
        case MEM_ONLINE:
        case MEM_CANCEL_OFFLINE:
                break;
        }

        return notifier_from_errno(ret);
}

#endif

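/*
 * Allocate page_cgroup tables at boot for every present section of every
 * node with memory, then register the memory hotplug notifier.
 */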
void __init page_cgroup_init(void)
{
        unsigned long pfn;
        int nid;

        if (mem_cgroup_disabled())
                return;

        for_each_node_state(nid, N_HIGH_MEMORY) {
                unsigned long start_pfn, end_pfn;

                start_pfn = node_start_pfn(nid);
                end_pfn = node_end_pfn(nid);
                /*
                 * start_pfn and end_pfn may not be aligned to SECTION, and
                 * page->flags of pages outside this node is not initialized,
                 * so we only scan pfns in [start_pfn, end_pfn), advancing one
                 * section at a time.
                 */
                for (pfn = start_pfn;
                     pfn < end_pfn;
                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

                        if (!pfn_valid(pfn))
                                continue;
                        /*
                         * Nodes' pfn ranges can overlap.
                         * Some architectures have a node layout such as
                         * -------------pfn-------------->
                         * N0 | N1 | N2 | N0 | N1 | N2 | ...
                         */
                        if (pfn_to_nid(pfn) != nid)
                                continue;
                        if (init_section_page_cgroup(pfn, nid))
                                goto oom;
                }
        }
        hotplug_memory_notifier(page_cgroup_callback, 0);
        printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
        printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
                         "don't want memory cgroups\n");
        return;
oom:
        printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
        panic("Out of memory");
}

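/* Nothing to do per-node with SPARSEMEM; the tables hang off mem_section. */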
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
        return;
}

#endif

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP

static DEFINE_MUTEX(swap_cgroup_mutex);
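/* Per-swap-device bookkeeping: an array of pages holding swap_cgroup entries. */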
struct swap_cgroup_ctrl {
        struct page **map;
        unsigned long length;
        spinlock_t      lock;
};

static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];

struct swap_cgroup {
        unsigned short          id;
};
#define SC_PER_PAGE     (PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK     (SC_PER_PAGE - 1)

/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
 * of SwapCache. At swap_free(), it is accessed directly from the swap code.
 *
 * This means:
 *  - there is no race in "exchange" when we are reached via SwapCache,
 *    because the SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no other user of the entry,
 *    so again no race.
 * Hence no lock is needed around "exchange".
 *
 * TODO: these buffers could be pushed out to HIGHMEM.
 */

/*
 * allocate buffer for swap_cgroup.
 */
static int swap_cgroup_prepare(int type)
{
        struct page *page;
        struct swap_cgroup_ctrl *ctrl;
        unsigned long idx, max;

        ctrl = &swap_cgroup_ctrl[type];

        for (idx = 0; idx < ctrl->length; idx++) {
                page = alloc_page(GFP_KERNEL | __GFP_ZERO);
                if (!page)
                        goto not_enough_page;
                ctrl->map[idx] = page;
        }
        return 0;
not_enough_page:
        max = idx;
        for (idx = 0; idx < max; idx++)
                __free_page(ctrl->map[idx]);

        return -ENOMEM;
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns the old id on success, 0 on failure.
 * (No mem_cgroup uses 0 as its id.)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
                                        unsigned short old, unsigned short new)
{
        int type = swp_type(ent);
        unsigned long offset = swp_offset(ent);
        unsigned long idx = offset / SC_PER_PAGE;
        unsigned long pos = offset & SC_POS_MASK;
        struct swap_cgroup_ctrl *ctrl;
        struct page *mappage;
        struct swap_cgroup *sc;
        unsigned long flags;
        unsigned short retval;

        ctrl = &swap_cgroup_ctrl[type];

        mappage = ctrl->map[idx];
        sc = page_address(mappage);
        sc += pos;
        spin_lock_irqsave(&ctrl->lock, flags);
        retval = sc->id;
        if (retval == old)
                sc->id = new;
        else
                retval = 0;
        spin_unlock_irqrestore(&ctrl->lock, flags);
        return retval;
}

/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: mem_cgroup's css id to be recorded
 *
 * Returns the old value on success, 0 on failure.
 * (Of course, the old value can be 0.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
        int type = swp_type(ent);
        unsigned long offset = swp_offset(ent);
        unsigned long idx = offset / SC_PER_PAGE;
        unsigned long pos = offset & SC_POS_MASK;
        struct swap_cgroup_ctrl *ctrl;
        struct page *mappage;
        struct swap_cgroup *sc;
        unsigned short old;
        unsigned long flags;

        ctrl = &swap_cgroup_ctrl[type];

        mappage = ctrl->map[idx];
        sc = page_address(mappage);
        sc += pos;
        spin_lock_irqsave(&ctrl->lock, flags);
        old = sc->id;
        sc->id = id;
        spin_unlock_irqrestore(&ctrl->lock, flags);

        return old;
}

/**
 * lookup_swap_cgroup - lookup mem_cgroup tied to a swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the CSS ID of the mem_cgroup on success, 0 on failure
 * (0 is an invalid ID).
 */
unsigned short lookup_swap_cgroup(swp_entry_t ent)
{
        int type = swp_type(ent);
        unsigned long offset = swp_offset(ent);
        unsigned long idx = offset / SC_PER_PAGE;
        unsigned long pos = offset & SC_POS_MASK;
        struct swap_cgroup_ctrl *ctrl;
        struct page *mappage;
        struct swap_cgroup *sc;
        unsigned short ret;

        ctrl = &swap_cgroup_ctrl[type];
        mappage = ctrl->map[idx];
        sc = page_address(mappage);
        sc += pos;
        ret = sc->id;
        return ret;
}

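/*
 * Called at swapon: size and allocate the map for this swap type,
 * one swap_cgroup entry per swap slot, up to max_pages.
 */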
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
        void *array;
        unsigned long array_size;
        unsigned long length;
        struct swap_cgroup_ctrl *ctrl;

        if (!do_swap_account)
                return 0;

        length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
        array_size = length * sizeof(void *);

        array = vzalloc(array_size);
        if (!array)
                goto nomem;

        ctrl = &swap_cgroup_ctrl[type];
        mutex_lock(&swap_cgroup_mutex);
        ctrl->length = length;
        ctrl->map = array;
        spin_lock_init(&ctrl->lock);
        if (swap_cgroup_prepare(type)) {
                /* memory shortage */
                ctrl->map = NULL;
                ctrl->length = 0;
                mutex_unlock(&swap_cgroup_mutex);
                vfree(array);
                goto nomem;
        }
        mutex_unlock(&swap_cgroup_mutex);

        return 0;
nomem:
        printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
        printk(KERN_INFO
                "swap_cgroup can be disabled by swapaccount=0 boot option\n");
        return -ENOMEM;
}

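/*
 * Called at swapoff: detach this swap type's map and free it along with
 * all of the pages it references.
 */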
void swap_cgroup_swapoff(int type)
{
        struct page **map;
        unsigned long i, length;
        struct swap_cgroup_ctrl *ctrl;

        if (!do_swap_account)
                return;

        mutex_lock(&swap_cgroup_mutex);
        ctrl = &swap_cgroup_ctrl[type];
        map = ctrl->map;
        length = ctrl->length;
        ctrl->map = NULL;
        ctrl->length = 0;
        mutex_unlock(&swap_cgroup_mutex);

        if (map) {
                for (i = 0; i < length; i++) {
                        struct page *page = map[i];
                        if (page)
                                __free_page(page);
                }
                vfree(map);
        }
}

#endif