mm/hugetlb_cgroup: fix hugetlb_cgroup migration
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

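/*
 * Per-hstate memory events exposed through the hugetlb.<size>.events
 * and hugetlb.<size>.events.local files. HUGETLB_MAX counts how many
 * times an allocation was blocked because the cgroup hit its limit.
 */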
enum hugetlb_memory_event {
        HUGETLB_MAX,
        HUGETLB_NR_MEMORY_EVENTS,
};

struct hugetlb_cgroup {
        struct cgroup_subsys_state css;

        /*
         * the counter to account for hugepages from hugetlb.
         */
        struct page_counter hugepage[HUGE_MAX_HSTATE];

        /*
         * the counter to account for hugepage reservations from hugetlb.
         */
        struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];

        atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
        atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];

        /* Handle for "hugetlb.events" */
        struct cgroup_file events_file[HUGE_MAX_HSTATE];

        /* Handle for "hugetlb.events.local" */
        struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};

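/*
 * A cftype's ->private field packs the hstate index into the upper 16
 * bits and a RES_* attribute into the lower 16 bits. For illustration,
 * MEMFILE_PRIVATE(1, RES_LIMIT) evaluates to (1 << 16) | RES_LIMIT;
 * MEMFILE_IDX() then recovers the index 1 and MEMFILE_ATTR() the
 * attribute RES_LIMIT.
 */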
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)       ((val) & 0xffff)

#define hugetlb_cgroup_from_counter(counter, idx)                   \
        container_of(counter, struct hugetlb_cgroup, hugepage[idx])

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
                                     bool rsvd)
{
        if (rsvd)
                return &h_cg->rsvd_hugepage[idx];
        return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
        return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
        return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
        return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
        return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
        return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
        return hugetlb_cgroup_from_css(h_cg->css.parent);
}

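/*
 * Return true if any per-hstate fault or reservation counter in this
 * cgroup still holds a charge. Used by css_offline below to decide
 * whether more charges remain to be reparented.
 */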
static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
        int idx;

        for (idx = 0; idx < hugetlb_max_hstate; idx++) {
                if (page_counter_read(
                            hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
                    page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
                            h_cg, idx))) {
                        return true;
                }
        }
        return false;
}

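/*
 * Initialize the fault and reservation counters for every hstate,
 * parenting each one to the matching counter in the parent cgroup, if
 * any. The default limit is PAGE_COUNTER_MAX rounded down to a whole
 * number of hugepages; e.g. with 2MB hugepages on 4KB base pages
 * (order 9), it is rounded down to a multiple of 512 base pages.
 */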
static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
                                struct hugetlb_cgroup *parent_h_cgroup)
{
        int idx;

        for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
                struct page_counter *fault_parent = NULL;
                struct page_counter *rsvd_parent = NULL;
                unsigned long limit;
                int ret;

                if (parent_h_cgroup) {
                        fault_parent = hugetlb_cgroup_counter_from_cgroup(
                                parent_h_cgroup, idx);
                        rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
                                parent_h_cgroup, idx);
                }
                page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
                                                                     idx),
                                  fault_parent);
                page_counter_init(
                        hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
                        rsvd_parent);

                limit = round_down(PAGE_COUNTER_MAX,
                                   1 << huge_page_order(&hstates[idx]));

                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
                        limit);
                VM_BUG_ON(ret);
                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
                        limit);
                VM_BUG_ON(ret);
        }
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
        struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
        struct hugetlb_cgroup *h_cgroup;

        h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
        if (!h_cgroup)
                return ERR_PTR(-ENOMEM);

        if (!parent_h_cgroup)
                root_h_cgroup = h_cgroup;

        hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
        return &h_cgroup->css;
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
        struct hugetlb_cgroup *h_cgroup;

        h_cgroup = hugetlb_cgroup_from_css(css);
        kfree(h_cgroup);
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved off the
 * active list or uncharged from the cgroup, so there is no need to
 * take a page reference or test whether the page is active here.
 * This function cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
                                       struct page *page)
{
        unsigned int nr_pages;
        struct page_counter *counter;
        struct hugetlb_cgroup *page_hcg;
        struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

        page_hcg = hugetlb_cgroup_from_page(page);
        /*
         * We can have pages on the active list that are not charged to
         * any cgroup, i.e. hugepages consisting of fewer than 3 pages.
         * We can safely ignore those pages.
         */
        if (!page_hcg || page_hcg != h_cg)
                goto out;

        nr_pages = compound_nr(page);
        if (!parent) {
                parent = root_h_cgroup;
                /* root has no limit */
                page_counter_charge(&parent->hugepage[idx], nr_pages);
        }
        counter = &h_cg->hugepage[idx];
        /* Take the pages off the local counter */
        page_counter_cancel(counter, nr_pages);

        set_hugetlb_cgroup(page, parent);
out:
        return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
        struct hstate *h;
        struct page *page;

        do {
                for_each_hstate(h) {
                        spin_lock(&hugetlb_lock);
                        list_for_each_entry(page, &h->hugepage_activelist, lru)
                                hugetlb_cgroup_move_parent(hstate_index(h),
                                                           h_cg, page);

                        spin_unlock(&hugetlb_lock);
                }
                cond_resched();
        } while (hugetlb_cgroup_have_usage(h_cg));
}

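/*
 * Record a memory event: the local counter is bumped only on the cgroup
 * that triggered the event, while the hierarchical events counter is
 * propagated up through the ancestors, stopping short of the root.
 */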
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
                                 enum hugetlb_memory_event event)
{
        atomic_long_inc(&hugetlb->events_local[idx][event]);
        cgroup_file_notify(&hugetlb->events_local_file[idx]);

        do {
                atomic_long_inc(&hugetlb->events[idx][event]);
                cgroup_file_notify(&hugetlb->events_file[idx]);
        } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
                 !hugetlb_cgroup_is_root(hugetlb));
}

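/*
 * Charge nr_pages to the current task's cgroup, following the usual
 * try-charge/commit protocol: on success *ptr is set to the charged
 * cgroup, which the caller later hands to the commit or uncharge
 * helpers. For reservation (rsvd) charges the css reference acquired
 * here is kept until the reservation is uncharged, since reservations
 * are not reparented when the cgroup goes offline.
 */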
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                          struct hugetlb_cgroup **ptr,
                                          bool rsvd)
{
        int ret = 0;
        struct page_counter *counter;
        struct hugetlb_cgroup *h_cg = NULL;

        if (hugetlb_cgroup_disabled())
                goto done;
        /*
         * We don't charge any cgroup if the compound page has fewer
         * than 3 pages.
         */
        if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
                goto done;
again:
        rcu_read_lock();
        h_cg = hugetlb_cgroup_from_task(current);
        if (!css_tryget(&h_cg->css)) {
                rcu_read_unlock();
                goto again;
        }
        rcu_read_unlock();

        if (!page_counter_try_charge(
                    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
                    nr_pages, &counter)) {
                ret = -ENOMEM;
                hugetlb_event(h_cg, idx, HUGETLB_MAX);
                css_put(&h_cg->css);
                goto done;
        }
        /*
         * Reservations take a reference to the css because they do not
         * get reparented.
         */
        if (!rsvd)
                css_put(&h_cg->css);
done:
        *ptr = h_cg;
        return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                 struct hugetlb_cgroup **ptr)
{
        return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
                                      struct hugetlb_cgroup **ptr)
{
        return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                                           struct hugetlb_cgroup *h_cg,
                                           struct page *page, bool rsvd)
{
        if (hugetlb_cgroup_disabled() || !h_cg)
                return;

        __set_hugetlb_cgroup(page, h_cg, rsvd);
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                                  struct hugetlb_cgroup *h_cg,
                                  struct page *page)
{
        __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
                                       struct hugetlb_cgroup *h_cg,
                                       struct page *page)
{
        __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                                           struct page *page, bool rsvd)
{
        struct hugetlb_cgroup *h_cg;

        if (hugetlb_cgroup_disabled())
                return;
        lockdep_assert_held(&hugetlb_lock);
        h_cg = __hugetlb_cgroup_from_page(page, rsvd);
        if (unlikely(!h_cg))
                return;
        __set_hugetlb_cgroup(page, NULL, rsvd);

        page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                                                   rsvd),
                              nr_pages);

        if (rsvd)
                css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                                  struct page *page)
{
        __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
                                       struct page *page)
{
        __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                                             struct hugetlb_cgroup *h_cg,
                                             bool rsvd)
{
        if (hugetlb_cgroup_disabled() || !h_cg)
                return;

        if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
                return;

        page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                                                   rsvd),
                              nr_pages);

        if (rsvd)
                css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                                    struct hugetlb_cgroup *h_cg)
{
        __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
                                         struct hugetlb_cgroup *h_cg)
{
        __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

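/*
 * Uncharge nr_pages from a reservation counter directly, dropping the
 * css reference that was taken when the reservation was charged. The
 * caller supplies the counter and css rather than deriving them from a
 * page, so this works even when no page was ever committed against the
 * reservation.
 */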
void hugetlb_cgroup_uncharge_counter(struct page_counter *p,
                                     unsigned long nr_pages,
                                     struct cgroup_subsys_state *css)
{
        if (hugetlb_cgroup_disabled() || !p || !css)
                return;

        page_counter_uncharge(p, nr_pages);
        css_put(css);
}

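/*
 * Attribute values carried in the low 16 bits of cftype->private (see
 * MEMFILE_PRIVATE() above). The RES_RSVD_* variants select the
 * reservation counters instead of the fault counters.
 */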
enum {
        RES_USAGE,
        RES_RSVD_USAGE,
        RES_LIMIT,
        RES_RSVD_LIMIT,
        RES_MAX_USAGE,
        RES_RSVD_MAX_USAGE,
        RES_FAILCNT,
        RES_RSVD_FAILCNT,
};

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
                                   struct cftype *cft)
{
        struct page_counter *counter;
        struct page_counter *rsvd_counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

        counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
        rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

        switch (MEMFILE_ATTR(cft->private)) {
        case RES_USAGE:
                return (u64)page_counter_read(counter) * PAGE_SIZE;
        case RES_RSVD_USAGE:
                return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
        case RES_LIMIT:
                return (u64)counter->max * PAGE_SIZE;
        case RES_RSVD_LIMIT:
                return (u64)rsvd_counter->max * PAGE_SIZE;
        case RES_MAX_USAGE:
                return (u64)counter->watermark * PAGE_SIZE;
        case RES_RSVD_MAX_USAGE:
                return (u64)rsvd_counter->watermark * PAGE_SIZE;
        case RES_FAILCNT:
                return counter->failcnt;
        case RES_RSVD_FAILCNT:
                return rsvd_counter->failcnt;
        default:
                BUG();
        }
}

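/*
 * seq_show callback for the cgroup v2 files: usage is reported in
 * bytes, and a limit equal to the rounded-down PAGE_COUNTER_MAX is
 * reported as the literal string "max".
 */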
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
        int idx;
        u64 val;
        struct cftype *cft = seq_cft(seq);
        unsigned long limit;
        struct page_counter *counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

        idx = MEMFILE_IDX(cft->private);
        counter = &h_cg->hugepage[idx];

        limit = round_down(PAGE_COUNTER_MAX,
                           1 << huge_page_order(&hstates[idx]));

        switch (MEMFILE_ATTR(cft->private)) {
        case RES_RSVD_USAGE:
                counter = &h_cg->rsvd_hugepage[idx];
                /* Fall through. */
        case RES_USAGE:
                val = (u64)page_counter_read(counter);
                seq_printf(seq, "%llu\n", val * PAGE_SIZE);
                break;
        case RES_RSVD_LIMIT:
                counter = &h_cg->rsvd_hugepage[idx];
                /* Fall through. */
        case RES_LIMIT:
                val = (u64)counter->max;
                if (val == limit)
                        seq_puts(seq, "max\n");
                else
                        seq_printf(seq, "%llu\n", val * PAGE_SIZE);
                break;
        default:
                BUG();
        }

        return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

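/*
 * Common write handler for the v1 and v2 limit files. The buffer is
 * parsed with page_counter_memparse(), with "max" (v2) or "-1" (v1)
 * meaning "no limit", and the result is rounded down to a whole number
 * of hugepages; e.g. with 2MB hugepages, writing 3M yields an
 * effective limit of 2M.
 */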
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off,
                                    const char *max)
{
        int ret, idx;
        unsigned long nr_pages;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
        bool rsvd = false;

        if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
                return -EINVAL;

        buf = strstrip(buf);
        ret = page_counter_memparse(buf, max, &nr_pages);
        if (ret)
                return ret;

        idx = MEMFILE_IDX(of_cft(of)->private);
        nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));

        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_RSVD_LIMIT:
                rsvd = true;
                /* Fall through. */
        case RES_LIMIT:
                mutex_lock(&hugetlb_limit_mutex);
                ret = page_counter_set_max(
                        __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
                        nr_pages);
                mutex_unlock(&hugetlb_limit_mutex);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
                                           char *buf, size_t nbytes, loff_t off)
{
        return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
                                        char *buf, size_t nbytes, loff_t off)
{
        return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off)
{
        int ret = 0;
        struct page_counter *counter, *rsvd_counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

        counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
        rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_MAX_USAGE:
                page_counter_reset_watermark(counter);
                break;
        case RES_RSVD_MAX_USAGE:
                page_counter_reset_watermark(rsvd_counter);
                break;
        case RES_FAILCNT:
                counter->failcnt = 0;
                break;
        case RES_RSVD_FAILCNT:
                rsvd_counter->failcnt = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

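/*
 * Format a hugepage size as a human-readable string ("64KB", "2MB",
 * "1GB", ...); this becomes part of the per-size control file names,
 * e.g. "hugetlb.2MB.max".
 */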
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
        if (hsize >= (1UL << 30))
                snprintf(buf, size, "%luGB", hsize >> 30);
        else if (hsize >= (1UL << 20))
                snprintf(buf, size, "%luMB", hsize >> 20);
        else
                snprintf(buf, size, "%luKB", hsize >> 10);
        return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
        int idx;
        long max;
        struct cftype *cft = seq_cft(seq);
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

        idx = MEMFILE_IDX(cft->private);

        if (local)
                max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
        else
                max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

        seq_printf(seq, "max %lu\n", max);

        return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
        return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
        return __hugetlb_events_show(seq, true);
}

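/*
 * Build the cgroup v2 (default hierarchy) control files for one hstate:
 * <size>.max, <size>.rsvd.max, <size>.current, <size>.rsvd.current,
 * <size>.events and <size>.events.local, each tagged with the hstate
 * index via MEMFILE_PRIVATE() and hidden on the root cgroup.
 */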
static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
        char buf[32];
        struct cftype *cft;
        struct hstate *h = &hstates[idx];

        /* format the size */
        mem_fmt(buf, sizeof(buf), huge_page_size(h));

        /* Add the limit file */
        cft = &h->cgroup_files_dfl[0];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->write = hugetlb_cgroup_write_dfl;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the reservation limit file */
        cft = &h->cgroup_files_dfl[1];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->write = hugetlb_cgroup_write_dfl;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the current usage file */
        cft = &h->cgroup_files_dfl[2];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the current reservation usage file */
        cft = &h->cgroup_files_dfl[3];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the events file */
        cft = &h->cgroup_files_dfl[4];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
        cft->private = MEMFILE_PRIVATE(idx, 0);
        cft->seq_show = hugetlb_events_show;
        cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the events.local file */
        cft = &h->cgroup_files_dfl[5];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
        cft->private = MEMFILE_PRIVATE(idx, 0);
        cft->seq_show = hugetlb_events_local_show;
        cft->file_offset = offsetof(struct hugetlb_cgroup,
                                    events_local_file[idx]);
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* NULL terminate the last cft */
        cft = &h->cgroup_files_dfl[6];
        memset(cft, 0, sizeof(*cft));

        WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
                                       h->cgroup_files_dfl));
}

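/*
 * Build the cgroup v1 (legacy hierarchy) files for one hstate:
 * <size>.limit_in_bytes, <size>.usage_in_bytes, <size>.max_usage_in_bytes
 * and <size>.failcnt, plus their .rsvd counterparts. Unlike the v2
 * files, these are also visible on the root cgroup.
 */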
static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
        char buf[32];
        struct cftype *cft;
        struct hstate *h = &hstates[idx];

        /* format the size */
        mem_fmt(buf, sizeof(buf), huge_page_size(h));

        /* Add the limit file */
        cft = &h->cgroup_files_legacy[0];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
        cft->read_u64 = hugetlb_cgroup_read_u64;
        cft->write = hugetlb_cgroup_write_legacy;

        /* Add the reservation limit file */
        cft = &h->cgroup_files_legacy[1];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
        cft->read_u64 = hugetlb_cgroup_read_u64;
        cft->write = hugetlb_cgroup_write_legacy;

        /* Add the usage file */
        cft = &h->cgroup_files_legacy[2];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the reservation usage file */
        cft = &h->cgroup_files_legacy[3];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the MAX usage file */
        cft = &h->cgroup_files_legacy[4];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the MAX reservation usage file */
        cft = &h->cgroup_files_legacy[5];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the failcnt file */
        cft = &h->cgroup_files_legacy[6];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the reservation failcnt file */
        cft = &h->cgroup_files_legacy[7];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* NULL terminate the last cft */
        cft = &h->cgroup_files_legacy[8];
        memset(cft, 0, sizeof(*cft));

        WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
                                          h->cgroup_files_legacy));
}

static void __init __hugetlb_cgroup_file_init(int idx)
{
        __hugetlb_cgroup_file_dfl_init(idx);
        __hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
        struct hstate *h;

        for_each_hstate(h) {
                /*
                 * Add cgroup control files only if the huge page consists
                 * of more than two normal pages. This is because we use
                 * page[2].private for storing cgroup details.
                 */
                if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
                        __hugetlb_cgroup_file_init(hstate_index(h));
        }
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages.
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
        struct hugetlb_cgroup *h_cg;
        struct hugetlb_cgroup *h_cg_rsvd;
        struct hstate *h = page_hstate(oldhpage);

        if (hugetlb_cgroup_disabled())
                return;

        VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
        spin_lock(&hugetlb_lock);
        h_cg = hugetlb_cgroup_from_page(oldhpage);
        h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
        set_hugetlb_cgroup(oldhpage, NULL);
        set_hugetlb_cgroup_rsvd(oldhpage, NULL);

        /* Transfer the h_cg details to the new hugepage */
        set_hugetlb_cgroup(newhpage, h_cg);
        set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
        list_move(&newhpage->lru, &h->hugepage_activelist);
        spin_unlock(&hugetlb_lock);
}

static struct cftype hugetlb_files[] = {
        {} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
        .css_alloc      = hugetlb_cgroup_css_alloc,
        .css_offline    = hugetlb_cgroup_css_offline,
        .css_free       = hugetlb_cgroup_css_free,
        .dfl_cftypes    = hugetlb_files,
        .legacy_cftypes = hugetlb_files,
};