hugetlb/cgroup: add support for cgroup removal
[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / hugetlb_cgroup.c
1 /*
2  *
3  * Copyright IBM Corporation, 2012
4  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of version 2.1 of the GNU Lesser General Public License
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13  *
14  */
15
16 #include <linux/cgroup.h>
17 #include <linux/slab.h>
18 #include <linux/hugetlb.h>
19 #include <linux/hugetlb_cgroup.h>
20
21 struct hugetlb_cgroup {
22         struct cgroup_subsys_state css;
23         /*
24          * the counter to account for hugepages from hugetlb.
25          */
26         struct res_counter hugepage[HUGE_MAX_HSTATE];
27 };
28
29 struct cgroup_subsys hugetlb_subsys __read_mostly;
30 static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
31
32 static inline
33 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
34 {
35         return container_of(s, struct hugetlb_cgroup, css);
36 }
37
38 static inline
39 struct hugetlb_cgroup *hugetlb_cgroup_from_cgroup(struct cgroup *cgroup)
40 {
41         return hugetlb_cgroup_from_css(cgroup_subsys_state(cgroup,
42                                                            hugetlb_subsys_id));
43 }
44
45 static inline
46 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
47 {
48         return hugetlb_cgroup_from_css(task_subsys_state(task,
49                                                          hugetlb_subsys_id));
50 }
51
52 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
53 {
54         return (h_cg == root_h_cgroup);
55 }
56
57 static inline struct hugetlb_cgroup *parent_hugetlb_cgroup(struct cgroup *cg)
58 {
59         if (!cg->parent)
60                 return NULL;
61         return hugetlb_cgroup_from_cgroup(cg->parent);
62 }
63
64 static inline bool hugetlb_cgroup_have_usage(struct cgroup *cg)
65 {
66         int idx;
67         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_cgroup(cg);
68
69         for (idx = 0; idx < hugetlb_max_hstate; idx++) {
70                 if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
71                         return true;
72         }
73         return false;
74 }
75
76 static struct cgroup_subsys_state *hugetlb_cgroup_create(struct cgroup *cgroup)
77 {
78         int idx;
79         struct cgroup *parent_cgroup;
80         struct hugetlb_cgroup *h_cgroup, *parent_h_cgroup;
81
82         h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
83         if (!h_cgroup)
84                 return ERR_PTR(-ENOMEM);
85
86         parent_cgroup = cgroup->parent;
87         if (parent_cgroup) {
88                 parent_h_cgroup = hugetlb_cgroup_from_cgroup(parent_cgroup);
89                 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
90                         res_counter_init(&h_cgroup->hugepage[idx],
91                                          &parent_h_cgroup->hugepage[idx]);
92         } else {
93                 root_h_cgroup = h_cgroup;
94                 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
95                         res_counter_init(&h_cgroup->hugepage[idx], NULL);
96         }
97         return &h_cgroup->css;
98 }
99
/* Free the hugetlb controller state that was allocated for @cgroup. */
static void hugetlb_cgroup_destroy(struct cgroup *cgroup)
{
	kfree(hugetlb_cgroup_from_cgroup(cgroup));
}
107
108
109 /*
110  * Should be called with hugetlb_lock held.
111  * Since we are holding hugetlb_lock, pages cannot get moved from
112  * active list or uncharged from the cgroup, So no need to get
113  * page reference and test for page active here. This function
114  * cannot fail.
115  */
116 static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup,
117                                        struct page *page)
118 {
119         int csize;
120         struct res_counter *counter;
121         struct res_counter *fail_res;
122         struct hugetlb_cgroup *page_hcg;
123         struct hugetlb_cgroup *h_cg   = hugetlb_cgroup_from_cgroup(cgroup);
124         struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(cgroup);
125
126         page_hcg = hugetlb_cgroup_from_page(page);
127         /*
128          * We can have pages in active list without any cgroup
129          * ie, hugepage with less than 3 pages. We can safely
130          * ignore those pages.
131          */
132         if (!page_hcg || page_hcg != h_cg)
133                 goto out;
134
135         csize = PAGE_SIZE << compound_order(page);
136         if (!parent) {
137                 parent = root_h_cgroup;
138                 /* root has no limit */
139                 res_counter_charge_nofail(&parent->hugepage[idx],
140                                           csize, &fail_res);
141         }
142         counter = &h_cg->hugepage[idx];
143         res_counter_uncharge_until(counter, counter->parent, csize);
144
145         set_hugetlb_cgroup(page, parent);
146 out:
147         return;
148 }
149
150 /*
151  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
152  * the parent cgroup.
153  */
154 static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
155 {
156         struct hstate *h;
157         struct page *page;
158         int ret = 0, idx = 0;
159
160         do {
161                 if (cgroup_task_count(cgroup) ||
162                     !list_empty(&cgroup->children)) {
163                         ret = -EBUSY;
164                         goto out;
165                 }
166                 for_each_hstate(h) {
167                         spin_lock(&hugetlb_lock);
168                         list_for_each_entry(page, &h->hugepage_activelist, lru)
169                                 hugetlb_cgroup_move_parent(idx, cgroup, page);
170
171                         spin_unlock(&hugetlb_lock);
172                         idx++;
173                 }
174                 cond_resched();
175         } while (hugetlb_cgroup_have_usage(cgroup));
176 out:
177         return ret;
178 }
179
180 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
181                                  struct hugetlb_cgroup **ptr)
182 {
183         int ret = 0;
184         struct res_counter *fail_res;
185         struct hugetlb_cgroup *h_cg = NULL;
186         unsigned long csize = nr_pages * PAGE_SIZE;
187
188         if (hugetlb_cgroup_disabled())
189                 goto done;
190         /*
191          * We don't charge any cgroup if the compound page have less
192          * than 3 pages.
193          */
194         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
195                 goto done;
196 again:
197         rcu_read_lock();
198         h_cg = hugetlb_cgroup_from_task(current);
199         if (!css_tryget(&h_cg->css)) {
200                 rcu_read_unlock();
201                 goto again;
202         }
203         rcu_read_unlock();
204
205         ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
206         css_put(&h_cg->css);
207 done:
208         *ptr = h_cg;
209         return ret;
210 }
211
212 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
213                                   struct hugetlb_cgroup *h_cg,
214                                   struct page *page)
215 {
216         if (hugetlb_cgroup_disabled() || !h_cg)
217                 return;
218
219         spin_lock(&hugetlb_lock);
220         set_hugetlb_cgroup(page, h_cg);
221         spin_unlock(&hugetlb_lock);
222         return;
223 }
224
225 /*
226  * Should be called with hugetlb_lock held
227  */
228 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
229                                   struct page *page)
230 {
231         struct hugetlb_cgroup *h_cg;
232         unsigned long csize = nr_pages * PAGE_SIZE;
233
234         if (hugetlb_cgroup_disabled())
235                 return;
236         VM_BUG_ON(!spin_is_locked(&hugetlb_lock));
237         h_cg = hugetlb_cgroup_from_page(page);
238         if (unlikely(!h_cg))
239                 return;
240         set_hugetlb_cgroup(page, NULL);
241         res_counter_uncharge(&h_cg->hugepage[idx], csize);
242         return;
243 }
244
245 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
246                                     struct hugetlb_cgroup *h_cg)
247 {
248         unsigned long csize = nr_pages * PAGE_SIZE;
249
250         if (hugetlb_cgroup_disabled() || !h_cg)
251                 return;
252
253         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
254                 return;
255
256         res_counter_uncharge(&h_cg->hugepage[idx], csize);
257         return;
258 }
259
260 struct cgroup_subsys hugetlb_subsys = {
261         .name = "hugetlb",
262         .create     = hugetlb_cgroup_create,
263         .pre_destroy = hugetlb_cgroup_pre_destroy,
264         .destroy    = hugetlb_cgroup_destroy,
265         .subsys_id  = hugetlb_subsys_id,
266 };