mm/hugetlb_cgroup.c

   1 /*
   2  *
   3  * Copyright IBM Corporation, 2012
   4  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   5  *
   6  * Cgroup v2
   7  * Copyright (C) 2019 Red Hat, Inc.
   8  * Author: Giuseppe Scrivano <gscrivan@redhat.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms of version 2.1 of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation.
  13  *
  14  * This program is distributed in the hope that it would be useful, but
  15  * WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17  *
  18  */
  19
  20 #include <linux/cgroup.h>
  21 #include <linux/page_counter.h>
  22 #include <linux/slab.h>
  23 #include <linux/hugetlb.h>
  24 #include <linux/hugetlb_cgroup.h>
  25
  26 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
  27 #define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
  28 #define MEMFILE_ATTR(val)       ((val) & 0xffff)
  29
  30 #define hugetlb_cgroup_from_counter(counter, idx)                   \
  31         container_of(counter, struct hugetlb_cgroup, hugepage[idx])
  32
  33 static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
  34
  35 static inline struct page_counter *
  36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
  37                                      bool rsvd)
  38 {
  39         if (rsvd)
  40                 return &h_cg->rsvd_hugepage[idx];
  41         return &h_cg->hugepage[idx];
  42 }
  43
  44 static inline struct page_counter *
  45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
  46 {
  47         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
  48 }
  49
  50 static inline struct page_counter *
  51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
  52 {
  53         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
  54 }
  55
  56 static inline
  57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
  58 {
  59         return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
  60 }
  61
  62 static inline
  63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
  64 {
  65         return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
  66 }
  67
  68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
  69 {
  70         return (h_cg == root_h_cgroup);
  71 }
  72
  73 static inline struct hugetlb_cgroup *
  74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
  75 {
  76         return hugetlb_cgroup_from_css(h_cg->css.parent);
  77 }
  78
  79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
  80 {
  81         int idx;
  82
  83         for (idx = 0; idx < hugetlb_max_hstate; idx++) {
  84                 if (page_counter_read(
  85                             hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
  86                     page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
  87                             h_cg, idx))) {
  88                         return true;
  89                 }
  90         }
  91         return false;
  92 }
  93
  94 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
  95                                 struct hugetlb_cgroup *parent_h_cgroup)
  96 {
  97         int idx;
  98
  99         for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
 100                 struct page_counter *fault_parent = NULL;
 101                 struct page_counter *rsvd_parent = NULL;
 102                 unsigned long limit;
 103                 int ret;
 104
 105                 if (parent_h_cgroup) {
 106                         fault_parent = hugetlb_cgroup_counter_from_cgroup(
 107                                 parent_h_cgroup, idx);
 108                         rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
 109                                 parent_h_cgroup, idx);
 110                 }
 111                 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
 112                                                                      idx),
 113                                   fault_parent);
 114                 page_counter_init(
 115                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 116                         rsvd_parent);
 117
 118                 limit = round_down(PAGE_COUNTER_MAX,
 119                                    1 << huge_page_order(&hstates[idx]));
 120
 121                 ret = page_counter_set_max(
 122                         hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
 123                         limit);
 124                 VM_BUG_ON(ret);
 125                 ret = page_counter_set_max(
 126                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 127                         limit);
 128                 VM_BUG_ON(ret);
 129         }
 130 }
 131
 132 static struct cgroup_subsys_state *
 133 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 134 {
 135         struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
 136         struct hugetlb_cgroup *h_cgroup;
 137
 138         h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
 139         if (!h_cgroup)
 140                 return ERR_PTR(-ENOMEM);
 141
 142         if (!parent_h_cgroup)
 143                 root_h_cgroup = h_cgroup;
 144
 145         hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
 146         return &h_cgroup->css;
 147 }
 148
 149 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
 150 {
 151         struct hugetlb_cgroup *h_cgroup;
 152
 153         h_cgroup = hugetlb_cgroup_from_css(css);
 154         kfree(h_cgroup);
 155 }
 156
 157 /*
 158  * Should be called with hugetlb_lock held.
 159  * Since we are holding hugetlb_lock, pages cannot get moved from
 160  * active list or uncharged from the cgroup, So no need to get
 161  * page reference and test for page active here. This function
 162  * cannot fail.
 163  */
 164 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 165                                        struct page *page)
 166 {
 167         unsigned int nr_pages;
 168         struct page_counter *counter;
 169         struct hugetlb_cgroup *page_hcg;
 170         struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
 171
 172         page_hcg = hugetlb_cgroup_from_page(page);
 173         /*
 174          * We can have pages in active list without any cgroup
 175          * ie, hugepage with less than 3 pages. We can safely
 176          * ignore those pages.
 177          */
 178         if (!page_hcg || page_hcg != h_cg)
 179                 goto out;
 180
 181         nr_pages = compound_nr(page);
 182         if (!parent) {
 183                 parent = root_h_cgroup;
 184                 /* root has no limit */
 185                 page_counter_charge(&parent->hugepage[idx], nr_pages);
 186         }
 187         counter = &h_cg->hugepage[idx];
 188         /* Take the pages off the local counter */
 189         page_counter_cancel(counter, nr_pages);
 190
 191         set_hugetlb_cgroup(page, parent);
 192 out:
 193         return;
 194 }
 195
 196 /*
 197  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 198  * the parent cgroup.
 199  */
 200 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
 201 {
 202         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 203         struct hstate *h;
 204         struct page *page;
 205         int idx = 0;
 206
 207         do {
 208                 for_each_hstate(h) {
 209                         spin_lock(&hugetlb_lock);
 210                         list_for_each_entry(page, &h->hugepage_activelist, lru)
 211                                 hugetlb_cgroup_move_parent(idx, h_cg, page);
 212
 213                         spin_unlock(&hugetlb_lock);
 214                         idx++;
 215                 }
 216                 cond_resched();
 217         } while (hugetlb_cgroup_have_usage(h_cg));
 218 }
 219
 220 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
 221                                  enum hugetlb_memory_event event)
 222 {
 223         atomic_long_inc(&hugetlb->events_local[idx][event]);
 224         cgroup_file_notify(&hugetlb->events_local_file[idx]);
 225
 226         do {
 227                 atomic_long_inc(&hugetlb->events[idx][event]);
 228                 cgroup_file_notify(&hugetlb->events_file[idx]);
 229         } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
 230                  !hugetlb_cgroup_is_root(hugetlb));
 231 }
 232
 233 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 234                                           struct hugetlb_cgroup **ptr,
 235                                           bool rsvd)
 236 {
 237         int ret = 0;
 238         struct page_counter *counter;
 239         struct hugetlb_cgroup *h_cg = NULL;
 240
 241         if (hugetlb_cgroup_disabled())
 242                 goto done;
 243         /*
 244          * We don't charge any cgroup if the compound page have less
 245          * than 3 pages.
 246          */
 247         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 248                 goto done;
 249 again:
 250         rcu_read_lock();
 251         h_cg = hugetlb_cgroup_from_task(current);
 252         if (!css_tryget(&h_cg->css)) {
 253                 rcu_read_unlock();
 254                 goto again;
 255         }
 256         rcu_read_unlock();
 257
 258         if (!page_counter_try_charge(
 259                     __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 260                     nr_pages, &counter)) {
 261                 ret = -ENOMEM;
 262                 hugetlb_event(h_cg, idx, HUGETLB_MAX);
 263                 css_put(&h_cg->css);
 264                 goto done;
 265         }
 266         /* Reservations take a reference to the css because they do not get
 267          * reparented.
 268          */
 269         if (!rsvd)
 270                 css_put(&h_cg->css);
 271 done:
 272         *ptr = h_cg;
 273         return ret;
 274 }
 275
 276 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 277                                  struct hugetlb_cgroup **ptr)
 278 {
 279         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
 280 }
 281
 282 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
 283                                       struct hugetlb_cgroup **ptr)
 284 {
 285         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
 286 }
 287
 288 /* Should be called with hugetlb_lock held */
 289 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 290                                            struct hugetlb_cgroup *h_cg,
 291                                            struct page *page, bool rsvd)
 292 {
 293         if (hugetlb_cgroup_disabled() || !h_cg)
 294                 return;
 295
 296         __set_hugetlb_cgroup(page, h_cg, rsvd);
 297         return;
 298 }
 299
 300 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 301                                   struct hugetlb_cgroup *h_cg,
 302                                   struct page *page)
 303 {
 304         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
 305 }
 306
 307 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 308                                        struct hugetlb_cgroup *h_cg,
 309                                        struct page *page)
 310 {
 311         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
 312 }
 313
 314 /*
 315  * Should be called with hugetlb_lock held
 316  */
 317 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 318                                            struct page *page, bool rsvd)
 319 {
 320         struct hugetlb_cgroup *h_cg;
 321
 322         if (hugetlb_cgroup_disabled())
 323                 return;
 324         lockdep_assert_held(&hugetlb_lock);
 325         h_cg = __hugetlb_cgroup_from_page(page, rsvd);
 326         if (unlikely(!h_cg))
 327                 return;
 328         __set_hugetlb_cgroup(page, NULL, rsvd);
 329
 330         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 331                                                                    rsvd),
 332                               nr_pages);
 333
 334         if (rsvd)
 335                 css_put(&h_cg->css);
 336
 337         return;
 338 }
 339
 340 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 341                                   struct page *page)
 342 {
 343         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
 344 }
 345
 346 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
 347                                        struct page *page)
 348 {
 349         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
 350 }
 351
 352 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 353                                              struct hugetlb_cgroup *h_cg,
 354                                              bool rsvd)
 355 {
 356         if (hugetlb_cgroup_disabled() || !h_cg)
 357                 return;
 358
 359         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 360                 return;
 361
 362         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 363                                                                    rsvd),
 364                               nr_pages);
 365
 366         if (rsvd)
 367                 css_put(&h_cg->css);
 368 }
 369
 370 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 371                                     struct hugetlb_cgroup *h_cg)
 372 {
 373         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
 374 }
 375
 376 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
 377                                          struct hugetlb_cgroup *h_cg)
 378 {
 379         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
 380 }
 381
 382 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
 383                                      unsigned long end)
 384 {
 385         if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
 386             !resv->css)
 387                 return;
 388
 389         page_counter_uncharge(resv->reservation_counter,
 390                               (end - start) * resv->pages_per_hpage);
 391         css_put(resv->css);
 392 }
 393
 394 enum {
 395         RES_USAGE,
 396         RES_RSVD_USAGE,
 397         RES_LIMIT,
 398         RES_RSVD_LIMIT,
 399         RES_MAX_USAGE,
 400         RES_RSVD_MAX_USAGE,
 401         RES_FAILCNT,
 402         RES_RSVD_FAILCNT,
 403 };
 404
 405 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
 406                                    struct cftype *cft)
 407 {
 408         struct page_counter *counter;
 409         struct page_counter *rsvd_counter;
 410         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 411
 412         counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
 413         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
 414
 415         switch (MEMFILE_ATTR(cft->private)) {
 416         case RES_USAGE:
 417                 return (u64)page_counter_read(counter) * PAGE_SIZE;
 418         case RES_RSVD_USAGE:
 419                 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
 420         case RES_LIMIT:
 421                 return (u64)counter->max * PAGE_SIZE;
 422         case RES_RSVD_LIMIT:
 423                 return (u64)rsvd_counter->max * PAGE_SIZE;
 424         case RES_MAX_USAGE:
 425                 return (u64)counter->watermark * PAGE_SIZE;
 426         case RES_RSVD_MAX_USAGE:
 427                 return (u64)rsvd_counter->watermark * PAGE_SIZE;
 428         case RES_FAILCNT:
 429                 return counter->failcnt;
 430         case RES_RSVD_FAILCNT:
 431                 return rsvd_counter->failcnt;
 432         default:
 433                 BUG();
 434         }
 435 }
 436
 437 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
 438 {
 439         int idx;
 440         u64 val;
 441         struct cftype *cft = seq_cft(seq);
 442         unsigned long limit;
 443         struct page_counter *counter;
 444         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 445
 446         idx = MEMFILE_IDX(cft->private);
 447         counter = &h_cg->hugepage[idx];
 448
 449         limit = round_down(PAGE_COUNTER_MAX,
 450                            1 << huge_page_order(&hstates[idx]));
 451
 452         switch (MEMFILE_ATTR(cft->private)) {
 453         case RES_RSVD_USAGE:
 454                 counter = &h_cg->rsvd_hugepage[idx];
 455                 /* Fall through. */
 456         case RES_USAGE:
 457                 val = (u64)page_counter_read(counter);
 458                 seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 459                 break;
 460         case RES_RSVD_LIMIT:
 461                 counter = &h_cg->rsvd_hugepage[idx];
 462                 /* Fall through. */
 463         case RES_LIMIT:
 464                 val = (u64)counter->max;
 465                 if (val == limit)
 466                         seq_puts(seq, "max\n");
 467                 else
 468                         seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 469                 break;
 470         default:
 471                 BUG();
 472         }
 473
 474         return 0;
 475 }
 476
 477 static DEFINE_MUTEX(hugetlb_limit_mutex);
 478
 479 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
 480                                     char *buf, size_t nbytes, loff_t off,
 481                                     const char *max)
 482 {
 483         int ret, idx;
 484         unsigned long nr_pages;
 485         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 486         bool rsvd = false;
 487
 488         if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
 489                 return -EINVAL;
 490
 491         buf = strstrip(buf);
 492         ret = page_counter_memparse(buf, max, &nr_pages);
 493         if (ret)
 494                 return ret;
 495
 496         idx = MEMFILE_IDX(of_cft(of)->private);
 497         nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));
 498
 499         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 500         case RES_RSVD_LIMIT:
 501                 rsvd = true;
 502                 /* Fall through. */
 503         case RES_LIMIT:
 504                 mutex_lock(&hugetlb_limit_mutex);
 505                 ret = page_counter_set_max(
 506                         __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 507                         nr_pages);
 508                 mutex_unlock(&hugetlb_limit_mutex);
 509                 break;
 510         default:
 511                 ret = -EINVAL;
 512                 break;
 513         }
 514         return ret ?: nbytes;
 515 }
 516
 517 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
 518                                            char *buf, size_t nbytes, loff_t off)
 519 {
 520         return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
 521 }
 522
 523 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
 524                                         char *buf, size_t nbytes, loff_t off)
 525 {
 526         return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
 527 }
 528
 529 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
 530                                     char *buf, size_t nbytes, loff_t off)
 531 {
 532         int ret = 0;
 533         struct page_counter *counter, *rsvd_counter;
 534         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 535
 536         counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
 537         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
 538
 539         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 540         case RES_MAX_USAGE:
 541                 page_counter_reset_watermark(counter);
 542                 break;
 543         case RES_RSVD_MAX_USAGE:
 544                 page_counter_reset_watermark(rsvd_counter);
 545                 break;
 546         case RES_FAILCNT:
 547                 counter->failcnt = 0;
 548                 break;
 549         case RES_RSVD_FAILCNT:
 550                 rsvd_counter->failcnt = 0;
 551                 break;
 552         default:
 553                 ret = -EINVAL;
 554                 break;
 555         }
 556         return ret ?: nbytes;
 557 }
 558
 559 static char *mem_fmt(char *buf, int size, unsigned long hsize)
 560 {
 561         if (hsize >= (1UL << 30))
 562                 snprintf(buf, size, "%luGB", hsize >> 30);
 563         else if (hsize >= (1UL << 20))
 564                 snprintf(buf, size, "%luMB", hsize >> 20);
 565         else
 566                 snprintf(buf, size, "%luKB", hsize >> 10);
 567         return buf;
 568 }
 569
 570 static int __hugetlb_events_show(struct seq_file *seq, bool local)
 571 {
 572         int idx;
 573         long max;
 574         struct cftype *cft = seq_cft(seq);
 575         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 576
 577         idx = MEMFILE_IDX(cft->private);
 578
 579         if (local)
 580                 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
 581         else
 582                 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
 583
 584         seq_printf(seq, "max %lu\n", max);
 585
 586         return 0;
 587 }
 588
 589 static int hugetlb_events_show(struct seq_file *seq, void *v)
 590 {
 591         return __hugetlb_events_show(seq, false);
 592 }
 593
 594 static int hugetlb_events_local_show(struct seq_file *seq, void *v)
 595 {
 596         return __hugetlb_events_show(seq, true);
 597 }
 598
 599 static void __init __hugetlb_cgroup_file_dfl_init(int idx)
 600 {
 601         char buf[32];
 602         struct cftype *cft;
 603         struct hstate *h = &hstates[idx];
 604
 605         /* format the size */
 606         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 607
 608         /* Add the limit file */
 609         cft = &h->cgroup_files_dfl[0];
 610         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
 611         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 612         cft->seq_show = hugetlb_cgroup_read_u64_max;
 613         cft->write = hugetlb_cgroup_write_dfl;
 614         cft->flags = CFTYPE_NOT_ON_ROOT;
 615
 616         /* Add the reservation limit file */
 617         cft = &h->cgroup_files_dfl[1];
 618         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
 619         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 620         cft->seq_show = hugetlb_cgroup_read_u64_max;
 621         cft->write = hugetlb_cgroup_write_dfl;
 622         cft->flags = CFTYPE_NOT_ON_ROOT;
 623
 624         /* Add the current usage file */
 625         cft = &h->cgroup_files_dfl[2];
 626         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
 627         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 628         cft->seq_show = hugetlb_cgroup_read_u64_max;
 629         cft->flags = CFTYPE_NOT_ON_ROOT;
 630
 631         /* Add the current reservation usage file */
 632         cft = &h->cgroup_files_dfl[3];
 633         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
 634         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 635         cft->seq_show = hugetlb_cgroup_read_u64_max;
 636         cft->flags = CFTYPE_NOT_ON_ROOT;
 637
 638         /* Add the events file */
 639         cft = &h->cgroup_files_dfl[4];
 640         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
 641         cft->private = MEMFILE_PRIVATE(idx, 0);
 642         cft->seq_show = hugetlb_events_show;
 643         cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]),
 644         cft->flags = CFTYPE_NOT_ON_ROOT;
 645
 646         /* Add the events.local file */
 647         cft = &h->cgroup_files_dfl[5];
 648         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
 649         cft->private = MEMFILE_PRIVATE(idx, 0);
 650         cft->seq_show = hugetlb_events_local_show;
 651         cft->file_offset = offsetof(struct hugetlb_cgroup,
 652                                     events_local_file[idx]),
 653         cft->flags = CFTYPE_NOT_ON_ROOT;
 654
 655         /* NULL terminate the last cft */
 656         cft = &h->cgroup_files_dfl[6];
 657         memset(cft, 0, sizeof(*cft));
 658
 659         WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
 660                                        h->cgroup_files_dfl));
 661 }
 662
 663 static void __init __hugetlb_cgroup_file_legacy_init(int idx)
 664 {
 665         char buf[32];
 666         struct cftype *cft;
 667         struct hstate *h = &hstates[idx];
 668
 669         /* format the size */
 670         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 671
 672         /* Add the limit file */
 673         cft = &h->cgroup_files_legacy[0];
 674         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 675         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 676         cft->read_u64 = hugetlb_cgroup_read_u64;
 677         cft->write = hugetlb_cgroup_write_legacy;
 678
 679         /* Add the reservation limit file */
 680         cft = &h->cgroup_files_legacy[1];
 681         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
 682         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 683         cft->read_u64 = hugetlb_cgroup_read_u64;
 684         cft->write = hugetlb_cgroup_write_legacy;
 685
 686         /* Add the usage file */
 687         cft = &h->cgroup_files_legacy[2];
 688         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
 689         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 690         cft->read_u64 = hugetlb_cgroup_read_u64;
 691
 692         /* Add the reservation usage file */
 693         cft = &h->cgroup_files_legacy[3];
 694         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
 695         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 696         cft->read_u64 = hugetlb_cgroup_read_u64;
 697
 698         /* Add the MAX usage file */
 699         cft = &h->cgroup_files_legacy[4];
 700         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 701         cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
 702         cft->write = hugetlb_cgroup_reset;
 703         cft->read_u64 = hugetlb_cgroup_read_u64;
 704
 705         /* Add the MAX reservation usage file */
 706         cft = &h->cgroup_files_legacy[5];
 707         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
 708         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
 709         cft->write = hugetlb_cgroup_reset;
 710         cft->read_u64 = hugetlb_cgroup_read_u64;
 711
 712         /* Add the failcntfile */
 713         cft = &h->cgroup_files_legacy[6];
 714         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 715         cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
 716         cft->write = hugetlb_cgroup_reset;
 717         cft->read_u64 = hugetlb_cgroup_read_u64;
 718
 719         /* Add the reservation failcntfile */
 720         cft = &h->cgroup_files_legacy[7];
 721         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
 722         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
 723         cft->write = hugetlb_cgroup_reset;
 724         cft->read_u64 = hugetlb_cgroup_read_u64;
 725
 726         /* NULL terminate the last cft */
 727         cft = &h->cgroup_files_legacy[8];
 728         memset(cft, 0, sizeof(*cft));
 729
 730         WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
 731                                           h->cgroup_files_legacy));
 732 }
 733
 734 static void __init __hugetlb_cgroup_file_init(int idx)
 735 {
 736         __hugetlb_cgroup_file_dfl_init(idx);
 737         __hugetlb_cgroup_file_legacy_init(idx);
 738 }
 739
 740 void __init hugetlb_cgroup_file_init(void)
 741 {
 742         struct hstate *h;
 743
 744         for_each_hstate(h) {
 745                 /*
 746                  * Add cgroup control files only if the huge page consists
 747                  * of more than two normal pages. This is because we use
 748                  * page[2].private for storing cgroup details.
 749                  */
 750                 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
 751                         __hugetlb_cgroup_file_init(hstate_index(h));
 752         }
 753 }
 754
 755 /*
 756  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 757  * when we migrate hugepages
 758  */
 759 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 760 {
 761         struct hugetlb_cgroup *h_cg;
 762         struct hugetlb_cgroup *h_cg_rsvd;
 763         struct hstate *h = page_hstate(oldhpage);
 764
 765         if (hugetlb_cgroup_disabled())
 766                 return;
 767
 768         VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
 769         spin_lock(&hugetlb_lock);
 770         h_cg = hugetlb_cgroup_from_page(oldhpage);
 771         h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
 772         set_hugetlb_cgroup(oldhpage, NULL);
 773         set_hugetlb_cgroup_rsvd(oldhpage, NULL);
 774
 775         /* move the h_cg details to new cgroup */
 776         set_hugetlb_cgroup(newhpage, h_cg);
 777         set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
 778         list_move(&newhpage->lru, &h->hugepage_activelist);
 779         spin_unlock(&hugetlb_lock);
 780         return;
 781 }
 782
 783 static struct cftype hugetlb_files[] = {
 784         {} /* terminate */
 785 };
 786
 787 struct cgroup_subsys hugetlb_cgrp_subsys = {
 788         .css_alloc      = hugetlb_cgroup_css_alloc,
 789         .css_offline    = hugetlb_cgroup_css_offline,
 790         .css_free       = hugetlb_cgroup_css_free,
 791         .dfl_cftypes    = hugetlb_files,
 792         .legacy_cftypes = hugetlb_files,
 793 };