mm/hugetlb_cgroup: fix hugetlb_cgroup migration
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

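/*
 * Per-hstate memory events exposed through the hugetlb.<size>.events
 * and hugetlb.<size>.events.local files. HUGETLB_MAX counts how many
 * times an allocation was blocked because the cgroup hit its limit.
 */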
enum hugetlb_memory_event {
        HUGETLB_MAX,
        HUGETLB_NR_MEMORY_EVENTS,
};

struct hugetlb_cgroup {
        struct cgroup_subsys_state css;

        /*
         * the counter to account for hugepages from hugetlb.
         */
        struct page_counter hugepage[HUGE_MAX_HSTATE];

        /*
         * the counter to account for hugepage reservations from hugetlb.
         */
        struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];

        atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
        atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];

        /* Handle for "hugetlb.events" */
        struct cgroup_file events_file[HUGE_MAX_HSTATE];

        /* Handle for "hugetlb.events.local" */
        struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};

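/*
 * A cftype's ->private field packs the hstate index into the upper 16
 * bits and a RES_* attribute into the lower 16 bits. For illustration,
 * MEMFILE_PRIVATE(1, RES_LIMIT) evaluates to (1 << 16) | RES_LIMIT;
 * MEMFILE_IDX() then recovers the index 1 and MEMFILE_ATTR() the
 * attribute RES_LIMIT.
 */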
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)       ((val) & 0xffff)

#define hugetlb_cgroup_from_counter(counter, idx)                   \
        container_of(counter, struct hugetlb_cgroup, hugepage[idx])

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
                                     bool rsvd)
{
        if (rsvd)
                return &h_cg->rsvd_hugepage[idx];
        return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
        return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
        return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
        return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
        return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
        return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
        return hugetlb_cgroup_from_css(h_cg->css.parent);
}

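/*
 * Return true if any per-hstate fault or reservation counter in this
 * cgroup still holds a charge. Used by css_offline below to decide
 * whether more charges remain to be reparented.
 */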
static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
        int idx;

        for (idx = 0; idx < hugetlb_max_hstate; idx++) {
                if (page_counter_read(
                            hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
                    page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
                            h_cg, idx))) {
                        return true;
                }
        }
        return false;
}

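/*
 * Initialize the fault and reservation counters for every hstate,
 * parenting each one to the matching counter in the parent cgroup, if
 * any. The default limit is PAGE_COUNTER_MAX rounded down to a whole
 * number of hugepages; e.g. with 2MB hugepages on 4KB base pages
 * (order 9), it is rounded down to a multiple of 512 base pages.
 */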
static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
                                struct hugetlb_cgroup *parent_h_cgroup)
{
        int idx;

        for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
                struct page_counter *fault_parent = NULL;
                struct page_counter *rsvd_parent = NULL;
                unsigned long limit;
                int ret;

                if (parent_h_cgroup) {
                        fault_parent = hugetlb_cgroup_counter_from_cgroup(
                                parent_h_cgroup, idx);
                        rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
                                parent_h_cgroup, idx);
                }
                page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
                                                                     idx),
                                  fault_parent);
                page_counter_init(
                        hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
                        rsvd_parent);

                limit = round_down(PAGE_COUNTER_MAX,
                                   1 << huge_page_order(&hstates[idx]));

                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
                        limit);
                VM_BUG_ON(ret);
                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
                        limit);
                VM_BUG_ON(ret);
        }
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
        struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
        struct hugetlb_cgroup *h_cgroup;

        h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
        if (!h_cgroup)
                return ERR_PTR(-ENOMEM);

        if (!parent_h_cgroup)
                root_h_cgroup = h_cgroup;

        hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
        return &h_cgroup->css;
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
        struct hugetlb_cgroup *h_cgroup;

        h_cgroup = hugetlb_cgroup_from_css(css);
        kfree(h_cgroup);
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved off the
 * active list or uncharged from the cgroup, so there is no need to
 * take a page reference or test whether the page is active here.
 * This function cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
                                       struct page *page)
{
        unsigned int nr_pages;
        struct page_counter *counter;
        struct hugetlb_cgroup *page_hcg;
        struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

        page_hcg = hugetlb_cgroup_from_page(page);
        /*
         * We can have pages on the active list that are not charged to
         * any cgroup, i.e. hugepages consisting of fewer than 3 pages.
         * We can safely ignore those pages.
         */
        if (!page_hcg || page_hcg != h_cg)
                goto out;

        nr_pages = compound_nr(page);
        if (!parent) {
                parent = root_h_cgroup;
                /* root has no limit */
                page_counter_charge(&parent->hugepage[idx], nr_pages);
        }
        counter = &h_cg->hugepage[idx];
        /* Take the pages off the local counter */
        page_counter_cancel(counter, nr_pages);

        set_hugetlb_cgroup(page, parent);
out:
        return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
        struct hstate *h;
        struct page *page;

        do {
                for_each_hstate(h) {
                        spin_lock(&hugetlb_lock);
                        list_for_each_entry(page, &h->hugepage_activelist, lru)
                                hugetlb_cgroup_move_parent(hstate_index(h),
                                                           h_cg, page);

                        spin_unlock(&hugetlb_lock);
                }
                cond_resched();
        } while (hugetlb_cgroup_have_usage(h_cg));
}

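/*
 * Record a memory event: the local counter is bumped only on the cgroup
 * that triggered the event, while the hierarchical events counter is
 * propagated up through the ancestors, stopping short of the root.
 */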
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
                                 enum hugetlb_memory_event event)
{
        atomic_long_inc(&hugetlb->events_local[idx][event]);
        cgroup_file_notify(&hugetlb->events_local_file[idx]);

        do {
                atomic_long_inc(&hugetlb->events[idx][event]);
                cgroup_file_notify(&hugetlb->events_file[idx]);
        } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
                 !hugetlb_cgroup_is_root(hugetlb));
}

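/*
 * Charge nr_pages to the current task's cgroup, following the usual
 * try-charge/commit protocol: on success *ptr is set to the charged
 * cgroup, which the caller later hands to the commit or uncharge
 * helpers. For reservation (rsvd) charges the css reference acquired
 * here is kept until the reservation is uncharged, since reservations
 * are not reparented when the cgroup goes offline.
 */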
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                          struct hugetlb_cgroup **ptr,
                                          bool rsvd)
{
        int ret = 0;
        struct page_counter *counter;
        struct hugetlb_cgroup *h_cg = NULL;

        if (hugetlb_cgroup_disabled())
                goto done;
        /*
         * We don't charge any cgroup if the compound page has fewer
         * than 3 pages.
         */
        if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
                goto done;
again:
        rcu_read_lock();
        h_cg = hugetlb_cgroup_from_task(current);
        if (!css_tryget(&h_cg->css)) {
                rcu_read_unlock();
                goto again;
        }
        rcu_read_unlock();

        if (!page_counter_try_charge(
                    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
                    nr_pages, &counter)) {
                ret = -ENOMEM;
                hugetlb_event(h_cg, idx, HUGETLB_MAX);
                css_put(&h_cg->css);
                goto done;
        }
        /*
         * Reservations take a reference to the css because they do not
         * get reparented.
         */
        if (!rsvd)
                css_put(&h_cg->css);
done:
        *ptr = h_cg;
        return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                 struct hugetlb_cgroup **ptr)
{
        return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
                                      struct hugetlb_cgroup **ptr)
{
        return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                                           struct hugetlb_cgroup *h_cg,
                                           struct page *page, bool rsvd)
{
        if (hugetlb_cgroup_disabled() || !h_cg)
                return;

        __set_hugetlb_cgroup(page, h_cg, rsvd);
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                                  struct hugetlb_cgroup *h_cg,
                                  struct page *page)
{
        __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
                                       struct hugetlb_cgroup *h_cg,
                                       struct page *page)
{
        __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                                           struct page *page, bool rsvd)
{
        struct hugetlb_cgroup *h_cg;

        if (hugetlb_cgroup_disabled())
                return;
        lockdep_assert_held(&hugetlb_lock);
        h_cg = __hugetlb_cgroup_from_page(page, rsvd);
        if (unlikely(!h_cg))
                return;
        __set_hugetlb_cgroup(page, NULL, rsvd);

        page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                                                   rsvd),
                              nr_pages);

        if (rsvd)
                css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                                  struct page *page)
{
        __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
                                       struct page *page)
{
        __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                                             struct hugetlb_cgroup *h_cg,
                                             bool rsvd)
{
        if (hugetlb_cgroup_disabled() || !h_cg)
                return;

        if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
                return;

        page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                                                   rsvd),
                              nr_pages);

        if (rsvd)
                css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                                    struct hugetlb_cgroup *h_cg)
{
        __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
                                         struct hugetlb_cgroup *h_cg)
{
        __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

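/*
 * Uncharge nr_pages from a reservation counter directly, dropping the
 * css reference that was taken when the reservation was charged. The
 * caller supplies the counter and css rather than deriving them from a
 * page, so this works even when no page was ever committed against the
 * reservation.
 */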
void hugetlb_cgroup_uncharge_counter(struct page_counter *p,
                                     unsigned long nr_pages,
                                     struct cgroup_subsys_state *css)
{
        if (hugetlb_cgroup_disabled() || !p || !css)
                return;

        page_counter_uncharge(p, nr_pages);
        css_put(css);
}

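/*
 * Attribute values carried in the low 16 bits of cftype->private (see
 * MEMFILE_PRIVATE() above). The RES_RSVD_* variants select the
 * reservation counters instead of the fault counters.
 */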
enum {
        RES_USAGE,
        RES_RSVD_USAGE,
        RES_LIMIT,
        RES_RSVD_LIMIT,
        RES_MAX_USAGE,
        RES_RSVD_MAX_USAGE,
        RES_FAILCNT,
        RES_RSVD_FAILCNT,
};

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
                                   struct cftype *cft)
{
        struct page_counter *counter;
        struct page_counter *rsvd_counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

        counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
        rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

        switch (MEMFILE_ATTR(cft->private)) {
        case RES_USAGE:
                return (u64)page_counter_read(counter) * PAGE_SIZE;
        case RES_RSVD_USAGE:
                return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
        case RES_LIMIT:
                return (u64)counter->max * PAGE_SIZE;
        case RES_RSVD_LIMIT:
                return (u64)rsvd_counter->max * PAGE_SIZE;
        case RES_MAX_USAGE:
                return (u64)counter->watermark * PAGE_SIZE;
        case RES_RSVD_MAX_USAGE:
                return (u64)rsvd_counter->watermark * PAGE_SIZE;
        case RES_FAILCNT:
                return counter->failcnt;
        case RES_RSVD_FAILCNT:
                return rsvd_counter->failcnt;
        default:
                BUG();
        }
}

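/*
 * seq_show callback for the cgroup v2 files: usage is reported in
 * bytes, and a limit equal to the rounded-down PAGE_COUNTER_MAX is
 * reported as the literal string "max".
 */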
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
        int idx;
        u64 val;
        struct cftype *cft = seq_cft(seq);
        unsigned long limit;
        struct page_counter *counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

        idx = MEMFILE_IDX(cft->private);
        counter = &h_cg->hugepage[idx];

        limit = round_down(PAGE_COUNTER_MAX,
                           1 << huge_page_order(&hstates[idx]));

        switch (MEMFILE_ATTR(cft->private)) {
        case RES_RSVD_USAGE:
                counter = &h_cg->rsvd_hugepage[idx];
                /* Fall through. */
        case RES_USAGE:
                val = (u64)page_counter_read(counter);
                seq_printf(seq, "%llu\n", val * PAGE_SIZE);
                break;
        case RES_RSVD_LIMIT:
                counter = &h_cg->rsvd_hugepage[idx];
                /* Fall through. */
        case RES_LIMIT:
                val = (u64)counter->max;
                if (val == limit)
                        seq_puts(seq, "max\n");
                else
                        seq_printf(seq, "%llu\n", val * PAGE_SIZE);
                break;
        default:
                BUG();
        }

        return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

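/*
 * Common write handler for the v1 and v2 limit files. The buffer is
 * parsed with page_counter_memparse(), with "max" (v2) or "-1" (v1)
 * meaning "no limit", and the result is rounded down to a whole number
 * of hugepages; e.g. with 2MB hugepages, writing 3M yields an
 * effective limit of 2M.
 */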
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off,
                                    const char *max)
{
        int ret, idx;
        unsigned long nr_pages;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
        bool rsvd = false;

        if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
                return -EINVAL;

        buf = strstrip(buf);
        ret = page_counter_memparse(buf, max, &nr_pages);
        if (ret)
                return ret;

        idx = MEMFILE_IDX(of_cft(of)->private);
        nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));

        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_RSVD_LIMIT:
                rsvd = true;
                /* Fall through. */
        case RES_LIMIT:
                mutex_lock(&hugetlb_limit_mutex);
                ret = page_counter_set_max(
                        __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
                        nr_pages);
                mutex_unlock(&hugetlb_limit_mutex);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
                                           char *buf, size_t nbytes, loff_t off)
{
        return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
                                        char *buf, size_t nbytes, loff_t off)
{
        return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off)
{
        int ret = 0;
        struct page_counter *counter, *rsvd_counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

        counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
        rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_MAX_USAGE:
                page_counter_reset_watermark(counter);
                break;
        case RES_RSVD_MAX_USAGE:
                page_counter_reset_watermark(rsvd_counter);
                break;
        case RES_FAILCNT:
                counter->failcnt = 0;
                break;
        case RES_RSVD_FAILCNT:
                rsvd_counter->failcnt = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

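/*
 * Format a hugepage size as a human-readable string ("64KB", "2MB",
 * "1GB", ...); this becomes part of the per-size control file names,
 * e.g. "hugetlb.2MB.max".
 */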
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
        if (hsize >= (1UL << 30))
                snprintf(buf, size, "%luGB", hsize >> 30);
        else if (hsize >= (1UL << 20))
                snprintf(buf, size, "%luMB", hsize >> 20);
        else
                snprintf(buf, size, "%luKB", hsize >> 10);
        return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
        int idx;
        long max;
        struct cftype *cft = seq_cft(seq);
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

        idx = MEMFILE_IDX(cft->private);

        if (local)
                max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
        else
                max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

        seq_printf(seq, "max %lu\n", max);

        return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
        return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
        return __hugetlb_events_show(seq, true);
}

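/*
 * Build the cgroup v2 (default hierarchy) control files for one hstate:
 * <size>.max, <size>.rsvd.max, <size>.current, <size>.rsvd.current,
 * <size>.events and <size>.events.local, each tagged with the hstate
 * index via MEMFILE_PRIVATE() and hidden on the root cgroup.
 */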
static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
        char buf[32];
        struct cftype *cft;
        struct hstate *h = &hstates[idx];

        /* format the size */
        mem_fmt(buf, sizeof(buf), huge_page_size(h));

        /* Add the limit file */
        cft = &h->cgroup_files_dfl[0];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->write = hugetlb_cgroup_write_dfl;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the reservation limit file */
        cft = &h->cgroup_files_dfl[1];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->write = hugetlb_cgroup_write_dfl;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the current usage file */
        cft = &h->cgroup_files_dfl[2];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the current reservation usage file */
        cft = &h->cgroup_files_dfl[3];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the events file */
        cft = &h->cgroup_files_dfl[4];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
        cft->private = MEMFILE_PRIVATE(idx, 0);
        cft->seq_show = hugetlb_events_show;
        cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the events.local file */
        cft = &h->cgroup_files_dfl[5];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
        cft->private = MEMFILE_PRIVATE(idx, 0);
        cft->seq_show = hugetlb_events_local_show;
        cft->file_offset = offsetof(struct hugetlb_cgroup,
                                    events_local_file[idx]);
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* NULL terminate the last cft */
        cft = &h->cgroup_files_dfl[6];
        memset(cft, 0, sizeof(*cft));

        WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
                                       h->cgroup_files_dfl));
}

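/*
 * Build the cgroup v1 (legacy hierarchy) files for one hstate:
 * <size>.limit_in_bytes, <size>.usage_in_bytes, <size>.max_usage_in_bytes
 * and <size>.failcnt, plus their .rsvd counterparts. Unlike the v2
 * files, these are also visible on the root cgroup.
 */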
static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
        char buf[32];
        struct cftype *cft;
        struct hstate *h = &hstates[idx];

        /* format the size */
        mem_fmt(buf, sizeof(buf), huge_page_size(h));

        /* Add the limit file */
        cft = &h->cgroup_files_legacy[0];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
        cft->read_u64 = hugetlb_cgroup_read_u64;
        cft->write = hugetlb_cgroup_write_legacy;

        /* Add the reservation limit file */
        cft = &h->cgroup_files_legacy[1];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
        cft->read_u64 = hugetlb_cgroup_read_u64;
        cft->write = hugetlb_cgroup_write_legacy;

        /* Add the usage file */
        cft = &h->cgroup_files_legacy[2];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the reservation usage file */
        cft = &h->cgroup_files_legacy[3];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the MAX usage file */
        cft = &h->cgroup_files_legacy[4];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the MAX reservation usage file */
        cft = &h->cgroup_files_legacy[5];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the failcnt file */
        cft = &h->cgroup_files_legacy[6];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the reservation failcnt file */
        cft = &h->cgroup_files_legacy[7];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* NULL terminate the last cft */
        cft = &h->cgroup_files_legacy[8];
        memset(cft, 0, sizeof(*cft));

        WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
                                          h->cgroup_files_legacy));
}

static void __init __hugetlb_cgroup_file_init(int idx)
{
        __hugetlb_cgroup_file_dfl_init(idx);
        __hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
        struct hstate *h;

        for_each_hstate(h) {
                /*
                 * Add cgroup control files only if the huge page consists
                 * of more than two normal pages. This is because we use
                 * page[2].private for storing cgroup details.
                 */
                if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
                        __hugetlb_cgroup_file_init(hstate_index(h));
        }
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages.
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
        struct hugetlb_cgroup *h_cg;
        struct hugetlb_cgroup *h_cg_rsvd;
        struct hstate *h = page_hstate(oldhpage);

        if (hugetlb_cgroup_disabled())
                return;

        VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
        spin_lock(&hugetlb_lock);
        h_cg = hugetlb_cgroup_from_page(oldhpage);
        h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
        set_hugetlb_cgroup(oldhpage, NULL);
        set_hugetlb_cgroup_rsvd(oldhpage, NULL);

        /* Transfer the h_cg details to the new hugepage */
        set_hugetlb_cgroup(newhpage, h_cg);
        set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
        list_move(&newhpage->lru, &h->hugepage_activelist);
        spin_unlock(&hugetlb_lock);
}

static struct cftype hugetlb_files[] = {
        {} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
        .css_alloc      = hugetlb_cgroup_css_alloc,
        .css_offline    = hugetlb_cgroup_css_offline,
        .css_free       = hugetlb_cgroup_css_free,
        .dfl_cftypes    = hugetlb_files,
        .legacy_cftypes = hugetlb_files,
};