linux/mm/hugetlb_cgroup.c
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)       ((val) & 0xffff)
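
/*
 * Worked example of the cftype->private encoding (illustrative values;
 * RES_LIMIT is defined further down and has the value 2): for the hstate
 * at index 1,
 *
 *      MEMFILE_PRIVATE(1, RES_LIMIT) == (1 << 16) | 2 == 0x10002
 *      MEMFILE_IDX(0x10002)          == 1
 *      MEMFILE_ATTR(0x10002)         == 2
 *
 * i.e. the hstate index lives in the high 16 bits and the RES_* attribute
 * in the low 16 bits.
 */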

#define hugetlb_cgroup_from_counter(counter, idx)                   \
        container_of(counter, struct hugetlb_cgroup, hugepage[idx])

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
                                     bool rsvd)
{
        if (rsvd)
                return &h_cg->rsvd_hugepage[idx];
        return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
        return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
        return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
        return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
        return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
        return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
        return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
        int idx;

        for (idx = 0; idx < hugetlb_max_hstate; idx++) {
                if (page_counter_read(
                                hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
                        return true;
        }
        return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
                                struct hugetlb_cgroup *parent_h_cgroup)
{
        int idx;

        for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
                struct page_counter *fault_parent = NULL;
                struct page_counter *rsvd_parent = NULL;
                unsigned long limit;
                int ret;

                if (parent_h_cgroup) {
                        fault_parent = hugetlb_cgroup_counter_from_cgroup(
                                parent_h_cgroup, idx);
                        rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
                                parent_h_cgroup, idx);
                }
                page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
                                                                     idx),
                                  fault_parent);
                page_counter_init(
                        hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
                        rsvd_parent);

                limit = round_down(PAGE_COUNTER_MAX,
                                   pages_per_huge_page(&hstates[idx]));

                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
                        limit);
                VM_BUG_ON(ret);
                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
                        limit);
                VM_BUG_ON(ret);
        }
}
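
/*
 * Illustration of the default-limit rounding above (assuming 4 KB base
 * pages): a 2 MB hstate spans 512 base pages, so PAGE_COUNTER_MAX is
 * rounded down to a multiple of 512.  The default limit therefore always
 * represents a whole number of huge pages.
 */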

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
        struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
        struct hugetlb_cgroup *h_cgroup;

        h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
        if (!h_cgroup)
                return ERR_PTR(-ENOMEM);

        if (!parent_h_cgroup)
                root_h_cgroup = h_cgroup;

        hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
        return &h_cgroup->css;
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
        struct hugetlb_cgroup *h_cgroup;

        h_cgroup = hugetlb_cgroup_from_css(css);
        kfree(h_cgroup);
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot be moved from the
 * active list or uncharged from the cgroup, so there is no need to
 * take a page reference and test for page activity here. This
 * function cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
                                       struct page *page)
{
        unsigned int nr_pages;
        struct page_counter *counter;
        struct hugetlb_cgroup *page_hcg;
        struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

        page_hcg = hugetlb_cgroup_from_page(page);
        /*
         * We can have pages on the active list that belong to no cgroup,
         * i.e., hugepages with fewer than 3 pages. We can safely ignore
         * those pages.
         */
        if (!page_hcg || page_hcg != h_cg)
                goto out;

        nr_pages = compound_nr(page);
        if (!parent) {
                parent = root_h_cgroup;
                /* root has no limit */
                page_counter_charge(&parent->hugepage[idx], nr_pages);
        }
        counter = &h_cg->hugepage[idx];
        /* Take the pages off the local counter */
        page_counter_cancel(counter, nr_pages);

        set_hugetlb_cgroup(page, parent);
out:
        return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
        struct hstate *h;
        struct page *page;
        int idx;

        do {
                idx = 0;
                for_each_hstate(h) {
                        spin_lock_irq(&hugetlb_lock);
                        list_for_each_entry(page, &h->hugepage_activelist, lru)
                                hugetlb_cgroup_move_parent(idx, h_cg, page);

                        spin_unlock_irq(&hugetlb_lock);
                        idx++;
                }
                cond_resched();
        } while (hugetlb_cgroup_have_usage(h_cg));
}

static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
                                 enum hugetlb_memory_event event)
{
        atomic_long_inc(&hugetlb->events_local[idx][event]);
        cgroup_file_notify(&hugetlb->events_local_file[idx]);

        do {
                atomic_long_inc(&hugetlb->events[idx][event]);
                cgroup_file_notify(&hugetlb->events_file[idx]);
        } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
                 !hugetlb_cgroup_is_root(hugetlb));
}
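
/*
 * Note on the two counters above: events[] is hierarchical (the loop walks
 * up the ancestors, stopping before the root), while events_local[] is
 * bumped only in the cgroup where the event occurred, mirroring the
 * memory.events / memory.events.local split of the memory controller.
 */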

static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                          struct hugetlb_cgroup **ptr,
                                          bool rsvd)
{
        int ret = 0;
        struct page_counter *counter;
        struct hugetlb_cgroup *h_cg = NULL;

        if (hugetlb_cgroup_disabled())
                goto done;
        /*
         * We don't charge any cgroup if the compound page has fewer
         * than 3 pages.
         */
        if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
                goto done;
again:
        rcu_read_lock();
        h_cg = hugetlb_cgroup_from_task(current);
        if (!css_tryget(&h_cg->css)) {
                rcu_read_unlock();
                goto again;
        }
        rcu_read_unlock();

        if (!page_counter_try_charge(
                    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
                    nr_pages, &counter)) {
                ret = -ENOMEM;
                hugetlb_event(h_cg, idx, HUGETLB_MAX);
                css_put(&h_cg->css);
                goto done;
        }
        /*
         * Reservations take a reference to the css because they do not get
         * reparented.
         */
        if (!rsvd)
                css_put(&h_cg->css);
done:
        *ptr = h_cg;
        return ret;
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                 struct hugetlb_cgroup **ptr)
{
        return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
                                      struct hugetlb_cgroup **ptr)
{
        return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}
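
/*
 * A minimal sketch of the expected caller protocol (error handling and the
 * surrounding allocator logic elided; "page" stands for a freshly allocated
 * huge page from hstates[idx] and "h" for its hstate):
 *
 *      ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
 *      if (ret)
 *              return ERR_PTR(ret);
 *      ...
 *      spin_lock_irq(&hugetlb_lock);
 *      hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
 *      spin_unlock_irq(&hugetlb_lock);
 *
 * If allocation fails after a successful charge, the caller rolls the
 * charge back with hugetlb_cgroup_uncharge_cgroup().
 */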

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                                           struct hugetlb_cgroup *h_cg,
                                           struct page *page, bool rsvd)
{
        if (hugetlb_cgroup_disabled() || !h_cg)
                return;

        __set_hugetlb_cgroup(page, h_cg, rsvd);
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
                                  struct hugetlb_cgroup *h_cg,
                                  struct page *page)
{
        __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
                                       struct hugetlb_cgroup *h_cg,
                                       struct page *page)
{
        __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
}

/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                                           struct page *page, bool rsvd)
{
        struct hugetlb_cgroup *h_cg;

        if (hugetlb_cgroup_disabled())
                return;
        lockdep_assert_held(&hugetlb_lock);
        h_cg = __hugetlb_cgroup_from_page(page, rsvd);
        if (unlikely(!h_cg))
                return;
        __set_hugetlb_cgroup(page, NULL, rsvd);

        page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                                                   rsvd),
                              nr_pages);

        if (rsvd)
                css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
                                  struct page *page)
{
        __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
}

void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
                                       struct page *page)
{
        __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                                             struct hugetlb_cgroup *h_cg,
                                             bool rsvd)
{
        if (hugetlb_cgroup_disabled() || !h_cg)
                return;

        if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
                return;

        page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
                                                                   rsvd),
                              nr_pages);

        if (rsvd)
                css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
                                    struct hugetlb_cgroup *h_cg)
{
        __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
                                         struct hugetlb_cgroup *h_cg)
{
        __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
                                     unsigned long end)
{
        if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
            !resv->css)
                return;

        page_counter_uncharge(resv->reservation_counter,
                              (end - start) * resv->pages_per_hpage);
        css_put(resv->css);
}
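
/*
 * Units, for illustration: resv_map regions are indexed in huge pages,
 * while the page counters account base pages, hence the scaling by
 * pages_per_hpage.  E.g. dropping a 3-huge-page reservation of 2 MB pages
 * (4 KB base pages) uncharges 3 * 512 = 1536 base pages.
 */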

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
                                         struct file_region *rg,
                                         unsigned long nr_pages,
                                         bool region_del)
{
        if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
                return;

        if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
            !resv->reservation_counter) {
                page_counter_uncharge(rg->reservation_counter,
                                      nr_pages * resv->pages_per_hpage);
                /*
                 * Only do css_put(rg->css) when we delete the entire region
                 * because one file_region must hold exactly one css reference.
                 */
                if (region_del)
                        css_put(rg->css);
        }
}

enum {
        RES_USAGE,
        RES_RSVD_USAGE,
        RES_LIMIT,
        RES_RSVD_LIMIT,
        RES_MAX_USAGE,
        RES_RSVD_MAX_USAGE,
        RES_FAILCNT,
        RES_RSVD_FAILCNT,
};

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
                                   struct cftype *cft)
{
        struct page_counter *counter;
        struct page_counter *rsvd_counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

        counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
        rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

        switch (MEMFILE_ATTR(cft->private)) {
        case RES_USAGE:
                return (u64)page_counter_read(counter) * PAGE_SIZE;
        case RES_RSVD_USAGE:
                return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
        case RES_LIMIT:
                return (u64)counter->max * PAGE_SIZE;
        case RES_RSVD_LIMIT:
                return (u64)rsvd_counter->max * PAGE_SIZE;
        case RES_MAX_USAGE:
                return (u64)counter->watermark * PAGE_SIZE;
        case RES_RSVD_MAX_USAGE:
                return (u64)rsvd_counter->watermark * PAGE_SIZE;
        case RES_FAILCNT:
                return counter->failcnt;
        case RES_RSVD_FAILCNT:
                return rsvd_counter->failcnt;
        default:
                BUG();
        }
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
        int idx;
        u64 val;
        struct cftype *cft = seq_cft(seq);
        unsigned long limit;
        struct page_counter *counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

        idx = MEMFILE_IDX(cft->private);
        counter = &h_cg->hugepage[idx];

        limit = round_down(PAGE_COUNTER_MAX,
                           pages_per_huge_page(&hstates[idx]));

        switch (MEMFILE_ATTR(cft->private)) {
        case RES_RSVD_USAGE:
                counter = &h_cg->rsvd_hugepage[idx];
                fallthrough;
        case RES_USAGE:
                val = (u64)page_counter_read(counter);
                seq_printf(seq, "%llu\n", val * PAGE_SIZE);
                break;
        case RES_RSVD_LIMIT:
                counter = &h_cg->rsvd_hugepage[idx];
                fallthrough;
        case RES_LIMIT:
                val = (u64)counter->max;
                if (val == limit)
                        seq_puts(seq, "max\n");
                else
                        seq_printf(seq, "%llu\n", val * PAGE_SIZE);
                break;
        default:
                BUG();
        }

        return 0;
}
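
/*
 * Reading a v2 limit file, by way of example: a limit still at its default
 * (PAGE_COUNTER_MAX rounded down to whole huge pages, as set up in
 * hugetlb_cgroup_init()) is reported as "max"; any explicitly set limit is
 * reported in bytes, e.g. a 1 GiB cap reads back as 1073741824.
 */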

static DEFINE_MUTEX(hugetlb_limit_mutex);

static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off,
                                    const char *max)
{
        int ret, idx;
        unsigned long nr_pages;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
        bool rsvd = false;

        if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
                return -EINVAL;

        buf = strstrip(buf);
        ret = page_counter_memparse(buf, max, &nr_pages);
        if (ret)
                return ret;

        idx = MEMFILE_IDX(of_cft(of)->private);
        nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_RSVD_LIMIT:
                rsvd = true;
                fallthrough;
        case RES_LIMIT:
                mutex_lock(&hugetlb_limit_mutex);
                ret = page_counter_set_max(
                        __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
                        nr_pages);
                mutex_unlock(&hugetlb_limit_mutex);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
                                           char *buf, size_t nbytes, loff_t off)
{
        return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
                                        char *buf, size_t nbytes, loff_t off)
{
        return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}
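
/*
 * Worked example of the rounding in hugetlb_cgroup_write() (assuming 4 KB
 * base pages and the 2 MB hstate): writing "5M" parses to 1280 base pages,
 * which round_down() trims to 1024, so the effective limit becomes 4 MB.
 * The "-1" / "max" unlimited sentinels accepted by page_counter_memparse()
 * differ between the legacy (v1) and default (v2) hierarchies, hence the
 * two wrappers above.
 */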

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off)
{
        int ret = 0;
        struct page_counter *counter, *rsvd_counter;
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

        counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
        rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_MAX_USAGE:
                page_counter_reset_watermark(counter);
                break;
        case RES_RSVD_MAX_USAGE:
                page_counter_reset_watermark(rsvd_counter);
                break;
        case RES_FAILCNT:
                counter->failcnt = 0;
                break;
        case RES_RSVD_FAILCNT:
                rsvd_counter->failcnt = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}
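
/*
 * Usage example (cgroup v1; the written value is ignored, only the file's
 * attribute matters):
 *
 *      echo 0 > hugetlb.2MB.max_usage_in_bytes   # reset the watermark
 *      echo 0 > hugetlb.2MB.failcnt              # clear the failure count
 */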

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
        if (hsize >= (1UL << 30))
                snprintf(buf, size, "%luGB", hsize >> 30);
        else if (hsize >= (1UL << 20))
                snprintf(buf, size, "%luMB", hsize >> 20);
        else
                snprintf(buf, size, "%luKB", hsize >> 10);
        return buf;
}
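
/* Examples: 1073741824 -> "1GB", 2097152 -> "2MB", 65536 -> "64KB". */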

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
        int idx;
        long max;
        struct cftype *cft = seq_cft(seq);
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

        idx = MEMFILE_IDX(cft->private);

        if (local)
                max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
        else
                max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

        seq_printf(seq, "max %lu\n", max);

        return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
        return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
        return __hugetlb_events_show(seq, true);
}

static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
        char buf[32];
        struct cftype *cft;
        struct hstate *h = &hstates[idx];

        /* format the size */
        mem_fmt(buf, sizeof(buf), huge_page_size(h));

        /* Add the limit file */
        cft = &h->cgroup_files_dfl[0];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->write = hugetlb_cgroup_write_dfl;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the reservation limit file */
        cft = &h->cgroup_files_dfl[1];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->write = hugetlb_cgroup_write_dfl;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the current usage file */
        cft = &h->cgroup_files_dfl[2];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the current reservation usage file */
        cft = &h->cgroup_files_dfl[3];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
        cft->seq_show = hugetlb_cgroup_read_u64_max;
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the events file */
        cft = &h->cgroup_files_dfl[4];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
        cft->private = MEMFILE_PRIVATE(idx, 0);
        cft->seq_show = hugetlb_events_show;
        cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* Add the events.local file */
        cft = &h->cgroup_files_dfl[5];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
        cft->private = MEMFILE_PRIVATE(idx, 0);
        cft->seq_show = hugetlb_events_local_show;
        cft->file_offset = offsetof(struct hugetlb_cgroup,
                                    events_local_file[idx]);
        cft->flags = CFTYPE_NOT_ON_ROOT;

        /* NULL terminate the last cft */
        cft = &h->cgroup_files_dfl[6];
        memset(cft, 0, sizeof(*cft));

        WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
                                       h->cgroup_files_dfl));
}
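
/*
 * For a 2 MB hstate the above creates the following cgroup v2 files (the
 * cgroup core prepends the controller name to each cftype name):
 *
 *      hugetlb.2MB.max            hugetlb.2MB.rsvd.max
 *      hugetlb.2MB.current        hugetlb.2MB.rsvd.current
 *      hugetlb.2MB.events         hugetlb.2MB.events.local
 */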

static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
        char buf[32];
        struct cftype *cft;
        struct hstate *h = &hstates[idx];

        /* format the size */
        mem_fmt(buf, sizeof(buf), huge_page_size(h));

        /* Add the limit file */
        cft = &h->cgroup_files_legacy[0];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
        cft->read_u64 = hugetlb_cgroup_read_u64;
        cft->write = hugetlb_cgroup_write_legacy;

        /* Add the reservation limit file */
        cft = &h->cgroup_files_legacy[1];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
        cft->read_u64 = hugetlb_cgroup_read_u64;
        cft->write = hugetlb_cgroup_write_legacy;

        /* Add the usage file */
        cft = &h->cgroup_files_legacy[2];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the reservation usage file */
        cft = &h->cgroup_files_legacy[3];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the MAX usage file */
        cft = &h->cgroup_files_legacy[4];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the MAX reservation usage file */
        cft = &h->cgroup_files_legacy[5];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the failcnt file */
        cft = &h->cgroup_files_legacy[6];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* Add the reservation failcnt file */
        cft = &h->cgroup_files_legacy[7];
        snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
        cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
        cft->write = hugetlb_cgroup_reset;
        cft->read_u64 = hugetlb_cgroup_read_u64;

        /* NULL terminate the last cft */
        cft = &h->cgroup_files_legacy[8];
        memset(cft, 0, sizeof(*cft));

        WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
                                          h->cgroup_files_legacy));
}
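
/*
 * The v1 counterparts for a 2 MB hstate:
 *
 *      hugetlb.2MB.limit_in_bytes        hugetlb.2MB.rsvd.limit_in_bytes
 *      hugetlb.2MB.usage_in_bytes        hugetlb.2MB.rsvd.usage_in_bytes
 *      hugetlb.2MB.max_usage_in_bytes    hugetlb.2MB.rsvd.max_usage_in_bytes
 *      hugetlb.2MB.failcnt               hugetlb.2MB.rsvd.failcnt
 */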

static void __init __hugetlb_cgroup_file_init(int idx)
{
        __hugetlb_cgroup_file_dfl_init(idx);
        __hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
        struct hstate *h;

        for_each_hstate(h) {
                /*
                 * Add cgroup control files only if the huge page consists
                 * of more than two normal pages. This is because we use
                 * page[2].private for storing cgroup details.
                 */
                if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
                        __hugetlb_cgroup_file_init(hstate_index(h));
        }
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
        struct hugetlb_cgroup *h_cg;
        struct hugetlb_cgroup *h_cg_rsvd;
        struct hstate *h = page_hstate(oldhpage);

        if (hugetlb_cgroup_disabled())
                return;

        spin_lock_irq(&hugetlb_lock);
        h_cg = hugetlb_cgroup_from_page(oldhpage);
        h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
        set_hugetlb_cgroup(oldhpage, NULL);
        set_hugetlb_cgroup_rsvd(oldhpage, NULL);

        /* move the h_cg details to the new hugepage */
        set_hugetlb_cgroup(newhpage, h_cg);
        set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
        list_move(&newhpage->lru, &h->hugepage_activelist);
        spin_unlock_irq(&hugetlb_lock);
}

static struct cftype hugetlb_files[] = {
        {} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
        .css_alloc      = hugetlb_cgroup_css_alloc,
        .css_offline    = hugetlb_cgroup_css_offline,
        .css_free       = hugetlb_cgroup_css_free,
        .dfl_cftypes    = hugetlb_files,
        .legacy_cftypes = hugetlb_files,
};