linux/include/linux/memcontrol.h
/* memcontrol.h - Memory Controller
 *
 * Copyright IBM Corporation, 2007
 * Author Balbir Singh <balbir@linux.vnet.ibm.com>
 *
 * Copyright 2007 OpenVZ SWsoft Inc
 * Author: Pavel Emelianov <xemul@openvz.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>

struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/*
 * The corresponding mem_cgroup_stat_names array is defined in mm/memcontrol.c.
 * These two lists must be kept in sync with each other.
 */
enum mem_cgroup_stat_index {
        /*
         * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
         */
        MEM_CGROUP_STAT_CACHE,          /* # of pages charged as cache */
        MEM_CGROUP_STAT_RSS,            /* # of pages charged as anon rss */
        MEM_CGROUP_STAT_RSS_HUGE,       /* # of pages charged as anon huge */
        MEM_CGROUP_STAT_FILE_MAPPED,    /* # of pages charged as file rss */
        MEM_CGROUP_STAT_WRITEBACK,      /* # of pages under writeback */
        MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
        MEM_CGROUP_STAT_NSTATS,
};

struct mem_cgroup_reclaim_cookie {
        struct zone *zone;
        int priority;
        unsigned int generation;
};

#ifdef CONFIG_MEMCG
/*
 * All "charge" functions with a gfp_mask should use GFP_KERNEL or
 * (gfp_mask & GFP_RECLAIM_MASK). In the current implementation, memcg doesn't
 * allocate memory but reclaims memory from all available zones, so the
 * "where I want memory from" bits of gfp_mask have no meaning. Any bits of
 * that field could therefore be used, but having a rule avoids ambiguity:
 * a charge function's gfp_mask should be set to either GFP_KERNEL or
 * (gfp_mask & GFP_RECLAIM_MASK).
 * (Of course, if memcg does memory allocation in the future, GFP_KERNEL is
 * sane.)
 */

extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask);
/* for swap handling */
extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
extern void mem_cgroup_commit_charge_swapin(struct page *page,
                                        struct mem_cgroup *memcg);
extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);

extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
                                        gfp_t gfp_mask);
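
/*
 * Illustrative usage sketch (not taken from the kernel source): a fault or
 * page-cache path charging a page would follow the gfp_mask rule above.
 * The error label and surrounding locking are assumed:
 *
 *      error = mem_cgroup_charge_anon(page, mm, GFP_KERNEL);
 *      if (error)
 *              goto out;
 *
 * or, for page cache, masking the caller's flags:
 *
 *      error = mem_cgroup_charge_file(page, mm, gfp_mask & GFP_RECLAIM_MASK);
 */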

struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);

/* For coalescing uncharges, to reduce memcg overhead */
extern void mem_cgroup_uncharge_start(void);
extern void mem_cgroup_uncharge_end(void);

extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
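
/*
 * Batching sketch (the caller and the pages_to_free list are assumed, e.g.
 * a truncation loop): wrapping a series of uncharges in _start()/_end()
 * lets memcg coalesce the res_counter updates instead of paying for them
 * page by page:
 *
 *      mem_cgroup_uncharge_start();
 *      list_for_each_entry(page, &pages_to_free, lru)
 *              mem_cgroup_uncharge_cache_page(page);
 *      mem_cgroup_uncharge_end();
 */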

bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
                                  struct mem_cgroup *memcg);
bool task_in_mem_cgroup(struct task_struct *task,
                        const struct mem_cgroup *memcg);

extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);

extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);

static inline
bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
{
        struct mem_cgroup *task_memcg;
        bool match;

        rcu_read_lock();
        task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
        match = __mem_cgroup_same_or_subtree(memcg, task_memcg);
        rcu_read_unlock();
        return match;
}

extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);

extern void
mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
                             struct mem_cgroup **memcgp);
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        struct page *oldpage, struct page *newpage, bool migration_ok);

struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
                                   struct mem_cgroup *,
                                   struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
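
/*
 * Iteration sketch: walking the hierarchy below a root memcg. "root" and
 * "should_stop()" are illustrative stand-ins; passing a NULL cookie walks
 * the whole subtree once, while reclaim passes a cookie so a partial walk
 * can be resumed later:
 *
 *      struct mem_cgroup *memcg;
 *
 *      for (memcg = mem_cgroup_iter(root, NULL, NULL); memcg;
 *           memcg = mem_cgroup_iter(root, memcg, NULL)) {
 *              if (should_stop(memcg)) {
 *                      mem_cgroup_iter_break(root, memcg);
 *                      break;
 *              }
 *      }
 */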

/*
 * For memory reclaim.
 */
int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                        struct task_struct *p);
extern void mem_cgroup_replace_page_cache(struct page *oldpage,
                                        struct page *newpage);

static inline void mem_cgroup_oom_enable(void)
{
        WARN_ON(current->memcg_oom.may_oom);
        current->memcg_oom.may_oom = 1;
}

static inline void mem_cgroup_oom_disable(void)
{
        WARN_ON(!current->memcg_oom.may_oom);
        current->memcg_oom.may_oom = 0;
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return p->memcg_oom.memcg;
}

bool mem_cgroup_oom_synchronize(bool wait);
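
/*
 * Sketch of the intended calling sequence in the page fault path; the
 * helper name do_the_fault() is illustrative (see handle_mm_fault() for
 * the real flow). OOM handling is armed only around the fault itself, and
 * any pending memcg OOM is handled once the fault has unwound:
 *
 *      mem_cgroup_oom_enable();
 *      ret = do_the_fault(mm, vma, address, flags);
 *      mem_cgroup_oom_disable();
 *
 *      if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
 *              mem_cgroup_oom_synchronize(false);
 *
 * pagefault_out_of_memory() is expected to call
 * mem_cgroup_oom_synchronize(true) to actually perform the memcg OOM kill.
 */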

#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
#endif

static inline bool mem_cgroup_disabled(void)
{
        if (memory_cgrp_subsys.disabled)
                return true;
        return false;
}

void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked,
                                         unsigned long *flags);

extern atomic_t memcg_moving;

static inline void mem_cgroup_begin_update_page_stat(struct page *page,
                                        bool *locked, unsigned long *flags)
{
        if (mem_cgroup_disabled())
                return;
        rcu_read_lock();
        *locked = false;
        if (atomic_read(&memcg_moving))
                __mem_cgroup_begin_update_page_stat(page, locked, flags);
}

void __mem_cgroup_end_update_page_stat(struct page *page,
                                unsigned long *flags);
static inline void mem_cgroup_end_update_page_stat(struct page *page,
                                        bool *locked, unsigned long *flags)
{
        if (mem_cgroup_disabled())
                return;
        if (*locked)
                __mem_cgroup_end_update_page_stat(page, flags);
        rcu_read_unlock();
}

void mem_cgroup_update_page_stat(struct page *page,
                                 enum mem_cgroup_stat_index idx,
                                 int val);

static inline void mem_cgroup_inc_page_stat(struct page *page,
                                            enum mem_cgroup_stat_index idx)
{
        mem_cgroup_update_page_stat(page, idx, 1);
}

static inline void mem_cgroup_dec_page_stat(struct page *page,
                                            enum mem_cgroup_stat_index idx)
{
        mem_cgroup_update_page_stat(page, idx, -1);
}
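
/*
 * Locking sketch for page stat updates (variable names are illustrative;
 * the rmap code follows this pattern when FILE_MAPPED changes). The
 * begin/end pair only takes the memcg move lock while charges are being
 * migrated; otherwise it is just an RCU read section:
 *
 *      bool locked;
 *      unsigned long flags;
 *
 *      mem_cgroup_begin_update_page_stat(page, &locked, &flags);
 *      mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
 *      mem_cgroup_end_update_page_stat(page, &locked, &flags);
 */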

unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                gfp_t gfp_mask,
                                                unsigned long *total_scanned);

void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
                                             enum vm_event_item idx)
{
        if (mem_cgroup_disabled())
                return;
        __mem_cgroup_count_vm_event(mm, idx);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif

#ifdef CONFIG_DEBUG_VM
bool mem_cgroup_bad_page_check(struct page *page);
void mem_cgroup_print_bad_page(struct page *page);
#endif
#else /* CONFIG_MEMCG */
struct mem_cgroup;

static inline int mem_cgroup_charge_anon(struct page *page,
                                        struct mm_struct *mm, gfp_t gfp_mask)
{
        return 0;
}

static inline int mem_cgroup_charge_file(struct page *page,
                                        struct mm_struct *mm, gfp_t gfp_mask)
{
        return 0;
}

static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
{
        return 0;
}

static inline void mem_cgroup_commit_charge_swapin(struct page *page,
                                          struct mem_cgroup *memcg)
{
}

static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
{
}

static inline void mem_cgroup_uncharge_start(void)
{
}

static inline void mem_cgroup_uncharge_end(void)
{
}

static inline void mem_cgroup_uncharge_page(struct page *page)
{
}

static inline void mem_cgroup_uncharge_cache_page(struct page *page)
{
}

static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
                                                    struct mem_cgroup *memcg)
{
        return &zone->lruvec;
}

static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
                                                    struct zone *zone)
{
        return &zone->lruvec;
}

static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
        return NULL;
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
                struct mem_cgroup *memcg)
{
        return true;
}

static inline bool task_in_mem_cgroup(struct task_struct *task,
                                      const struct mem_cgroup *memcg)
{
        return true;
}

static inline struct cgroup_subsys_state
                *mem_cgroup_css(struct mem_cgroup *memcg)
{
        return NULL;
}

static inline void
mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
                             struct mem_cgroup **memcgp)
{
}

static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
                struct page *oldpage, struct page *newpage, bool migration_ok)
{
}

static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
                struct mem_cgroup *prev,
                struct mem_cgroup_reclaim_cookie *reclaim)
{
        return NULL;
}

static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
                                         struct mem_cgroup *prev)
{
}

static inline bool mem_cgroup_disabled(void)
{
        return true;
}

static inline int
mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
{
        return 1;
}

static inline unsigned long
mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
        return 0;
}

static inline void
mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
                              int increment)
{
}

static inline void
mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

static inline void mem_cgroup_begin_update_page_stat(struct page *page,
                                        bool *locked, unsigned long *flags)
{
}

static inline void mem_cgroup_end_update_page_stat(struct page *page,
                                        bool *locked, unsigned long *flags)
{
}

static inline void mem_cgroup_oom_enable(void)
{
}

static inline void mem_cgroup_oom_disable(void)
{
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
        return false;
}

static inline bool mem_cgroup_oom_synchronize(bool wait)
{
        return false;
}

static inline void mem_cgroup_inc_page_stat(struct page *page,
                                            enum mem_cgroup_stat_index idx)
{
}

static inline void mem_cgroup_dec_page_stat(struct page *page,
                                            enum mem_cgroup_stat_index idx)
{
}

static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                            gfp_t gfp_mask,
                                            unsigned long *total_scanned)
{
        return 0;
}

static inline void mem_cgroup_split_huge_fixup(struct page *head)
{
}

static inline
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
{
}
static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
                                struct page *newpage)
{
}
#endif /* CONFIG_MEMCG */

#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
static inline bool
mem_cgroup_bad_page_check(struct page *page)
{
        return false;
}

static inline void
mem_cgroup_print_bad_page(struct page *page)
{
}
#endif

enum {
        UNDER_LIMIT,
        SOFT_LIMIT,
        OVER_LIMIT,
};

struct sock;
#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
void sock_update_memcg(struct sock *sk);
void sock_release_memcg(struct sock *sk);
#else
static inline void sock_update_memcg(struct sock *sk)
{
}
static inline void sock_release_memcg(struct sock *sk)
{
}
#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
extern struct static_key memcg_kmem_enabled_key;

extern int memcg_limited_groups_array_size;

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check whether the cache is valid (it is either valid or NULL).
 * slab_mutex must be held when looping through those caches.
 */
#define for_each_memcg_cache_index(_idx)        \
        for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)
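
/*
 * Loop sketch, assuming a lookup helper along the lines of
 * cache_from_memcg_idx() in mm/slab.h; an entry may still be NULL while
 * the child cache is being created, hence the check:
 *
 *      int i;
 *      struct kmem_cache *c;
 *
 *      mutex_lock(&slab_mutex);
 *      for_each_memcg_cache_index(i) {
 *              c = cache_from_memcg_idx(s, i);
 *              if (!c)
 *                      continue;
 *              ...
 *      }
 *      mutex_unlock(&slab_mutex);
 */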

static inline bool memcg_kmem_enabled(void)
{
        return static_key_false(&memcg_kmem_enabled_key);
}

/*
 * In general, we'll do everything in our power not to incur any overhead
 * for non-memcg users of the kmem functions. Not even a function call, if we
 * can avoid it.
 *
 * Therefore, we'll inline all those functions so that in the best case, we'll
 * see that kmemcg is off for everybody and proceed quickly.  If it is on,
 * we'll still do most of the flag checking inline. We check a lot of
 * conditions, but because they are pretty simple, they are expected to be
 * fast.
 */
bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
                                        int order);
void __memcg_kmem_commit_charge(struct page *page,
                                       struct mem_cgroup *memcg, int order);
void __memcg_kmem_uncharge_pages(struct page *page, int order);

int memcg_cache_id(struct mem_cgroup *memcg);

char *memcg_create_cache_name(struct mem_cgroup *memcg,
                              struct kmem_cache *root_cache);
int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
                             struct kmem_cache *root_cache);
void memcg_free_cache_params(struct kmem_cache *s);
void memcg_register_cache(struct kmem_cache *s);
void memcg_unregister_cache(struct kmem_cache *s);

int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
void memcg_update_array_size(int num_groups);

struct kmem_cache *
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);

void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);

/**
 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
 * @gfp: the gfp allocation flags.
 * @memcg: a pointer to the memcg this was charged against.
 * @order: allocation order.
 *
 * Returns true if the memcg where the current task belongs can hold this
 * allocation.
 *
 * We return true automatically if this allocation is not to be accounted to
 * any memcg.
 */
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
        if (!memcg_kmem_enabled())
                return true;

        /*
         * __GFP_NOFAIL allocations will move on even if charging is not
         * possible. Therefore we don't even try, and have this allocation
         * unaccounted. We could in theory charge it with
         * res_counter_charge_nofail, but we hope those allocations are rare,
         * and won't be worth the trouble.
         */
        if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
                return true;
        if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
                return true;

        /* If the task is dying, just let it go. */
        if (unlikely(fatal_signal_pending(current)))
                return true;

        return __memcg_kmem_newpage_charge(gfp, memcg, order);
}

/**
 * memcg_kmem_uncharge_pages: uncharge pages from memcg
 * @page: pointer to struct page being freed
 * @order: allocation order.
 *
 * There is no need to specify the memcg here, since it is embedded in the
 * page_cgroup.
 */
static inline void
memcg_kmem_uncharge_pages(struct page *page, int order)
{
        if (memcg_kmem_enabled())
                __memcg_kmem_uncharge_pages(page, order);
}

/**
 * memcg_kmem_commit_charge: embeds correct memcg in a page
 * @page: pointer to struct page recently allocated
 * @memcg: the memcg structure we charged against
 * @order: allocation order.
 *
 * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
 * failure of the allocation. If @page is NULL, this function will revert the
 * charges. Otherwise, it will commit @memcg to the corresponding page_cgroup.
 */
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
        if (memcg_kmem_enabled() && memcg)
                __memcg_kmem_commit_charge(page, memcg, order);
}
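
/*
 * Sketch of the allocation-side protocol for __GFP_KMEMCG pages, simplified
 * from what the page allocator does (see __alloc_pages_nodemask() for the
 * real flow); a NULL page still has to be passed to the commit so the
 * charge is reverted:
 *
 *      struct mem_cgroup *memcg = NULL;
 *      struct page *page;
 *
 *      if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
 *              return NULL;
 *      page = alloc_pages(gfp, order);
 *      memcg_kmem_commit_charge(page, memcg, order);
 *      return page;
 *
 * The matching free path calls memcg_kmem_uncharge_pages(page, order).
 */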

/**
 * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
 * @cachep: the original global kmem cache
 * @gfp: allocation flags.
 *
 * This function assumes that the task allocating, which determines the memcg
 * in the page allocator, belongs to the same cgroup throughout the whole
 * process.  Misaccounting can happen if the task calls memcg_kmem_get_cache()
 * while belonging to a cgroup, and later on changes. This is considered
 * acceptable, and should only happen upon task migration.
 *
 * Before the cache is created by the memcg core, there is also a possible
 * imbalance: the task belongs to a memcg, but the cache being allocated from
 * is the global cache, since the child cache is not yet guaranteed to be
 * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
 * passed and the page allocator will not attempt any cgroup accounting.
 */
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
        if (!memcg_kmem_enabled())
                return cachep;
        if (gfp & __GFP_NOFAIL)
                return cachep;
        if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
                return cachep;
        if (unlikely(fatal_signal_pending(current)))
                return cachep;

        return __memcg_kmem_get_cache(cachep, gfp);
}
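
/*
 * Allocation-side sketch: a slab allocator redirects to the memcg-specific
 * cache before carving out an object. actual_slab_alloc() is a stand-in
 * for the allocator internals, not a real function:
 *
 *      cachep = memcg_kmem_get_cache(cachep, flags);
 *      obj = actual_slab_alloc(cachep, flags);
 */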
#else
#define for_each_memcg_cache_index(_idx)        \
        for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
        return false;
}

static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
        return true;
}

static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
{
}

static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
        return -1;
}

static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg,
                struct kmem_cache *s, struct kmem_cache *root_cache)
{
        return 0;
}

static inline void memcg_free_cache_params(struct kmem_cache *s)
{
}

static inline void memcg_register_cache(struct kmem_cache *s)
{
}

static inline void memcg_unregister_cache(struct kmem_cache *s)
{
}

static inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
        return cachep;
}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */