linux/mm/list_lru.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
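
/*
 * Example usage (an illustrative sketch only; "my_object", "my_lru" and
 * "obj" are hypothetical names, everything else is the API implemented
 * below and declared in <linux/list_lru.h>):
 *
 *	struct my_object {
 *		struct list_head lru;
 *	};
 *
 *	static struct list_lru my_lru;
 *
 *	err = list_lru_init(&my_lru);
 *
 *	INIT_LIST_HEAD(&obj->lru);
 *	list_lru_add(&my_lru, &obj->lru);	when the object becomes unused
 *	list_lru_del(&my_lru, &obj->lru);	when it is taken back into use
 *
 *	list_lru_destroy(&my_lru);
 *
 * list_lru_add() only inserts items whose list_head is empty, and the
 * NUMA node (and, for memcg-aware lists, the owning memcg) is derived
 * from the item's address, so the list_head must be embedded in the
 * object itself (a kmem-accounted slab allocation if per-memcg
 * accounting is wanted).
 */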
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * Either lock or RCU protects the array of per cgroup lists
         * from relocation (see memcg_update_list_lru_node).
         */
        memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
                                           lockdep_is_held(&nlru->lock));
        if (memcg_lrus && idx >= 0)
                return memcg_lrus->lru[idx];
        return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
        struct page *page;

        if (!memcg_kmem_enabled())
                return NULL;
        page = virt_to_head_page(ptr);
        return memcg_from_slab_page(page);
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        struct list_lru_one *l = &nlru->lru;
        struct mem_cgroup *memcg = NULL;

        if (!nlru->memcg_lrus)
                goto out;

        memcg = mem_cgroup_from_kmem(ptr);
        if (!memcg)
                goto out;

        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
        if (memcg_ptr)
                *memcg_ptr = memcg;
        return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        if (memcg_ptr)
                *memcg_ptr = NULL;
        return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct mem_cgroup *memcg;
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, &memcg);
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        memcg_set_shrinker_bit(memcg, nid,
                                               lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, NULL);
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);
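
/*
 * Example walk callback (an illustrative sketch; "my_object", "my_isolate"
 * and object_is_in_use() are hypothetical, the latter standing in for
 * whatever reference check a real user needs). The callback is invoked
 * with the per-node (or per-memcg) list and the node's lock held, and
 * reports what it did via enum lru_status:
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *list,
 *					  spinlock_t *lock, void *cb_arg)
 *	{
 *		struct my_object *obj;
 *		struct list_head *dispose = cb_arg;
 *
 *		obj = container_of(item, struct my_object, lru);
 *		if (object_is_in_use(obj))
 *			return LRU_ROTATE;
 *
 *		list_lru_isolate_move(list, item, dispose);
 *		return LRU_REMOVED;
 *	}
 *
 * A callback that drops the lock must re-take it and return
 * LRU_REMOVED_RETRY or LRU_RETRY, so that __list_lru_walk_one() below
 * restarts its now-invalid list traversal.
 */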

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        unsigned long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
        count = l->nr_items;
        rcu_read_unlock();

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

        l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * Decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items.
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        /* fall through */
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock(&nlru->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock_irq(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock_irq(&nlru->lock);
        return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        struct list_lru_node *nlru = &lru->node[nid];

                        spin_lock(&nlru->lock);
                        isolated += __list_lru_walk_one(nlru, memcg_idx,
                                                        isolate, cb_arg,
                                                        nr_to_walk);
                        spin_unlock(&nlru->lock);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
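
/*
 * Example shrinker integration (an illustrative sketch; "my_lru",
 * "my_isolate", "my_shrinker" and dispose_list() are hypothetical, and
 * the list_lru_shrink_count()/list_lru_shrink_walk() wrappers are
 * assumed to be the ones provided by <linux/list_lru.h>):
 *
 *	static unsigned long my_count(struct shrinker *shrink,
 *				      struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&my_lru, sc);
 *	}
 *
 *	static unsigned long my_scan(struct shrinker *shrink,
 *				     struct shrink_control *sc)
 *	{
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &dispose);
 *		dispose_list(&dispose);
 *		return freed;
 *	}
 *
 * For per-memcg reclaim the LRU would be created with
 * list_lru_init_memcg(&my_lru, &my_shrinker) and the shrinker registered
 * with SHRINKER_MEMCG_AWARE, which is what lets lru_shrinker_id() and
 * memcg_set_shrinker_bit() in list_lru_add() above keep the per-memcg
 * shrinker bitmap up to date.
 */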

static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                          int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                      int begin, int end)
{
        int i;

        for (i = begin; i < end; i++) {
                struct list_lru_one *l;

                l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
                if (!l)
                        goto fail;

                init_one_lru(l);
                memcg_lrus->lru[i] = l;
        }
        return 0;
fail:
        __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
        return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        int size = memcg_nr_cache_ids;

        memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
                              size * sizeof(void *), GFP_KERNEL);
        if (!memcg_lrus)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
                kvfree(memcg_lrus);
                return -ENOMEM;
        }
        RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

        return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when the shrinker has already been unregistered,
         * and nobody can use it. So there is no need to use kvfree_rcu().
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
        struct list_lru_memcg *mlru;

        mlru = container_of(head, struct list_lru_memcg, rcu);
        kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = rcu_dereference_protected(nlru->memcg_lrus,
                                        lockdep_is_held(&list_lrus_mutex));
        new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(new, old_size, new_size)) {
                kvfree(new);
                return -ENOMEM;
        }

        memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

        /*
         * The locking below allows readers that hold nlru->lock to avoid
         * taking rcu_read_lock (see list_lru_from_memcg_idx).
         *
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);

        call_rcu(&old->rcu, kvfree_rcu);
        return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
                                              int old_size, int new_size)
{
        struct list_lru_memcg *memcg_lrus;

        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
                                               lockdep_is_held(&list_lrus_mutex));
        /*
         * Do not bother shrinking the array back to the old size, because
         * we cannot handle allocation failures here.
         */
        __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        int i;

        lru->memcg_aware = memcg_aware;

        if (!memcg_aware)
                return 0;

        for_each_node(i) {
                if (memcg_init_list_lru_node(&lru->node[i]))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;
                memcg_destroy_list_lru_node(&lru->node[i]);
        }
        return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
                                 int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return 0;

        for_each_node(i) {
                if (memcg_update_list_lru_node(&lru->node[i],
                                               old_size, new_size))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;

                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
        }
        return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
                                         int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        goto fail;
        }
out:
        mutex_unlock(&list_lrus_mutex);
        return ret;
fail:
        list_for_each_entry_continue_reverse(lru, &list_lrus, list)
                memcg_cancel_update_list_lru(lru, old_size, new_size);
        goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
                                      int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;
        bool set;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(nlru, src_idx);
        dst = list_lru_from_memcg_idx(nlru, dst_idx);

        list_splice_init(&src->list, &dst->list);
        set = (!dst->nr_items && src->nr_items);
        dst->nr_items += src->nr_items;
        if (set)
                memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        src->nr_items = 0;

        spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
                                 int src_idx, struct mem_cgroup *dst_memcg)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru *lru;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list)
                memcg_drain_list_lru(lru, src_idx, dst_memcg);
        mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;
        int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
#endif
        memcg_get_cache_ids();

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                /* Do this so a list_lru_destroy() doesn't crash: */
                lru->node = NULL;
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);