linux/mm/list_lru.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * Either lock or RCU protects the array of per cgroup lists
         * from relocation (see memcg_update_list_lru_node).
         */
        memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
                                           lockdep_is_held(&nlru->lock));
        if (memcg_lrus && idx >= 0)
                return memcg_lrus->lru[idx];
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        struct list_lru_one *l = &nlru->lru;
        struct mem_cgroup *memcg = NULL;

        if (!nlru->memcg_lrus)
                goto out;

        memcg = mem_cgroup_from_obj(ptr);
        if (!memcg)
                goto out;

        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
        if (memcg_ptr)
                *memcg_ptr = memcg;
        return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        if (memcg_ptr)
                *memcg_ptr = NULL;
        return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

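/*
 * Add @item to the lru. The item is only added if it is currently
 * unlinked (list_empty()), otherwise false is returned and the list is
 * left untouched. The NUMA node list is chosen from the page backing
 * @item, and on memcg-aware lrus the per-memcg sublist is chosen from
 * the object's memory cgroup. A minimal usage sketch (the lru and field
 * names below are illustrative, not taken from this file):
 *
 *        if (list_lru_add(&cache_lru, &obj->lru))
 *                atomic_long_inc(&nr_unused_objects);
 */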
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct mem_cgroup *memcg;
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, &memcg);
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        set_shrinker_bit(memcg, nid,
                                         lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

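/*
 * The inverse of list_lru_add(): the item is unlinked (and
 * reinitialised via list_del_init()) only if it is currently on a
 * list. Returns true if it was removed, false if it was not queued in
 * the first place.
 */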
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, NULL);
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

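/*
 * The two isolate helpers below are intended to be called from a walk
 * callback with the node's lru lock already held by the walker. They
 * only adjust the per-memcg sublist count; the walker itself updates
 * nlru->nr_items when the callback reports LRU_REMOVED*.
 */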
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

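/*
 * Counting is lockless: the sublist is looked up under RCU and
 * nr_items is read with READ_ONCE(), so the result is only a snapshot,
 * which is typically all a shrinker's count callback needs.
 */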
unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        unsigned long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
        count = READ_ONCE(l->nr_items);
        rcu_read_unlock();

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

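/*
 * Core walk loop, called with nlru->lock held. The isolate callback may
 * drop that lock (reported via LRU_REMOVED_RETRY or LRU_RETRY), in
 * which case the traversal is no longer valid and restarts from the
 * list head. nr_to_walk is decremented up front so that a stream of
 * LRU_RETRY results cannot livelock the walker.
 */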
static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

        l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        fallthrough;
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock(&nlru->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock_irq(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock_irq(&nlru->lock);
        return ret;
}

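/*
 * Walk one NUMA node of the lru: first the root (non-memcg) list, then,
 * if the lru is memcg aware and the walk budget is not yet exhausted,
 * every per-memcg sublist on that node in turn.
 */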
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        struct list_lru_node *nlru = &lru->node[nid];

                        spin_lock(&nlru->lock);
                        isolated += __list_lru_walk_one(nlru, memcg_idx,
                                                        isolate, cb_arg,
                                                        nr_to_walk);
                        spin_unlock(&nlru->lock);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                          int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                      int begin, int end)
{
        int i;

        for (i = begin; i < end; i++) {
                struct list_lru_one *l;

                l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
                if (!l)
                        goto fail;

                init_one_lru(l);
                memcg_lrus->lru[i] = l;
        }
        return 0;
fail:
        __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
        return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        int size = memcg_nr_cache_ids;

        memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
                              size * sizeof(void *), GFP_KERNEL);
        if (!memcg_lrus)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
                kvfree(memcg_lrus);
                return -ENOMEM;
        }
        RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

        return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when the shrinker has already been unregistered,
         * and nobody can use it. So there is no need to use kvfree_rcu().
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
}

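/*
 * Grow a node's array of per-memcg lists from old_size to new_size
 * entries. The new array is published with rcu_assign_pointer() under
 * the IRQ-safe node lock and the old one is freed only after a grace
 * period via kvfree_rcu(), so both lock holders and RCU readers always
 * see a valid array (see list_lru_from_memcg_idx).
 */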
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = rcu_dereference_protected(nlru->memcg_lrus,
                                        lockdep_is_held(&list_lrus_mutex));
        new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(new, old_size, new_size)) {
                kvfree(new);
                return -ENOMEM;
        }

        memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

        /*
         * The locking below allows readers that hold nlru->lock to avoid
         * taking rcu_read_lock (see list_lru_from_memcg_idx).
         *
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);

        kvfree_rcu(old, rcu);
        return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
                                              int old_size, int new_size)
{
        struct list_lru_memcg *memcg_lrus;

        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
                                               lockdep_is_held(&list_lrus_mutex));
        /*
         * Do not bother shrinking the array back to the old size, because
         * we cannot handle allocation failures here.
         */
        __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        int i;

        lru->memcg_aware = memcg_aware;

        if (!memcg_aware)
                return 0;

        for_each_node(i) {
                if (memcg_init_list_lru_node(&lru->node[i]))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;
                memcg_destroy_list_lru_node(&lru->node[i]);
        }
        return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
                                 int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return 0;

        for_each_node(i) {
                if (memcg_update_list_lru_node(&lru->node[i],
                                               old_size, new_size))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;

                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
        }
        return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
                                         int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
}

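/*
 * Resize the per-memcg arrays of every registered lru, e.g. when the
 * memcg cache id space grows. On failure, the lrus that were already
 * resized are rolled back with memcg_cancel_update_list_lru() so all
 * lrus stay consistent with memcg_nr_cache_ids.
 */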
int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        goto fail;
        }
out:
        mutex_unlock(&list_lrus_mutex);
        return ret;
fail:
        list_for_each_entry_continue_reverse(lru, &list_lrus, list)
                memcg_cancel_update_list_lru(lru, old_size, new_size);
        goto out;
}

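/*
 * Move every item on the @src_idx sublist of this node over to
 * @dst_memcg's sublist. The splice and the nr_items transfer happen
 * under the IRQ-safe node lock, and the destination's shrinker bit is
 * set so the moved items remain visible to reclaim.
 */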
static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
                                      int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(nlru, src_idx);
        dst = list_lru_from_memcg_idx(nlru, dst_idx);

        list_splice_init(&src->list, &dst->list);

        if (src->nr_items) {
                dst->nr_items += src->nr_items;
                set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
                src->nr_items = 0;
        }

        spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
                                 int src_idx, struct mem_cgroup *dst_memcg)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru *lru;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list)
                memcg_drain_list_lru(lru, src_idx, dst_memcg);
        mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

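/*
 * Common initialisation used by the list_lru_init*() wrappers: allocate
 * the per-node array, set up each node's lock and root list and, for
 * memcg-aware lrus, the per-memcg arrays. memcg_get_cache_ids() keeps
 * memcg_nr_cache_ids stable while the arrays are sized and the lru is
 * registered.
 */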
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;
        int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
#endif
        memcg_get_cache_ids();

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                /* Do this so a list_lru_destroy() doesn't crash: */
                lru->node = NULL;
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);