/*
 * Slab allocator functions that are independent of the allocator strategy
 * (SLAB, SLUB or SLOB).
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_HARDENED_USERCOPY
bool usercopy_fallback __ro_after_init =
		IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
module_param(usercopy_fallback, bool, 0400);
MODULE_PARM_DESC(usercopy_fallback,
		"WARN instead of reject usercopy whitelist violations");
#endif

static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
		    slab_caches_to_rcu_destroy_workfn);

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB | SLAB_KASAN)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
			 SLAB_ACCOUNT)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 */
static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = true;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	WARN_ON(strchr(name, ' '));
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		if (s)
			kmem_cache_free(s, p[i]);
		else
			kfree(p[i]);
	}
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
			    void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
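
/*
 * Illustrative sketch (not part of this file): how a caller might use the
 * public bulk interface that the generic fallbacks above back.
 * kmem_cache_alloc_bulk() returns the number of objects actually allocated
 * (0 on failure), so the caller only has to check for zero.  The "foo"
 * cache and type below are invented for the example.
 *
 *	struct foo { int a; };
 *	static struct kmem_cache *foo_cache;	// created elsewhere
 *
 *	void foo_batch(void)
 *	{
 *		void *objs[16];
 *		size_t n;
 *
 *		n = kmem_cache_alloc_bulk(foo_cache, GFP_KERNEL,
 *					  ARRAY_SIZE(objs), objs);
 *		if (!n)
 *			return;		// nothing was allocated
 *		// ... use objs[0..n-1] ...
 *		kmem_cache_free_bulk(foo_cache, n, objs);
 *	}
 */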

#ifdef CONFIG_MEMCG_KMEM

LIST_HEAD(slab_root_caches);

void slab_init_memcg_params(struct kmem_cache *s)
{
	s->memcg_params.root_cache = NULL;
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
	INIT_LIST_HEAD(&s->memcg_params.children);
	s->memcg_params.dying = false;
}

static int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct memcg_cache_array *arr;

	if (root_cache) {
		s->memcg_params.root_cache = root_cache;
		s->memcg_params.memcg = memcg;
		INIT_LIST_HEAD(&s->memcg_params.children_node);
		INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
		return 0;
	}

	slab_init_memcg_params(s);

	if (!memcg_nr_cache_ids)
		return 0;

	arr = kvzalloc(sizeof(struct memcg_cache_array) +
		       memcg_nr_cache_ids * sizeof(void *),
		       GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
	return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s))
		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static void free_memcg_params(struct rcu_head *rcu)
{
	struct memcg_cache_array *old;

	old = container_of(rcu, struct memcg_cache_array, rcu);
	kvfree(old);
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *old, *new;

	new = kvzalloc(sizeof(struct memcg_cache_array) +
		       new_array_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	if (old)
		memcpy(new->entries, old->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
	if (old)
		call_rcu(&old->rcu, free_memcg_params);
	return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		ret = update_memcg_params(s, num_memcgs);
		/*
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			break;
	}
	mutex_unlock(&slab_mutex);
	return ret;
}

void memcg_link_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_add(&s->root_caches_node, &slab_root_caches);
	} else {
		list_add(&s->memcg_params.children_node,
			 &s->memcg_params.root_cache->memcg_params.children);
		list_add(&s->memcg_params.kmem_caches_node,
			 &s->memcg_params.memcg->kmem_caches);
	}
}

static void memcg_unlink_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_del(&s->root_caches_node);
	} else {
		list_del(&s->memcg_params.children_node);
		list_del(&s->memcg_params.kmem_caches_node);
	}
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}

static inline void memcg_unlink_cache(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
static unsigned int calculate_alignment(slab_flags_t flags,
		unsigned int align, unsigned int size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater then use this.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned int ralign;

		ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (!is_root_cache(s))
		return 1;

	if (s->ctor)
		return 1;

	if (s->usersize)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge)
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;

		/*
		 * Check if alignment is compatible.
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}

static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->size = s->object_size = object_size;
	s->align = align;
	s->ctor = ctor;
	s->useroffset = useroffset;
	s->usersize = usersize;

	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/**
 * kmem_cache_create_usercopy - Create a cache with a region suitable
 * for copying to userspace
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
		unsigned int size, unsigned int align,
		slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize,
		void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err)
		goto out_unlock;

	/* Refuse requests with allocator specific flags */
	if (flags & ~SLAB_FLAGS_PERMITTED) {
		err = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	/* Fail closed on bad usersize or useroffset values. */
	if (WARN_ON(!usersize && useroffset) ||
	    WARN_ON(size < usersize || size - usersize < useroffset))
		usersize = useroffset = 0;

	if (!usersize)
		s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size,
			 calculate_alignment(flags, align, size),
			 flags, useroffset, usersize, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			pr_warn("kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);

struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		slab_flags_t flags, void (*ctor)(void *))
{
	return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
					  ctor);
}
EXPORT_SYMBOL(kmem_cache_create);
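
/*
 * Illustrative sketch (not part of this file): typical driver-side usage of
 * the two creation interfaces above.  "foo" and its fields are invented for
 * the example; only the slab API calls are real.
 *
 *	struct foo {
 *		spinlock_t lock;
 *		char data[64];		// copied to/from userspace
 *	};
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	static int __init foo_init(void)
 *	{
 *		// Whitelist only foo.data for hardened usercopy checks.
 *		foo_cache = kmem_cache_create_usercopy("foo",
 *				sizeof(struct foo), 0, SLAB_HWCACHE_ALIGN,
 *				offsetof(struct foo, data),
 *				sizeof(((struct foo *)0)->data), NULL);
 *		// A cache with no usercopy region would instead use:
 *		//   kmem_cache_create("foo", sizeof(struct foo), 0,
 *		//			SLAB_HWCACHE_ALIGN, NULL);
 *		return foo_cache ? 0 : -ENOMEM;
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		kmem_cache_destroy(foo_cache);
 *	}
 */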

static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
	LIST_HEAD(to_destroy);
	struct kmem_cache *s, *s2;

	/*
	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
	 * @slab_caches_to_rcu_destroy list.  Splice them onto a private
	 * list under slab_mutex, wait for an RCU grace period so that no
	 * reader can still be accessing objects from those caches, and only
	 * then release the kmem_cache structures themselves.
	 */
	mutex_lock(&slab_mutex);
	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
	mutex_unlock(&slab_mutex);

	if (list_empty(&to_destroy))
		return;

	rcu_barrier();

	list_for_each_entry_safe(s, s2, &to_destroy, list) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

static int shutdown_cache(struct kmem_cache *s)
{
	/* free asan quarantined objects */
	kasan_cache_shutdown(s);

	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	memcg_unlink_cache(s);
	list_del(&s->list);

	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
#endif
		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
		schedule_work(&slab_caches_to_rcu_destroy_work);
	} else {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}

	return 0;
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - create a memcg child cache of @root_cache.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits
 * properties from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
			     struct kmem_cache *root_cache)
{
	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
	struct cgroup_subsys_state *css = &memcg->css;
	struct memcg_cache_array *arr;
	struct kmem_cache *s = NULL;
	char *cache_name;
	int idx;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	/*
	 * The memory cgroup could have been offlined while the cache
	 * creation work was pending.
	 */
	if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
		goto out_unlock;

	idx = memcg_cache_id(memcg);
	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (arr->entries[idx])
		goto out_unlock;

	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
	cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
			       css->serial_nr, memcg_name_buf);
	if (!cache_name)
		goto out_unlock;

	s = create_cache(cache_name, root_cache->object_size,
			 root_cache->align,
			 root_cache->flags & CACHE_CREATE_MASK,
			 root_cache->useroffset, root_cache->usersize,
			 root_cache->ctor, memcg, root_cache);
	/*
	 * If we could not create a memcg cache, do not complain, because
	 * that's not critical at all as we can always proceed with the root
	 * cache.
	 */
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	/*
	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
	 * memory barrier here to ensure nobody will see the kmem_cache
	 * partially initialized.
	 */
	smp_wmb();
	arr->entries[idx] = s;

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static void kmemcg_deactivate_workfn(struct work_struct *work)
{
	struct kmem_cache *s = container_of(work, struct kmem_cache,
					    memcg_params.deact_work);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->memcg_params.deact_fn(s);

	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	/* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
	css_put(&s->memcg_params.memcg->css);
}

static void kmemcg_deactivate_rcufn(struct rcu_head *head)
{
	struct kmem_cache *s = container_of(head, struct kmem_cache,
					    memcg_params.deact_rcu_head);

	/*
	 * We need to grab blocking locks.  Bounce to ->deact_work.  The
	 * work item shares the space with the RCU head and can't be
	 * initialized earlier.
	 */
	INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
	queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
}

/**
 * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
 *					   sched RCU grace period
 * @s: target kmem_cache
 * @deact_fn: deactivation function to call
 *
 * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
 * held after a sched RCU grace period.  The slab is guaranteed to stay
 * alive until @deact_fn is finished.  This is to be used from
 * __kmemcg_cache_deactivate().
 */
void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
					   void (*deact_fn)(struct kmem_cache *))
{
	if (WARN_ON_ONCE(is_root_cache(s)) ||
	    WARN_ON_ONCE(s->memcg_params.deact_fn))
		return;

	if (s->memcg_params.root_cache->memcg_params.dying)
		return;

	/* pin memcg so that @s doesn't get destroyed in the middle */
	css_get(&s->memcg_params.memcg->css);

	s->memcg_params.deact_fn = deact_fn;
	call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
	int idx;
	struct memcg_cache_array *arr;
	struct kmem_cache *s, *c;

	idx = memcg_cache_id(memcg);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
						lockdep_is_held(&slab_mutex));
		c = arr->entries[idx];
		if (!c)
			continue;

		__kmemcg_cache_deactivate(c);
		arr->entries[idx] = NULL;
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
	struct kmem_cache *s, *s2;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
				 memcg_params.kmem_caches_node) {
		/*
		 * The cgroup is about to be freed and therefore has no charges
		 * left. Hence, all its caches must be empty by now.
		 */
		BUG_ON(shutdown_cache(s));
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static int shutdown_memcg_caches(struct kmem_cache *s)
{
	struct memcg_cache_array *arr;
	struct kmem_cache *c, *c2;
	LIST_HEAD(busy);
	int i;

	BUG_ON(!is_root_cache(s));

	/*
	 * First, shutdown active caches, i.e. caches that belong to online
	 * memory cgroups.
	 */
	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = arr->entries[i];
		if (!c)
			continue;
		if (shutdown_cache(c))
			/*
			 * The cache still has objects. Move it to a temporary
			 * list so as not to try to destroy it for a second
			 * time while iterating over inactive caches below.
			 */
			list_move(&c->memcg_params.children_node, &busy);
		else
			/*
			 * The cache is empty and will be destroyed soon. Clear
			 * the pointer to it in the memcg_caches array so that
			 * it will never be accessed even if the root cache
			 * stays alive.
			 */
			arr->entries[i] = NULL;
	}

	/*
	 * Second, shutdown all caches left from memory cgroups that are now
	 * offline.
	 */
	list_for_each_entry_safe(c, c2, &s->memcg_params.children,
				 memcg_params.children_node)
		shutdown_cache(c);

	list_splice(&busy, &s->memcg_params.children);

	/*
	 * A cache being destroyed must be empty. In particular, this means
	 * that all per memcg caches attached to it must be empty too.
	 */
	if (!list_empty(&s->memcg_params.children))
		return -EBUSY;
	return 0;
}

static void flush_memcg_workqueue(struct kmem_cache *s)
{
	mutex_lock(&slab_mutex);
	s->memcg_params.dying = true;
	mutex_unlock(&slab_mutex);

	/*
	 * SLUB deactivates the kmem_caches through call_rcu_sched. Make
	 * sure all registered rcu callbacks have been invoked.
	 */
	if (IS_ENABLED(CONFIG_SLUB))
		rcu_barrier_sched();

	/*
	 * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
	 * deactivates the memcg kmem_caches through workqueue. Make sure all
	 * previous workitems on workqueue are processed.
	 */
	flush_workqueue(memcg_kmem_cache_wq);
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
	return 0;
}

static inline void flush_memcg_workqueue(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	__kmem_cache_release(s);
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	int err;

	if (unlikely(!s))
		return;

	flush_memcg_workqueue(s);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_memcg_caches(s);
	if (!err)
		err = shutdown_cache(s);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero return value indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	kasan_cache_shrink(cachep);
	ret = __kmem_cache_shrink(cachep);
	put_online_mems();
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
	s->useroffset = useroffset;
	s->usersize = usersize;

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags, useroffset, usersize);
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slabs sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static u8 size_index[24] __ro_after_init = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	unsigned int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}
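
/*
 * Worked example (illustrative, assuming the default table above is not
 * patched by setup_kmalloc_cache_index_table()): for a 100-byte request,
 * size <= 192 so the table is used: size_index_elem(100) = (100 - 1) / 8 = 12
 * and size_index[12] == 7, i.e. the kmalloc-128 cache.  For a 300-byte
 * request the index is computed directly: fls(300 - 1) = 9, i.e. kmalloc-512.
 */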

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table
 * is kmalloc-67108864.
 */
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
	{NULL, 0},			{"kmalloc-96", 96},
	{"kmalloc-192", 192},		{"kmalloc-8", 8},
	{"kmalloc-16", 16},		{"kmalloc-32", 32},
	{"kmalloc-64", 64},		{"kmalloc-128", 128},
	{"kmalloc-256", 256},		{"kmalloc-512", 512},
	{"kmalloc-1024", 1024},		{"kmalloc-2048", 2048},
	{"kmalloc-4096", 4096},		{"kmalloc-8192", 8192},
	{"kmalloc-16384", 16384},	{"kmalloc-32768", 32768},
	{"kmalloc-65536", 65536},	{"kmalloc-131072", 131072},
	{"kmalloc-262144", 262144},	{"kmalloc-524288", 524288},
	{"kmalloc-1048576", 1048576},	{"kmalloc-2097152", 2097152},
	{"kmalloc-4194304", 4194304},	{"kmalloc-8388608", 8388608},
	{"kmalloc-16777216", 16777216},	{"kmalloc-33554432", 33554432},
	{"kmalloc-67108864", 67108864}
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crashes if we map a kernel page for a size
 * smaller than KMALLOC_MIN_SIZE.
 */
void __init setup_kmalloc_cache_index_table(void)
{
	unsigned int i;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		unsigned int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 byte.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
}

static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
{
	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
					kmalloc_info[idx].size, flags, 0,
					kmalloc_info[idx].size);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(slab_flags_t flags)
{
	int i;

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i])
			new_kmalloc_cache(i, flags);

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			new_kmalloc_cache(1, flags);
		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			new_kmalloc_cache(2, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			unsigned int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
					    "dma-kmalloc-%u", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags, 0, 0);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree().
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret;
	struct page *page;

	flags |= __GFP_COMP;
	page = alloc_pages(flags, order);
	ret = page ? page_address(page) : NULL;
	kmemleak_alloc(ret, size, 1, flags);
	kasan_kmalloc_large(ret, size, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order);

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
			       unsigned int count)
{
	unsigned int rand;
	unsigned int i;

	for (i = 0; i < count; i++)
		list[i] = i;

	/* Fisher-Yates shuffle */
	for (i = count - 1; i > 0; i--) {
		rand = prandom_u32_state(state);
		rand %= (i + 1);
		swap(list[i], list[rand]);
	}
}

/* Create a random sequence per cache */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			    gfp_t gfp)
{
	struct rnd_state state;

	if (count < 2 || cachep->random_seq)
		return 0;

	cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
	if (!cachep->random_seq)
		return -ENOMEM;

	/* Get best entropy at this stage of boot */
	prandom_seed_state(&state, get_random_long());

	freelist_randomize(&state, cachep->random_seq, count);
	return 0;
}

/* Destroy the per-cache random freelist sequence */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
	kfree(cachep->random_seq);
	cachep->random_seq = NULL;
}
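
/*
 * Worked example (illustrative) of the Fisher-Yates shuffle performed by
 * freelist_randomize() above, for count = 4 and a made-up random sequence
 * rand = {2, 1, 1}:
 *
 *	start:                {0, 1, 2, 3}
 *	i = 3, 2 % 4 = 2  ->  swap list[3], list[2]:  {0, 1, 3, 2}
 *	i = 2, 1 % 3 = 1  ->  swap list[2], list[1]:  {0, 3, 1, 2}
 *	i = 1, 1 % 2 = 1  ->  swap list[1], list[1]:  {0, 3, 1, 2}
 *
 * The resulting permutation of object indexes is stored in
 * cachep->random_seq and consulted by the allocator when laying out the
 * freelist of newly allocated slab pages.
 */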
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400)
#endif

static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);
	return seq_list_start(&slab_root_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_root_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache(c, s) {
		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);

	if (p == slab_root_caches.next)
		print_slabinfo_header(m);
	cache_show(s, m);
	return 0;
}

void dump_unreclaimable_slab(void)
{
	struct kmem_cache *s, *s2;
	struct slabinfo sinfo;

	/*
	 * Here acquiring slab_mutex is risky since we don't prefer to get
	 * sleep in oom path. But, without mutex hold, it may introduce a
	 * risk of crash.
	 * Use mutex_trylock to protect the list traverse, dump nothing
	 * just in case kernel won't honor this risk.
	 */
	if (!mutex_trylock(&slab_mutex)) {
		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
		return;
	}

	pr_info("Unreclaimable slab info:\n");
	pr_info("Name Used Total\n");

	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
			continue;

		get_slabinfo(s, &sinfo);

		if (sinfo.num_objs > 0)
			pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
				(sinfo.active_objs * s->size) / 1024,
				(sinfo.num_objs * s->size) / 1024);
	}
	mutex_unlock(&slab_mutex);
}

#if defined(CONFIG_MEMCG)
void *memcg_slab_start(struct seq_file *m, loff_t *pos)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	mutex_lock(&slab_mutex);
	return seq_list_start(&memcg->kmem_caches, *pos);
}

void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	return seq_list_next(p, &memcg->kmem_caches, pos);
}

void memcg_slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

int memcg_slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache,
					  memcg_params.kmem_caches_node);
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	if (p == memcg->kmem_caches.next)
		print_slabinfo_header(m);
	cache_show(s, m);
	return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = slab_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
					   gfp_t flags)
{
	void *ret;
	size_t ks = 0;

	if (p)
		ks = ksize(p);

	if (ks >= new_size) {
		kasan_krealloc((void *)p, new_size, flags);
		return (void *)p;
	}

	ret = kmalloc_track_caller(new_size, flags);
	if (ret && p)
		memcpy(ret, p, ks);

	return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
	if (unlikely(!new_size))
		return ZERO_SIZE_PTR;

	return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes.  If @p is %NULL, krealloc()
 * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;

	if (unlikely(!new_size)) {
		kfree(p);
		return ZERO_SIZE_PTR;
	}

	ret = __do_krealloc(p, new_size, flags);
	if (ret && p != ret)
		kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);
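
/*
 * Illustrative sketch (not part of this file): growing a buffer with
 * krealloc().  The old pointer must not be used after a successful call;
 * on failure the old buffer is untouched and remains the caller's to free.
 * The helper name and parameters are invented for the example.
 *
 *	static int grow_buf(char **bufp, size_t *lenp, size_t new_len)
 *	{
 *		char *tmp = krealloc(*bufp, new_len, GFP_KERNEL);
 *
 *		if (!tmp)
 *			return -ENOMEM;	// *bufp is still valid here
 *		*bufp = tmp;
 *		*lenp = new_len;
 *		return 0;
 *	}
 */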

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(mem)))
		return;
	ks = ksize(mem);
	memset(mem, 0, ks);
	kfree(mem);
}
EXPORT_SYMBOL(kzfree);

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	if (__should_failslab(s, gfpflags))
		return -ENOMEM;
	return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);