LXR linux/include/linux/slab.h

   1/*
   2 * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk).
   3 *
   4 * (C) SGI 2006, Christoph Lameter
   5 *      Cleaned up and restructured to ease the addition of alternative
   6 *      implementations of SLAB allocators.
   7 * (C) Linux Foundation 2008-2013
   8 *      Unified interface for all slab allocators
   9 */
  10
  11#ifndef _LINUX_SLAB_H
  12#define _LINUX_SLAB_H
  13
  14#include <linux/gfp.h>
  15#include <linux/types.h>
  16#include <linux/workqueue.h>
  17
  18
  19/*
  20 * Flags to pass to kmem_cache_create().
  21 * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set.
     *
     * Every value below is a single, distinct bit, so several flags can be
     * combined with bitwise OR into one unsigned long flags argument.
  22 */
  23#define SLAB_DEBUG_FREE         0x00000100UL    /* DEBUG: Perform (expensive) checks on free */
  24#define SLAB_RED_ZONE           0x00000400UL    /* DEBUG: Red zone objects in a cache */
  25#define SLAB_POISON             0x00000800UL    /* DEBUG: Poison objects */
  26#define SLAB_HWCACHE_ALIGN      0x00002000UL    /* Align objects on cache lines */
  27#define SLAB_CACHE_DMA          0x00004000UL    /* Use GFP_DMA memory */
  28#define SLAB_STORE_USER         0x00010000UL    /* DEBUG: Store the last owner for bug hunting */
  29#define SLAB_PANIC              0x00040000UL    /* Panic if kmem_cache_create() fails */
  30/*
  31 * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
  32 *
  33 * This delays freeing the SLAB page by a grace period, it does _NOT_
  34 * delay object freeing. This means that if you do kmem_cache_free()
  35 * that memory location is free to be reused at any time. Thus it may
  36 * be possible to see another object there in the same RCU grace period.
  37 *
  38 * This feature only ensures the memory location backing the object
  39 * stays valid, the trick to using this is relying on an independent
  40 * object validation pass. Something like:
  41 *
  42 *  rcu_read_lock()
  43 * again:
  44 *  obj = lockless_lookup(key);
  45 *  if (obj) {
  46 *    if (!try_get_ref(obj)) // might fail for free objects
  47 *      goto again;
  48 *
  49 *    if (obj->key != key) { // not the object we expected
  50 *      put_ref(obj);
  51 *      goto again;
  52 *    }
  53 *  }
  54 *  rcu_read_unlock();
  55 *
  56 * This is useful if we need to approach a kernel structure obliquely,
  57 * from its address obtained without the usual locking. We can lock
  58 * the structure to stabilize it and check it's still at the given address,
  59 * only if we can be sure that the memory has not been meanwhile reused
  60 * for some other kind of object (which our subsystem's lock might corrupt).
  61 *
  62 * rcu_read_lock before reading the address, then rcu_read_unlock after
  63 * taking the spinlock within the structure expected at that address.
  64 */
  65#define SLAB_DESTROY_BY_RCU     0x00080000UL    /* Defer freeing slabs to RCU */
  66#define SLAB_MEM_SPREAD         0x00100000UL    /* Spread some memory over cpuset */
  67#define SLAB_TRACE              0x00200000UL    /* Trace allocations and frees */
  68
  69/*
     * Flag to prevent checks on free.
     * Evaluates to 0 (no bit set) unless CONFIG_DEBUG_OBJECTS is enabled.
     */
  70#ifdef CONFIG_DEBUG_OBJECTS
  71# define SLAB_DEBUG_OBJECTS     0x00400000UL
  72#else
  73# define SLAB_DEBUG_OBJECTS     0x00000000UL
  74#endif
  75
  76#define SLAB_NOLEAKTRACE        0x00800000UL    /* Avoid kmemleak tracing */
  77
  78/*
     * Don't track use of uninitialized memory.
     * Evaluates to 0 (no bit set) unless CONFIG_KMEMCHECK is enabled.
     */
  79#ifdef CONFIG_KMEMCHECK
  80# define SLAB_NOTRACK           0x01000000UL
  81#else
  82# define SLAB_NOTRACK           0x00000000UL
  83#endif
  84#ifdef CONFIG_FAILSLAB
  85# define SLAB_FAILSLAB          0x02000000UL    /* Fault injection mark */
  86#else
  87# define SLAB_FAILSLAB          0x00000000UL    /* No bit set without CONFIG_FAILSLAB */
  88#endif
  89
  90/* The following flags affect the page allocator grouping pages by mobility */
  91#define SLAB_RECLAIM_ACCOUNT    0x00020000UL            /* Objects are reclaimable */
  92#define SLAB_TEMPORARY          SLAB_RECLAIM_ACCOUNT    /* Objects are short-lived; alias of SLAB_RECLAIM_ACCOUNT */
  93/*
  94 * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
  95 *
  96 * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
  97 *
  98 * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can.
  99 * Both make kfree a no-op.
 100 */
 101#define ZERO_SIZE_PTR ((void *)16)
 102
 /*
  * ZERO_OR_NULL_PTR(x) is true when x, viewed as an unsigned long, is not
  * above the value of ZERO_SIZE_PTR (16). That covers both NULL and
  * ZERO_SIZE_PTR itself, as well as every pointer value in between.
  */
 103#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
 104                                (unsigned long)ZERO_SIZE_PTR)
 105
 106#include <linux/kmemleak.h>
 107#include <linux/kasan.h>
 108
 109struct mem_cgroup;
 110/*
 111 * struct kmem_cache related prototypes
 112 */
 113void __init kmem_cache_init(void);
 114int slab_is_available(void);
 115
 116struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 117                        unsigned long,
 118                        void (*)(void *));
 119void kmem_cache_destroy(struct kmem_cache *);
 120int kmem_cache_shrink(struct kmem_cache *);
 121
 122void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
 123void memcg_deactivate_kmem_caches(struct mem_cgroup *);
 124void memcg_destroy_kmem_caches(struct mem_cgroup *);
 125
 126/*
 127 * Please use this macro to create slab caches. Simply specify the
 128 * name of the structure and maybe some flags that are listed above.
 129 *
 130 * The alignment of the struct determines object alignment. If you
 131 * e.g. add ____cacheline_aligned_in_smp to the struct declaration
 132 * then the objects will be properly aligned in SMP configurations.
     *
     * The macro expands to a kmem_cache_create() call whose name is the
     * stringified struct tag, whose size and alignment are sizeof() and
     * __alignof__() of "struct <tag>", and whose final (callback) argument
     * is NULL. @__struct must therefore be the bare tag, without "struct".
 133 */
 134#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
 135                sizeof(struct __struct), __alignof__(struct __struct),\
 136                (__flags), NULL)
 137
 138/*
 139 * Common kmalloc functions provided by all allocators
 140 */
 141void * __must_check __krealloc(const void *, size_t, gfp_t);
 142void * __must_check krealloc(const void *, size_t, gfp_t);
 143void kfree(const void *);
 144void kzfree(const void *);
 145size_t ksize(const void *);
 146
 147/*
 148 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 149 * alignment larger than the alignment of a 64-bit integer.
 150 * Setting ARCH_DMA_MINALIGN (to a value above 8) in arch headers allows that.
     *
     * In that case ARCH_DMA_MINALIGN also becomes the minimum kmalloc size
     * (KMALLOC_MIN_SIZE) and determines KMALLOC_SHIFT_LOW. Otherwise only
     * ARCH_KMALLOC_MINALIGN is defined here, as the alignment of
     * unsigned long long.
 151 */
 152#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
 153#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
 154#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
 155#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
 156#else
 157#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
 158#endif
 159
 160/*
 161 * Kmalloc array related definitions
 162 */
 163
 164#ifdef CONFIG_SLAB
 165/*
 166 * The largest kmalloc size supported by the SLAB allocators is
 167 * 32 megabytes (2^25) or the maximum allocatable page order if that is
 168 * less than 32 MB.
 169 *
 170 * WARNING: It's not easy to increase this value since the allocators have
 171 * to do various tricks to work around compiler limitations in order to
 172 * ensure proper constant folding.
 173 */
 174#define KMALLOC_SHIFT_HIGH      ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
 175                                (MAX_ORDER + PAGE_SHIFT - 1) : 25)
 176#define KMALLOC_SHIFT_MAX       KMALLOC_SHIFT_HIGH
 177#ifndef KMALLOC_SHIFT_LOW
 178#define KMALLOC_SHIFT_LOW       5       /* default when not already defined: 2^5 = 32 */
 179#endif
 180#endif
 181
 182#ifdef CONFIG_SLUB
 183/*
 184 * SLUB directly allocates requests fitting into an order-1 page
 185 * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
 186 */
 187#define KMALLOC_SHIFT_HIGH      (PAGE_SHIFT + 1)
 188#define KMALLOC_SHIFT_MAX       (MAX_ORDER + PAGE_SHIFT)
 189#ifndef KMALLOC_SHIFT_LOW
 190#define KMALLOC_SHIFT_LOW       3       /* default when not already defined: 2^3 = 8 */
 191#endif
 192#endif
 193
 194#ifdef CONFIG_SLOB
 195/*
 196 * SLOB passes all requests larger than one page to the page allocator.
 197 * No kmalloc array is necessary since objects of different sizes can
 198 * be allocated from the same page.
 199 */
 200#define KMALLOC_SHIFT_HIGH      PAGE_SHIFT
 201#define KMALLOC_SHIFT_MAX       30      /* KMALLOC_MAX_SIZE becomes 2^30 bytes */
 202#ifndef KMALLOC_SHIFT_LOW
 203#define KMALLOC_SHIFT_LOW       3       /* default when not already defined: 2^3 = 8 */
 204#endif
 205#endif
 206
 207/* Maximum allocatable size, in bytes */
 208#define KMALLOC_MAX_SIZE        (1UL << KMALLOC_SHIFT_MAX)
 209/* Maximum size, in bytes, for which we actually use a slab cache */
 210#define KMALLOC_MAX_CACHE_SIZE  (1UL << KMALLOC_SHIFT_HIGH)
 211/* Maximum order allocatable via the slab allocator */
 212#define KMALLOC_MAX_ORDER       (KMALLOC_SHIFT_MAX - PAGE_SHIFT)
 213
 214/*
 215 * Kmalloc subsystem.
     *
     * KMALLOC_MIN_SIZE is the smallest kmalloc object size. It is derived
     * from KMALLOC_SHIFT_LOW unless it was already defined (which this header
     * does itself when ARCH_DMA_MINALIGN is larger than 8).
 216 */
 217#ifndef KMALLOC_MIN_SIZE
 218#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
 219#endif
 220
 221/*
 222 * This restriction comes from the byte-sized index implementation.
 223 * Page size is normally 2^12 bytes and, in this case, if we want to use a
 224 * byte-sized index, which can represent 2^8 entries, the size of the object
 225 * should be greater than or equal to 2^12 / 2^8 = 2^4 = 16.
 226 * If the minimum kmalloc size is less than 16, we use it as the minimum
 227 * object size and give up on using a byte-sized index.
 228 */
 229#define SLAB_OBJ_MIN_SIZE      (KMALLOC_MIN_SIZE < 16 ? \
 230                               (KMALLOC_MIN_SIZE) : 16)
 231
 232#ifndef CONFIG_SLOB
 233extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
 234#ifdef CONFIG_ZONE_DMA
 235extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
 236#endif
 237
 238/*
 239 * Figure out which kmalloc slab an allocation of a certain size
 240 * belongs to.
 241 * 0 = zero alloc
 242 * 1 =  65 .. 96 bytes
 243 * 2 = 129 .. 192 bytes
 244 * n = 2^(n-1)+1 .. 2^n
     *
     * Exceptions to the table above:
     *  - any non-zero size up to KMALLOC_MIN_SIZE maps to KMALLOC_SHIFT_LOW;
     *  - index 1 is only used when KMALLOC_MIN_SIZE <= 32 and index 2 only
     *    when KMALLOC_MIN_SIZE <= 64; otherwise those sizes fall through to
     *    the power-of-two entries;
     *  - the largest index handled is 26 (64 MB); anything bigger hits BUG().
     *
     * NOTE(review): presumably kept as a flat chain of comparisons so that the
     * result folds to a constant when @size is a compile-time constant --
     * confirm before restructuring.
 245 */
 246static __always_inline int kmalloc_index(size_t size)
 247{
 248        if (!size)
 249                return 0;
 250
 251        if (size <= KMALLOC_MIN_SIZE)
 252                return KMALLOC_SHIFT_LOW;
 253
            /* The two non-power-of-two caches (96 and 192 bytes) come first. */
 254        if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
 255                return 1;
 256        if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
 257                return 2;
 258        if (size <=          8) return 3;
 259        if (size <=         16) return 4;
 260        if (size <=         32) return 5;
 261        if (size <=         64) return 6;
 262        if (size <=        128) return 7;
 263        if (size <=        256) return 8;
 264        if (size <=        512) return 9;
 265        if (size <=       1024) return 10;
 266        if (size <=   2 * 1024) return 11;
 267        if (size <=   4 * 1024) return 12;
 268        if (size <=   8 * 1024) return 13;
 269        if (size <=  16 * 1024) return 14;
 270        if (size <=  32 * 1024) return 15;
 271        if (size <=  64 * 1024) return 16;
 272        if (size <= 128 * 1024) return 17;
 273        if (size <= 256 * 1024) return 18;
 274        if (size <= 512 * 1024) return 19;
 275        if (size <= 1024 * 1024) return 20;
 276        if (size <=  2 * 1024 * 1024) return 21;
 277        if (size <=  4 * 1024 * 1024) return 22;
 278        if (size <=  8 * 1024 * 1024) return 23;
 279        if (size <=  16 * 1024 * 1024) return 24;
 280        if (size <=  32 * 1024 * 1024) return 25;
 281        if (size <=  64 * 1024 * 1024) return 26;
 282        BUG();
 283
 284        /* Will never be reached. Needed because the compiler may complain */
 285        return -1;
 286}
 287#endif /* !CONFIG_SLOB */
 288
 289void *__kmalloc(size_t size, gfp_t flags);
 290void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags);
 291void kmem_cache_free(struct kmem_cache *, void *);
 292
 /*
  * Node-aware allocation entry points.
  *
  * With CONFIG_NUMA these are out-of-line functions. Without it they are
  * inline wrappers that ignore @node and forward to __kmalloc() and
  * kmem_cache_alloc() respectively.
  */
 293#ifdef CONFIG_NUMA
 294void *__kmalloc_node(size_t size, gfp_t flags, int node);
 295void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 296#else
 297static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
 298{
 299        return __kmalloc(size, flags);  /* @node is intentionally unused */
 300}
 301
 302static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node)
 303{
 304        return kmem_cache_alloc(s, flags);      /* @node is intentionally unused */
 305}
 306#endif
 307
 /*
  * Cache allocation variants that additionally receive the size that was
  * actually requested.
  *
  * With CONFIG_TRACING, kmem_cache_alloc_trace() is an out-of-line function;
  * kmem_cache_alloc_node_trace() is out of line too when CONFIG_NUMA is set
  * and otherwise forwards to kmem_cache_alloc_trace(), ignoring @node.
  *
  * Without CONFIG_TRACING both are inline: they call the plain allocator and
  * then hand the cache, the returned object and the requested size to
  * kasan_kmalloc() before returning the object.
  */
 308#ifdef CONFIG_TRACING
 309extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
 310
 311#ifdef CONFIG_NUMA
 312extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 313                                           gfp_t gfpflags,
 314                                           int node, size_t size);
 315#else
 316static __always_inline void *
 317kmem_cache_alloc_node_trace(struct kmem_cache *s,
 318                              gfp_t gfpflags,
 319                              int node, size_t size)
 320{
 321        return kmem_cache_alloc_trace(s, gfpflags, size);
 322}
 323#endif /* CONFIG_NUMA */
 324
 325#else /* CONFIG_TRACING */
 326static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s,
 327                gfp_t flags, size_t size)
 328{
 329        void *ret = kmem_cache_alloc(s, flags);
 330
            /* kasan_kmalloc() is called unconditionally, whatever ret is. */
 331        kasan_kmalloc(s, ret, size);
 332        return ret;
 333}
 334
 335static __always_inline void *
 336kmem_cache_alloc_node_trace(struct kmem_cache *s,
 337                              gfp_t gfpflags,
 338                              int node, size_t size)
 339{
 340        void *ret = kmem_cache_alloc_node(s, gfpflags, node);
 341
            /* kasan_kmalloc() is called unconditionally, whatever ret is. */
 342        kasan_kmalloc(s, ret, size);
 343        return ret;
 344}
 345#endif /* CONFIG_TRACING */
 346
 347extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
 348
 /*
  * kmalloc_order_trace() takes the same arguments as kmalloc_order().
  * With CONFIG_TRACING it is an out-of-line function; without it, it is an
  * inline call straight through to kmalloc_order().
  */
 349#ifdef CONFIG_TRACING
 350extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
 351#else
 352static __always_inline void *
 353kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 354{
 355        return kmalloc_order(size, flags, order);
 356}
 357#endif
 358
 359static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 360{
 361        unsigned int order = get_order(size);
 362        return kmalloc_order_trace(size, flags, order);
 363}
 364
 365/**
 366 * kmalloc - allocate memory
 367 * @size: how many bytes of memory are required.
 368 * @flags: the type of memory to allocate.
 369 *
 370 * kmalloc is the normal method of allocating memory
 371 * for objects smaller than page size in the kernel.
 372 *
 373 * The @flags argument may be one of:
 374 *
 375 * %GFP_USER - Allocate memory on behalf of user.  May sleep.
 376 *
 377 * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
 378 *
 379 * %GFP_ATOMIC - Allocation will not sleep.  May use emergency pools.
 380 *   For example, use this inside interrupt handlers.
 381 *
 382 * %GFP_HIGHUSER - Allocate pages from high memory.
 383 *
 384 * %GFP_NOIO - Do not do any I/O at all while trying to get memory.
 385 *
 386 * %GFP_NOFS - Do not make any fs calls while trying to get memory.
 387 *
 388 * %GFP_NOWAIT - Allocation will not sleep.
 389 *
 390 * %__GFP_THISNODE - Allocate node-local memory only.
 391 *
 392 * %GFP_DMA - Allocation suitable for DMA.
 393 *   Should only be used for kmalloc() caches. Otherwise, use a
 394 *   slab created with SLAB_CACHE_DMA.
 395 *
 396 * Also it is possible to set different flags by OR'ing
 397 * in one or more of the following additional @flags:
 398 *
 399 * %__GFP_COLD - Request cache-cold pages instead of
 400 *   trying to return cache-warm pages.
 401 *
 402 * %__GFP_HIGH - This allocation has high priority and may use emergency pools.
 403 *
 404 * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
 405 *   (think twice before using).
 406 *
 407 * %__GFP_NORETRY - If memory is not immediately available,
 408 *   then give up at once.
 409 *
 410 * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
 411 *
 412 * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
 413 *
 414 * There are other flags available as well, but these are not intended
 415 * for general use, and so are not documented here. For a full list of
 416 * potential flags, always refer to linux/gfp.h.
 417 */
 418static __always_inline void *kmalloc(size_t size, gfp_t flags)
 419{
 420        if (__builtin_constant_p(size)) {
 421                if (size > KMALLOC_MAX_CACHE_SIZE)
 422                        return kmalloc_large(size, flags);
 423#ifndef CONFIG_SLOB
 424                if (!(flags & GFP_DMA)) {
 425                        int index = kmalloc_index(size);
 426
 427                        if (!index)
 428                                return ZERO_SIZE_PTR;
 429
 430                        return kmem_cache_alloc_trace(kmalloc_caches[index],
 431                                        flags, size);
 432                }
 433#endif
 434        }
 435        return __kmalloc(size, flags);
 436}
 437
 438/*
 439 * Determine size used for the nth kmalloc cache.
 440 * return size or 0 if a kmalloc cache for that
 441 * size does not exist
 442 */
 443static __always_inline int kmalloc_size(int n)
 444{
 445#ifndef CONFIG_SLOB
 446        if (n > 2)
 447                return 1 << n;
 448
 449        if (n == 1 && KMALLOC_MIN_SIZE <= 32)
 450                return 96;
 451
 452        if (n == 2 && KMALLOC_MIN_SIZE <= 64)
 453                return 192;
 454#endif
 455        return 0;
 456}
 457
 458static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 459{
 460#ifndef CONFIG_SLOB
 461        if (__builtin_constant_p(size) &&
 462                size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
 463                int i = kmalloc_index(size);
 464
 465                if (!i)
 466                        return ZERO_SIZE_PTR;
 467
 468                return kmem_cache_alloc_node_trace(kmalloc_caches[i],
 469                                                flags, node, size);
 470        }
 471#endif
 472        return __kmalloc_node(size, flags, node);
 473}
 474
 475/*
 476 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
 477 * Intended for architectures that get misalignment faults even for 64-bit
 478 * integer aligned buffers.
     *
     * When the architecture does not define it, the default below is the
     * alignment of unsigned long long.
 479 */
 480#ifndef ARCH_SLAB_MINALIGN
 481#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 482#endif
 483
 484struct memcg_cache_array {
 485        struct rcu_head rcu;
 486        struct kmem_cache *entries[0];
 487};
 488
 489/*
 490 * This is the main placeholder for memcg-related information in kmem caches.
 491 * Both the root cache and the child caches will have it. For the root cache,
 492 * this will hold a dynamically allocated array large enough to hold
 493 * information about the currently limited memcgs in the system. To allow the
 494 * array to be accessed without taking any locks, on relocation we free the old
 495 * version only after a grace period.
 496 *
 497 * Child caches will hold extra metadata needed for its operation. Fields are:
 498 *
 499 * @memcg: pointer to the memcg this cache belongs to
 500 * @root_cache: pointer to the global, root cache, this cache was derived from
 501 *
 502 * Both root and child caches of the same kind are linked into a list chained
 503 * through @list.
 504 */
 505struct memcg_cache_params {
 506        bool is_root_cache;     /* NOTE(review): presumably tells which union member below is valid -- confirm */
 507        struct list_head list;  /* links root and child caches of the same kind */
 508        union {
                    /* root cache: RCU-protected pointer to the array of caches */
 509                struct memcg_cache_array __rcu *memcg_caches;
                    /* child cache: owning memcg and the root cache it derives from */
 510                struct {
 511                        struct mem_cgroup *memcg;
 512                        struct kmem_cache *root_cache;
 513                };
 514        };
 515};
 516
 517int memcg_update_all_caches(int num_memcgs);
 518
 519/**
 520 * kmalloc_array - allocate memory for an array.
 521 * @n: number of elements.
 522 * @size: element size.
 523 * @flags: the type of memory to allocate (see kmalloc).
 524 */
 525static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 526{
 527        if (size != 0 && n > SIZE_MAX / size)
 528                return NULL;
 529        return __kmalloc(n * size, flags);
 530}
 531
 532/**
 533 * kcalloc - allocate memory for an array. The memory is set to zero.
 534 * @n: number of elements.
 535 * @size: element size.
 536 * @flags: the type of memory to allocate (see kmalloc).
     *
     * Calls kmalloc_array() with __GFP_ZERO OR'ed into @flags, so its result
     * (including a NULL return) is exactly what kmalloc_array() produces.
 537 */
 538static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 539{
 540        return kmalloc_array(n, size, flags | __GFP_ZERO);
 541}
 542
 543/*
 544 * kmalloc_track_caller is a special version of kmalloc that, for slab leak
 545 * tracking, records the caller of the routine that invokes it rather than
 546 * that routine itself.
 547 * It's useful when the call to kmalloc comes from a widely-used standard
 548 * allocator where we care about the real place the memory allocation
 549 * request comes from.
     *
     * The macros below pass _RET_IP_ as the extra caller argument.
 550 */
 551extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 552#define kmalloc_track_caller(size, flags) \
 553        __kmalloc_track_caller(size, flags, _RET_IP_)
 554
 555#ifdef CONFIG_NUMA
 556extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 557#define kmalloc_node_track_caller(size, flags, node) \
 558        __kmalloc_node_track_caller(size, flags, node, \
 559                        _RET_IP_)
 560
 561#else /* CONFIG_NUMA */
 562
 /* Without NUMA the node argument is dropped and never evaluated. */
 563#define kmalloc_node_track_caller(size, flags, node) \
 564        kmalloc_track_caller(size, flags)
 565
 566#endif /* CONFIG_NUMA */
 567
 568/*
 569 * Shortcuts
 570 */
 /* kmem_cache_zalloc - kmem_cache_alloc() with __GFP_ZERO OR'ed into @flags. */
 571static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
 572{
 573        return kmem_cache_alloc(k, flags | __GFP_ZERO);
 574}
 575
 576/**
 577 * kzalloc - allocate memory. The memory is set to zero.
 578 * @size: how many bytes of memory are required.
 579 * @flags: the type of memory to allocate (see kmalloc).
     *
     * Equivalent to calling kmalloc() with __GFP_ZERO OR'ed into @flags.
 580 */
 581static inline void *kzalloc(size_t size, gfp_t flags)
 582{
 583        return kmalloc(size, flags | __GFP_ZERO);
 584}
 585
 586/**
 587 * kzalloc_node - allocate zeroed memory from a particular memory node.
 588 * @size: how many bytes of memory are required.
 589 * @flags: the type of memory to allocate (see kmalloc).
 590 * @node: memory node from which to allocate
     *
     * Equivalent to calling kmalloc_node() with __GFP_ZERO OR'ed into @flags.
 591 */
 592static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 593{
 594        return kmalloc_node(size, flags | __GFP_ZERO, node);
 595}
 596
 597unsigned int kmem_cache_size(struct kmem_cache *s);
 598void __init kmem_cache_init_late(void);
 599
 600#endif  /* _LINUX_SLAB_H */
 601