linux/mm/kasan/common.c
// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains common generic and tag-based KASAN code.
 *
 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
 * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
 *
 * Some code borrowed from https://github.com/xairy/kasan-prototype by
 *        Andrey Konovalov <andreyknvl@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/export.h>
#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/linkage.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/bug.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "kasan.h"
#include "../slab.h"
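/*
 * Capture the current stack trace, drop irq-entry frames, and store it in
 * the stack depot. The returned handle is what gets recorded in alloc/free
 * tracks; @flags is only used if the depot needs to allocate memory.
 */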
depot_stack_handle_t kasan_save_stack(gfp_t flags)
{
        unsigned long entries[KASAN_STACK_DEPTH];
        unsigned int nr_entries;

        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        nr_entries = filter_irq_stacks(entries, nr_entries);
        return stack_depot_save(entries, nr_entries, flags);
}

void kasan_set_track(struct kasan_track *track, gfp_t flags)
{
        track->pid = current->pid;
        track->stack = kasan_save_stack(flags);
}

void kasan_enable_current(void)
{
        current->kasan_depth++;
}

void kasan_disable_current(void)
{
        current->kasan_depth--;
}

bool __kasan_check_read(const volatile void *p, unsigned int size)
{
        return check_memory_region((unsigned long)p, size, false, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_read);

bool __kasan_check_write(const volatile void *p, unsigned int size)
{
        return check_memory_region((unsigned long)p, size, true, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_write);
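/*
 * Instrumented replacements for the string functions: each access range is
 * validated with check_memory_region() before the uninstrumented __mem*()
 * implementation runs, and NULL is returned if a check fails.
 */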
#undef memset
void *memset(void *addr, int c, size_t len)
{
        if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_))
                return NULL;

        return __memset(addr, c, len);
}

#ifdef __HAVE_ARCH_MEMMOVE
#undef memmove
void *memmove(void *dest, const void *src, size_t len)
{
        if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
            !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
                return NULL;

        return __memmove(dest, src, len);
}
#endif

#undef memcpy
void *memcpy(void *dest, const void *src, size_t len)
{
        if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
            !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
                return NULL;

        return __memcpy(dest, src, len);
}

/*
 * Poisons the shadow memory for 'size' bytes starting from 'addr'.
 * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE.
 */
void kasan_poison_shadow(const void *address, size_t size, u8 value)
{
        void *shadow_start, *shadow_end;

        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_poison_object_data) pass tagged
         * addresses to this function.
         */
        address = reset_tag(address);

        shadow_start = kasan_mem_to_shadow(address);
        shadow_end = kasan_mem_to_shadow(address + size);

        __memset(shadow_start, value, shadow_end - shadow_start);
}
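/*
 * Mark 'size' bytes starting at 'address' as accessible. When 'size' is not
 * a multiple of KASAN_SHADOW_SCALE_SIZE, the last shadow byte encodes either
 * the pointer tag (software tag-based mode) or the number of accessible
 * bytes in the final granule (generic mode).
 */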
void kasan_unpoison_shadow(const void *address, size_t size)
{
        u8 tag = get_tag(address);

        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
         * addresses to this function.
         */
        address = reset_tag(address);

        kasan_poison_shadow(address, size, tag);

        if (size & KASAN_SHADOW_MASK) {
                u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);

                if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                        *shadow = tag;
                else
                        *shadow = size & KASAN_SHADOW_MASK;
        }
}

static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
{
        void *base = task_stack_page(task);
        size_t size = sp - base;

        kasan_unpoison_shadow(base, size);
}

/* Unpoison the entire stack for a task. */
void kasan_unpoison_task_stack(struct task_struct *task)
{
        __kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
}

/* Unpoison the stack for the current task beyond a watermark sp value. */
asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
{
        /*
         * Calculate the task stack base address.  Avoid using 'current'
         * because this function is called by early resume code which hasn't
         * yet set up the percpu register (%gs).
         */
        void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));

        kasan_unpoison_shadow(base, watermark - base);
}
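/*
 * Pick one random tag for the whole high-order allocation, tag each
 * struct page with it, and unpoison the backing memory. Highmem pages
 * have no shadow and are skipped.
 */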
void kasan_alloc_pages(struct page *page, unsigned int order)
{
        u8 tag;
        unsigned long i;

        if (unlikely(PageHighMem(page)))
                return;

        tag = random_tag();
        for (i = 0; i < (1 << order); i++)
                page_kasan_tag_set(page + i, tag);
        kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order);
}

void kasan_free_pages(struct page *page, unsigned int order)
{
        if (likely(!PageHighMem(page)))
                kasan_poison_shadow(page_address(page),
                                PAGE_SIZE << order,
                                KASAN_FREE_PAGE);
}

/*
 * Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
 * For larger allocations larger redzones are used.
 */
static inline unsigned int optimal_redzone(unsigned int object_size)
{
        if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                return 0;

        return
                object_size <= 64        - 16   ? 16 :
                object_size <= 128       - 32   ? 32 :
                object_size <= 512       - 64   ? 64 :
                object_size <= 4096      - 128  ? 128 :
                object_size <= (1 << 14) - 256  ? 256 :
                object_size <= (1 << 15) - 512  ? 512 :
                object_size <= (1 << 16) - 1024 ? 1024 : 2048;
}
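/*
 * Grow the slab object size to make room for KASAN metadata: alloc metadata
 * always, free metadata when the cache has a constructor, is
 * SLAB_TYPESAFE_BY_RCU, or the object is too small to hold it, plus an
 * adaptive redzone. The result is capped at KMALLOC_MAX_SIZE; if the
 * metadata still does not fit, KASAN is left disabled for this cache.
 */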
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
                        slab_flags_t *flags)
{
        unsigned int orig_size = *size;
        unsigned int redzone_size;
        int redzone_adjust;

        /* Add alloc meta. */
        cache->kasan_info.alloc_meta_offset = *size;
        *size += sizeof(struct kasan_alloc_meta);

        /* Add free meta. */
        if (IS_ENABLED(CONFIG_KASAN_GENERIC) &&
            (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
             cache->object_size < sizeof(struct kasan_free_meta))) {
                cache->kasan_info.free_meta_offset = *size;
                *size += sizeof(struct kasan_free_meta);
        }

        redzone_size = optimal_redzone(cache->object_size);
        redzone_adjust = redzone_size - (*size - cache->object_size);
        if (redzone_adjust > 0)
                *size += redzone_adjust;

        *size = min_t(unsigned int, KMALLOC_MAX_SIZE,
                        max(*size, cache->object_size + redzone_size));

        /*
         * If the metadata doesn't fit, don't enable KASAN at all.
         */
        if (*size <= cache->kasan_info.alloc_meta_offset ||
                        *size <= cache->kasan_info.free_meta_offset) {
                cache->kasan_info.alloc_meta_offset = 0;
                cache->kasan_info.free_meta_offset = 0;
                *size = orig_size;
                return;
        }

        *flags |= SLAB_KASAN;
}

size_t kasan_metadata_size(struct kmem_cache *cache)
{
        return (cache->kasan_info.alloc_meta_offset ?
                sizeof(struct kasan_alloc_meta) : 0) +
                (cache->kasan_info.free_meta_offset ?
                sizeof(struct kasan_free_meta) : 0);
}

struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
                                        const void *object)
{
        return (void *)object + cache->kasan_info.alloc_meta_offset;
}

struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
                                      const void *object)
{
        BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
        return (void *)object + cache->kasan_info.free_meta_offset;
}
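/*
 * Poison a freshly allocated slab page: reset the per-page tags and mark
 * the entire page range as redzone. Individual objects are unpoisoned
 * later, when they are handed out by the allocator.
 */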
void kasan_poison_slab(struct page *page)
{
        unsigned long i;

        for (i = 0; i < compound_nr(page); i++)
                page_kasan_tag_reset(page + i);
        kasan_poison_shadow(page_address(page), page_size(page),
                        KASAN_KMALLOC_REDZONE);
}

void kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
{
        kasan_unpoison_shadow(object, cache->object_size);
}

void kasan_poison_object_data(struct kmem_cache *cache, void *object)
{
        kasan_poison_shadow(object,
                        round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
                        KASAN_KMALLOC_REDZONE);
}

/*
 * This function assigns a tag to an object considering the following:
 * 1. A cache might have a constructor, which might save a pointer to a slab
 *    object somewhere (e.g. in the object itself). We preassign a tag for
 *    each object in caches with constructors during slab creation and reuse
 *    the same tag each time a particular object is allocated.
 * 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
 *    accessed after being freed. We preassign tags for objects in these
 *    caches as well.
 * 3. For the SLAB allocator we can't preassign tags randomly since the
 *    freelist is stored as an array of indexes instead of a linked list.
 *    Assign tags based on object indexes, so that objects that are next to
 *    each other get different tags.
 */
static u8 assign_tag(struct kmem_cache *cache, const void *object,
                        bool init, bool keep_tag)
{
        /*
         * 1. When an object is kmalloc()'ed, two hooks are called:
         *    kasan_slab_alloc() and kasan_kmalloc(). We assign the
         *    tag only in the first one.
         * 2. We reuse the same tag for krealloc'ed objects.
         */
        if (keep_tag)
                return get_tag(object);

        /*
         * If the cache neither has a constructor nor has SLAB_TYPESAFE_BY_RCU
         * set, assign a tag when the object is being allocated (init == false).
         */
        if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
                return init ? KASAN_TAG_KERNEL : random_tag();

        /* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
        /* For SLAB assign tags based on the object index in the freelist. */
        return (u8)obj_to_index(cache, virt_to_page(object), (void *)object);
#else
        /*
         * For SLUB assign a random tag during slab creation, otherwise reuse
         * the already assigned tag.
         */
        return init ? random_tag() : get_tag(object);
#endif
}

void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
                                                const void *object)
{
        struct kasan_alloc_meta *alloc_info;

        if (!(cache->flags & SLAB_KASAN))
                return (void *)object;

        alloc_info = get_alloc_info(cache, object);
        __memset(alloc_info, 0, sizeof(*alloc_info));

        if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                object = set_tag(object,
                                assign_tag(cache, object, true, false));

        return (void *)object;
}
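/*
 * Used on the free path: the shadow byte is invalid if it marks the object
 * as poisoned (generic KASAN: any negative or out-of-range value) or, for
 * software tag-based KASAN, if it holds an invalid tag or one that does not
 * match the pointer tag.
 */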
static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
{
        if (IS_ENABLED(CONFIG_KASAN_GENERIC))
                return shadow_byte < 0 ||
                        shadow_byte >= KASAN_SHADOW_SCALE_SIZE;

        /* else CONFIG_KASAN_SW_TAGS: */
        if ((u8)shadow_byte == KASAN_TAG_INVALID)
                return true;
        if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte))
                return true;

        return false;
}
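/*
 * Common free hook: detect frees of invalid or already-freed objects,
 * poison the object, and, with generic KASAN, defer the actual release by
 * putting the object into the quarantine. Returns true if the slab
 * allocator must not free the object right away.
 */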
static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
                              unsigned long ip, bool quarantine)
{
        s8 shadow_byte;
        u8 tag;
        void *tagged_object;
        unsigned long rounded_up_size;

        tag = get_tag(object);
        tagged_object = object;
        object = reset_tag(object);

        if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
            object)) {
                kasan_report_invalid_free(tagged_object, ip);
                return true;
        }

        /* RCU slabs could be legally used after free within the RCU period */
        if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
                return false;

        shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
        if (shadow_invalid(tag, shadow_byte)) {
                kasan_report_invalid_free(tagged_object, ip);
                return true;
        }

        rounded_up_size = round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE);
        kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);

        if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) ||
                        unlikely(!(cache->flags & SLAB_KASAN)))
                return false;

        kasan_set_free_info(cache, object, tag);

        quarantine_put(get_free_info(cache, object), cache);

        return IS_ENABLED(CONFIG_KASAN_GENERIC);
}

bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
{
        return __kasan_slab_free(cache, object, ip, true);
}
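/*
 * Unpoison the requested size, poison the remainder of the object up to
 * object_size as a redzone, record the allocation stack, and return the
 * object pointer, tagged when software tag-based KASAN is enabled.
 */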
static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
                                size_t size, gfp_t flags, bool keep_tag)
{
        unsigned long redzone_start;
        unsigned long redzone_end;
        u8 tag = 0xff;

        if (gfpflags_allow_blocking(flags))
                quarantine_reduce();

        if (unlikely(object == NULL))
                return NULL;

        redzone_start = round_up((unsigned long)(object + size),
                                KASAN_SHADOW_SCALE_SIZE);
        redzone_end = round_up((unsigned long)object + cache->object_size,
                                KASAN_SHADOW_SCALE_SIZE);

        if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                tag = assign_tag(cache, object, false, keep_tag);

        /* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */
        kasan_unpoison_shadow(set_tag(object, tag), size);
        kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
                KASAN_KMALLOC_REDZONE);

        if (cache->flags & SLAB_KASAN)
                kasan_set_track(&get_alloc_info(cache, object)->alloc_track, flags);

        return set_tag(object, tag);
}

void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
                                        gfp_t flags)
{
        return __kasan_kmalloc(cache, object, cache->object_size, flags, false);
}

void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
                                size_t size, gfp_t flags)
{
        return __kasan_kmalloc(cache, object, size, flags, true);
}
EXPORT_SYMBOL(kasan_kmalloc);
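/*
 * kmalloc() allocations too large for the slab caches come straight from
 * the page allocator: unpoison the requested size and poison the rest of
 * the page(s) as a page redzone.
 */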
void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
                                                gfp_t flags)
{
        struct page *page;
        unsigned long redzone_start;
        unsigned long redzone_end;

        if (gfpflags_allow_blocking(flags))
                quarantine_reduce();

        if (unlikely(ptr == NULL))
                return NULL;

        page = virt_to_page(ptr);
        redzone_start = round_up((unsigned long)(ptr + size),
                                KASAN_SHADOW_SCALE_SIZE);
        redzone_end = (unsigned long)ptr + page_size(page);

        kasan_unpoison_shadow(ptr, size);
        kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
                KASAN_PAGE_REDZONE);

        return (void *)ptr;
}

void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
        struct page *page;

        if (unlikely(object == ZERO_SIZE_PTR))
                return (void *)object;

        page = virt_to_head_page(object);

        if (unlikely(!PageSlab(page)))
                return kasan_kmalloc_large(object, size, flags);
        else
                return __kasan_kmalloc(page->slab_cache, object, size,
                                                flags, true);
}
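/*
 * Poison the memory behind a pointer that may be backed either by a slab
 * object or directly by the page allocator. A page-allocator pointer that
 * does not point to the start of the allocation is reported as an invalid
 * free.
 */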
void kasan_poison_kfree(void *ptr, unsigned long ip)
{
        struct page *page;

        page = virt_to_head_page(ptr);

        if (unlikely(!PageSlab(page))) {
                if (ptr != page_address(page)) {
                        kasan_report_invalid_free(ptr, ip);
                        return;
                }
                kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
        } else {
                __kasan_slab_free(page->slab_cache, ptr, ip, false);
        }
}

void kasan_kfree_large(void *ptr, unsigned long ip)
{
        if (ptr != page_address(virt_to_head_page(ptr)))
                kasan_report_invalid_free(ptr, ip);
        /* The object will be poisoned by page_alloc. */
}

#ifndef CONFIG_KASAN_VMALLOC
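/*
 * Without CONFIG_KASAN_VMALLOC, shadow for module space is allocated
 * explicitly: vmalloc a shadow region sized for 'size', initialize it to
 * KASAN_SHADOW_INIT, and mark the owning vm area with VM_KASAN so the
 * shadow is freed together with the module mapping.
 */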
int kasan_module_alloc(void *addr, size_t size)
{
        void *ret;
        size_t scaled_size;
        size_t shadow_size;
        unsigned long shadow_start;

        shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
        scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT;
        shadow_size = round_up(scaled_size, PAGE_SIZE);

        if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
                return -EINVAL;

        ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
                        shadow_start + shadow_size,
                        GFP_KERNEL,
                        PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
                        __builtin_return_address(0));

        if (ret) {
                __memset(ret, KASAN_SHADOW_INIT, shadow_size);
                find_vm_area(addr)->flags |= VM_KASAN;
                kmemleak_ignore(ret);
                return 0;
        }

        return -ENOMEM;
}

void kasan_free_shadow(const struct vm_struct *vm)
{
        if (vm->flags & VM_KASAN)
                vfree(kasan_mem_to_shadow(vm->addr));
}
#endif

#ifdef CONFIG_MEMORY_HOTPLUG
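/* Walk the kernel page tables to check whether shadow already exists for addr. */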
static bool shadow_mapped(unsigned long addr)
{
        pgd_t *pgd = pgd_offset_k(addr);
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (pgd_none(*pgd))
                return false;
        p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d))
                return false;
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return false;

        /*
         * We can't use pud_large() or pud_huge(): the former is
         * arch-specific, the latter depends on HUGETLB_PAGE. So let's abuse
         * pud_bad(): if the pud is bad, it's bad because it's huge.
         */
        if (pud_bad(*pud))
                return true;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return false;

        if (pmd_bad(*pmd))
                return true;
        pte = pte_offset_kernel(pmd, addr);
        return !pte_none(*pte);
}

static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                        unsigned long action, void *data)
{
        struct memory_notify *mem_data = data;
        unsigned long nr_shadow_pages, start_kaddr, shadow_start;
        unsigned long shadow_end, shadow_size;

        nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
        start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
        shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
        shadow_size = nr_shadow_pages << PAGE_SHIFT;
        shadow_end = shadow_start + shadow_size;

        if (WARN_ON(mem_data->nr_pages % KASAN_SHADOW_SCALE_SIZE) ||
                WARN_ON(start_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT)))
                return NOTIFY_BAD;

        switch (action) {
        case MEM_GOING_ONLINE: {
                void *ret;

                /*
                 * If the shadow is already mapped, then it must have been
                 * mapped during boot. This can happen when onlining
                 * previously offlined memory.
                 */
                if (shadow_mapped(shadow_start))
                        return NOTIFY_OK;

                ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
                                        shadow_end, GFP_KERNEL,
                                        PAGE_KERNEL, VM_NO_GUARD,
                                        pfn_to_nid(mem_data->start_pfn),
                                        __builtin_return_address(0));
                if (!ret)
                        return NOTIFY_BAD;

                kmemleak_ignore(ret);
                return NOTIFY_OK;
        }
        case MEM_CANCEL_ONLINE:
        case MEM_OFFLINE: {
                struct vm_struct *vm;

                /*
                 * shadow_start was either mapped during boot by kasan_init()
                 * or during memory online by __vmalloc_node_range().
                 * In the latter case we can use vfree() to free the shadow.
                 * A non-NULL result from find_vm_area() tells us that it was
                 * indeed the latter case.
                 *
                 * Currently it's not possible to free shadow mapped
                 * during boot by kasan_init(): the code to do that hasn't
                 * been written yet. So we'll just leak the memory.
                 */
                vm = find_vm_area((void *)shadow_start);
                if (vm)
                        vfree((void *)shadow_start);
        }
        }

        return NOTIFY_OK;
}

static int __init kasan_memhotplug_init(void)
{
        hotplug_memory_notifier(kasan_mem_notifier, 0);

        return 0;
}

core_initcall(kasan_memhotplug_init);
#endif

#ifdef CONFIG_KASAN_VMALLOC
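/*
 * Map one shadow page for a vmalloc region: allocate a page pre-poisoned
 * with KASAN_VMALLOC_INVALID and install it, re-checking the PTE under
 * init_mm.page_table_lock so that a concurrent populator wins cleanly and
 * the spare page is freed.
 */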
static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                      void *unused)
{
        unsigned long page;
        pte_t pte;

        if (likely(!pte_none(*ptep)))
                return 0;

        page = __get_free_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;

        memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
        pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);

        spin_lock(&init_mm.page_table_lock);
        if (likely(pte_none(*ptep))) {
                set_pte_at(&init_mm, addr, ptep, pte);
                page = 0;
        }
        spin_unlock(&init_mm.page_table_lock);
        if (page)
                free_page(page);
        return 0;
}

int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
        unsigned long shadow_start, shadow_end;
        int ret;

        if (!is_vmalloc_or_module_addr((void *)addr))
                return 0;

        shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
        shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
        shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
        shadow_end = ALIGN(shadow_end, PAGE_SIZE);

        ret = apply_to_page_range(&init_mm, shadow_start,
                                  shadow_end - shadow_start,
                                  kasan_populate_vmalloc_pte, NULL);
        if (ret)
                return ret;

        flush_cache_vmap(shadow_start, shadow_end);

        /*
         * We need to be careful about inter-cpu effects here. Consider:
         *
         *   CPU#0                                CPU#1
         * WRITE_ONCE(p, vmalloc(100));         while (x = READ_ONCE(p)) ;
         *                                      p[99] = 1;
         *
         * With compiler instrumentation, that ends up looking like this:
         *
         *   CPU#0                                CPU#1
         * // vmalloc() allocates memory
         * // let a = area->addr
         * // we reach kasan_populate_vmalloc
         * // and call kasan_unpoison_shadow:
         * STORE shadow(a), unpoison_val
         * ...
         * STORE shadow(a+99), unpoison_val     x = LOAD p
         * // rest of vmalloc process           <data dependency>
         * STORE p, a                           LOAD shadow(x+99)
         *
         * If there is no barrier between the end of unpoisoning the shadow
         * and the store of the result to p, the stores could be committed
         * in a different order by CPU#0, and CPU#1 could erroneously observe
         * poison in the shadow.
         *
         * We need some sort of barrier between the stores.
         *
         * In the vmalloc() case, this is provided by a smp_wmb() in
         * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
         * get_vm_area() and friends, the caller gets shadow allocated but
         * doesn't have any pages mapped into the virtual address space that
         * has been reserved. Mapping those pages in will involve taking and
         * releasing a page-table lock, which will provide the barrier.
         */

        return 0;
}

/*
 * Poison the shadow for a vmalloc region. Called as part of the
 * freeing process at the time the region is freed.
 */
void kasan_poison_vmalloc(const void *start, unsigned long size)
{
        if (!is_vmalloc_or_module_addr(start))
                return;

        size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
        kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
}

void kasan_unpoison_vmalloc(const void *start, unsigned long size)
{
        if (!is_vmalloc_or_module_addr(start))
                return;

        kasan_unpoison_shadow(start, size);
}
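/*
 * Undo kasan_populate_vmalloc_pte(): clear the shadow PTE under
 * init_mm.page_table_lock and free the backing page.
 */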
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                        void *unused)
{
        unsigned long page;

        page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);

        spin_lock(&init_mm.page_table_lock);

        if (likely(!pte_none(*ptep))) {
                pte_clear(&init_mm, addr, ptep);
                free_page(page);
        }
        spin_unlock(&init_mm.page_table_lock);

        return 0;
}

/*
 * Release the backing for the vmalloc region [start, end), which
 * lies within the free region [free_region_start, free_region_end).
 *
 * This can be run lazily, long after the region was freed. It runs
 * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
 * infrastructure.
 *
 * How does this work?
 * -------------------
 *
 * We have a region that is page aligned, labelled as A.
 * That might not map onto the shadow in a way that is page-aligned:
 *
 *                    start                     end
 *                    v                         v
 * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
 *  -------- -------- --------          -------- --------
 *      |        |       |                 |        |
 *      |        |       |         /-------/        |
 *      \-------\|/------/         |/---------------/
 *              |||                ||
 *             |??AAAAAA|AAAAAAAA|AA??????|                < shadow
 *                 (1)      (2)      (3)
 *
 * First we align the start upwards and the end downwards, so that the
 * shadow of the region aligns with shadow page boundaries. In the
 * example, this gives us the shadow page (2). This is the shadow entirely
 * covered by this allocation.
 *
 * Then we have the tricky bits. We want to know if we can free the
 * partially covered shadow pages - (1) and (3) in the example. For this,
 * we are given the start and end of the free region that contains this
 * allocation. Extending our previous example, we could have:
 *
 *  free_region_start                                    free_region_end
 *  |                 start                     end      |
 *  v                 v                         v        v
 * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
 *  -------- -------- --------          -------- --------
 *      |        |       |                 |        |
 *      |        |       |         /-------/        |
 *      \-------\|/------/         |/---------------/
 *              |||                ||
 *             |FFAAAAAA|AAAAAAAA|AAF?????|                < shadow
 *                 (1)      (2)      (3)
 *
 * Once again, we align the start of the free region up, and the end of
 * the free region down so that the shadow is page aligned. So we can free
 * page (1) - we know no allocation currently uses anything in that page,
 * because all of it is in the vmalloc free region. But we cannot free
 * page (3), because we can't be sure that the rest of it is unused.
 *
 * We only consider pages that contain part of the original region for
 * freeing: we don't try to free other pages from the free region or we'd
 * end up trying to free huge chunks of virtual address space.
 *
 * Concurrency
 * -----------
 *
 * How do we know that we're not freeing a page that is simultaneously
 * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
 *
 * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
 * at the same time. While we run under free_vmap_area_lock, the population
 * code does not.
 *
 * free_vmap_area_lock instead operates to ensure that the larger range
 * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
 * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
 * no space identified as free will become used while we are running. This
 * means that so long as we are careful with alignment and only free shadow
 * pages entirely covered by the free region, we will not run into any
 * trouble - any simultaneous allocations will be for disjoint regions.
 */
void kasan_release_vmalloc(unsigned long start, unsigned long end,
                           unsigned long free_region_start,
                           unsigned long free_region_end)
{
        void *shadow_start, *shadow_end;
        unsigned long region_start, region_end;
        unsigned long size;

        region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
        region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);

        free_region_start = ALIGN(free_region_start,
                                  PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);

        if (start != region_start &&
            free_region_start < region_start)
                region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;

        free_region_end = ALIGN_DOWN(free_region_end,
                                     PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);

        if (end != region_end &&
            free_region_end > region_end)
                region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;

        shadow_start = kasan_mem_to_shadow((void *)region_start);
        shadow_end = kasan_mem_to_shadow((void *)region_end);

        if (shadow_end > shadow_start) {
                size = shadow_end - shadow_start;
                apply_to_existing_page_range(&init_mm,
                                             (unsigned long)shadow_start,
                                             size, kasan_depopulate_vmalloc_pte,
                                             NULL);
                flush_tlb_kernel_range((unsigned long)shadow_start,
                                       (unsigned long)shadow_end);
        }
}
#endif