linux/arch/arm64/kernel/hibernate.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*:
   3 * Hibernate support specific for ARM64
   4 *
   5 * Derived from work on ARM hibernation support by:
   6 *
   7 * Ubuntu project, hibernation support for mach-dove
   8 * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
   9 * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
  10 *  https://lkml.org/lkml/2010/6/18/4
  11 *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
  12 *  https://patchwork.kernel.org/patch/96442/
  13 *
  14 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
  15 */
  16#define pr_fmt(x) "hibernate: " x
  17#include <linux/cpu.h>
  18#include <linux/kvm_host.h>
  19#include <linux/pm.h>
  20#include <linux/sched.h>
  21#include <linux/suspend.h>
  22#include <linux/utsname.h>
  23
  24#include <asm/barrier.h>
  25#include <asm/cacheflush.h>
  26#include <asm/cputype.h>
  27#include <asm/daifflags.h>
  28#include <asm/irqflags.h>
  29#include <asm/kexec.h>
  30#include <asm/memory.h>
  31#include <asm/mmu_context.h>
  32#include <asm/mte.h>
  33#include <asm/sections.h>
  34#include <asm/smp.h>
  35#include <asm/smp_plat.h>
  36#include <asm/suspend.h>
  37#include <asm/sysreg.h>
  38#include <asm/trans_pgd.h>
  39#include <asm/virt.h>
  40
  41/*
  42 * Hibernate core relies on this value being 0 on resume, and marks it
  43 * __nosavedata assuming it will keep the resume kernel's '0' value. This
  44 * doesn't happen with either KASLR.
  45 *
  46 * defined as "__visible int in_suspend __nosavedata" in
  47 * kernel/power/hibernate.c
  48 */
  49extern int in_suspend;
  50
  51/* Do we need to reset el2? */
  52#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
  53
  54/* temporary el2 vectors in the __hibernate_exit_text section. */
  55extern char hibernate_el2_vectors[];
  56
  57/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
  58extern char __hyp_stub_vectors[];
  59
  60/*
  61 * The logical cpu number we should resume on, initialised to a non-cpu
  62 * number.
  63 */
  64static int sleep_cpu = -EINVAL;
  65
  66/*
  67 * Values that may not change over hibernate/resume. We put the build number
  68 * and date in here so that we guarantee not to resume with a different
  69 * kernel.
  70 */
  71struct arch_hibernate_hdr_invariants {
  72        char            uts_version[__NEW_UTS_LEN + 1];
  73};
  74
  75/* These values need to be know across a hibernate/restore. */
  76static struct arch_hibernate_hdr {
  77        struct arch_hibernate_hdr_invariants invariants;
  78
  79        /* These are needed to find the relocated kernel if built with kaslr */
  80        phys_addr_t     ttbr1_el1;
  81        void            (*reenter_kernel)(void);
  82
  83        /*
  84         * We need to know where the __hyp_stub_vectors are after restore to
  85         * re-configure el2.
  86         */
  87        phys_addr_t     __hyp_stub_vectors;
  88
  89        u64             sleep_cpu_mpidr;
  90} resume_hdr;
  91
  92static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
  93{
  94        memset(i, 0, sizeof(*i));
  95        memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
  96}
  97
  98int pfn_is_nosave(unsigned long pfn)
  99{
 100        unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
 101        unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);
 102
 103        return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
 104                crash_is_nosave(pfn);
 105}
 106
 107void notrace save_processor_state(void)
 108{
 109        WARN_ON(num_online_cpus() != 1);
 110}
 111
 112void notrace restore_processor_state(void)
 113{
 114}
 115
 116int arch_hibernation_header_save(void *addr, unsigned int max_size)
 117{
 118        struct arch_hibernate_hdr *hdr = addr;
 119
 120        if (max_size < sizeof(*hdr))
 121                return -EOVERFLOW;
 122
 123        arch_hdr_invariants(&hdr->invariants);
 124        hdr->ttbr1_el1          = __pa_symbol(swapper_pg_dir);
 125        hdr->reenter_kernel     = _cpu_resume;
 126
 127        /* We can't use __hyp_get_vectors() because kvm may still be loaded */
 128        if (el2_reset_needed())
 129                hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors);
 130        else
 131                hdr->__hyp_stub_vectors = 0;
 132
 133        /* Save the mpidr of the cpu we called cpu_suspend() on... */
 134        if (sleep_cpu < 0) {
 135                pr_err("Failing to hibernate on an unknown CPU.\n");
 136                return -ENODEV;
 137        }
 138        hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
 139        pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
 140                hdr->sleep_cpu_mpidr);
 141
 142        return 0;
 143}
 144EXPORT_SYMBOL(arch_hibernation_header_save);
 145
 146int arch_hibernation_header_restore(void *addr)
 147{
 148        int ret;
 149        struct arch_hibernate_hdr_invariants invariants;
 150        struct arch_hibernate_hdr *hdr = addr;
 151
 152        arch_hdr_invariants(&invariants);
 153        if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
 154                pr_crit("Hibernate image not generated by this kernel!\n");
 155                return -EINVAL;
 156        }
 157
 158        sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr);
 159        pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
 160                hdr->sleep_cpu_mpidr);
 161        if (sleep_cpu < 0) {
 162                pr_crit("Hibernated on a CPU not known to this kernel!\n");
 163                sleep_cpu = -EINVAL;
 164                return -EINVAL;
 165        }
 166
 167        ret = bringup_hibernate_cpu(sleep_cpu);
 168        if (ret) {
 169                sleep_cpu = -EINVAL;
 170                return ret;
 171        }
 172
 173        resume_hdr = *hdr;
 174
 175        return 0;
 176}
 177EXPORT_SYMBOL(arch_hibernation_header_restore);
 178
 179static void *hibernate_page_alloc(void *arg)
 180{
 181        return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
 182}
 183
 184/*
 185 * Copies length bytes, starting at src_start into an new page,
 186 * perform cache maintenance, then maps it at the specified address low
 187 * address as executable.
 188 *
 189 * This is used by hibernate to copy the code it needs to execute when
 190 * overwriting the kernel text. This function generates a new set of page
 191 * tables, which it loads into ttbr0.
 192 *
 193 * Length is provided as we probably only want 4K of data, even on a 64K
 194 * page system.
 195 */
 196static int create_safe_exec_page(void *src_start, size_t length,
 197                                 phys_addr_t *phys_dst_addr)
 198{
 199        struct trans_pgd_info trans_info = {
 200                .trans_alloc_page       = hibernate_page_alloc,
 201                .trans_alloc_arg        = (__force void *)GFP_ATOMIC,
 202        };
 203
 204        void *page = (void *)get_safe_page(GFP_ATOMIC);
 205        phys_addr_t trans_ttbr0;
 206        unsigned long t0sz;
 207        int rc;
 208
 209        if (!page)
 210                return -ENOMEM;
 211
 212        memcpy(page, src_start, length);
 213        caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
 214        rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
 215        if (rc)
 216                return rc;
 217
 218        /*
 219         * Load our new page tables. A strict BBM approach requires that we
 220         * ensure that TLBs are free of any entries that may overlap with the
 221         * global mappings we are about to install.
 222         *
 223         * For a real hibernate/resume cycle TTBR0 currently points to a zero
 224         * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
 225         * runtime services), while for a userspace-driven test_resume cycle it
 226         * points to userspace page tables (and we must point it at a zero page
 227         * ourselves).
 228         *
 229         * We change T0SZ as part of installing the idmap. This is undone by
 230         * cpu_uninstall_idmap() in __cpu_suspend_exit().
 231         */
 232        cpu_set_reserved_ttbr0();
 233        local_flush_tlb_all();
 234        __cpu_set_tcr_t0sz(t0sz);
 235        write_sysreg(trans_ttbr0, ttbr0_el1);
 236        isb();
 237
 238        *phys_dst_addr = virt_to_phys(page);
 239
 240        return 0;
 241}
 242
 243#ifdef CONFIG_ARM64_MTE
 244
 245static DEFINE_XARRAY(mte_pages);
 246
 247static int save_tags(struct page *page, unsigned long pfn)
 248{
 249        void *tag_storage, *ret;
 250
 251        tag_storage = mte_allocate_tag_storage();
 252        if (!tag_storage)
 253                return -ENOMEM;
 254
 255        mte_save_page_tags(page_address(page), tag_storage);
 256
 257        ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL);
 258        if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
 259                mte_free_tag_storage(tag_storage);
 260                return xa_err(ret);
 261        } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) {
 262                mte_free_tag_storage(ret);
 263        }
 264
 265        return 0;
 266}
 267
 268static void swsusp_mte_free_storage(void)
 269{
 270        XA_STATE(xa_state, &mte_pages, 0);
 271        void *tags;
 272
 273        xa_lock(&mte_pages);
 274        xas_for_each(&xa_state, tags, ULONG_MAX) {
 275                mte_free_tag_storage(tags);
 276        }
 277        xa_unlock(&mte_pages);
 278
 279        xa_destroy(&mte_pages);
 280}
 281
 282static int swsusp_mte_save_tags(void)
 283{
 284        struct zone *zone;
 285        unsigned long pfn, max_zone_pfn;
 286        int ret = 0;
 287        int n = 0;
 288
 289        if (!system_supports_mte())
 290                return 0;
 291
 292        for_each_populated_zone(zone) {
 293                max_zone_pfn = zone_end_pfn(zone);
 294                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
 295                        struct page *page = pfn_to_online_page(pfn);
 296
 297                        if (!page)
 298                                continue;
 299
 300                        if (!test_bit(PG_mte_tagged, &page->flags))
 301                                continue;
 302
 303                        ret = save_tags(page, pfn);
 304                        if (ret) {
 305                                swsusp_mte_free_storage();
 306                                goto out;
 307                        }
 308
 309                        n++;
 310                }
 311        }
 312        pr_info("Saved %d MTE pages\n", n);
 313
 314out:
 315        return ret;
 316}
 317
 318static void swsusp_mte_restore_tags(void)
 319{
 320        XA_STATE(xa_state, &mte_pages, 0);
 321        int n = 0;
 322        void *tags;
 323
 324        xa_lock(&mte_pages);
 325        xas_for_each(&xa_state, tags, ULONG_MAX) {
 326                unsigned long pfn = xa_state.xa_index;
 327                struct page *page = pfn_to_online_page(pfn);
 328
 329                /*
 330                 * It is not required to invoke page_kasan_tag_reset(page)
 331                 * at this point since the tags stored in page->flags are
 332                 * already restored.
 333                 */
 334                mte_restore_page_tags(page_address(page), tags);
 335
 336                mte_free_tag_storage(tags);
 337                n++;
 338        }
 339        xa_unlock(&mte_pages);
 340
 341        pr_info("Restored %d MTE pages\n", n);
 342
 343        xa_destroy(&mte_pages);
 344}
 345
 346#else   /* CONFIG_ARM64_MTE */
 347
 348static int swsusp_mte_save_tags(void)
 349{
 350        return 0;
 351}
 352
 353static void swsusp_mte_restore_tags(void)
 354{
 355}
 356
 357#endif  /* CONFIG_ARM64_MTE */
 358
 359int swsusp_arch_suspend(void)
 360{
 361        int ret = 0;
 362        unsigned long flags;
 363        struct sleep_stack_data state;
 364
 365        if (cpus_are_stuck_in_kernel()) {
 366                pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
 367                return -EBUSY;
 368        }
 369
 370        flags = local_daif_save();
 371
 372        if (__cpu_suspend_enter(&state)) {
 373                /* make the crash dump kernel image visible/saveable */
 374                crash_prepare_suspend();
 375
 376                ret = swsusp_mte_save_tags();
 377                if (ret)
 378                        return ret;
 379
 380                sleep_cpu = smp_processor_id();
 381                ret = swsusp_save();
 382        } else {
 383                /* Clean kernel core startup/idle code to PoC*/
 384                dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
 385                                    (unsigned long)__mmuoff_data_end);
 386                dcache_clean_inval_poc((unsigned long)__idmap_text_start,
 387                                    (unsigned long)__idmap_text_end);
 388
 389                /* Clean kvm setup code to PoC? */
 390                if (el2_reset_needed()) {
 391                        dcache_clean_inval_poc(
 392                                (unsigned long)__hyp_idmap_text_start,
 393                                (unsigned long)__hyp_idmap_text_end);
 394                        dcache_clean_inval_poc((unsigned long)__hyp_text_start,
 395                                            (unsigned long)__hyp_text_end);
 396                }
 397
 398                swsusp_mte_restore_tags();
 399
 400                /* make the crash dump kernel image protected again */
 401                crash_post_resume();
 402
 403                /*
 404                 * Tell the hibernation core that we've just restored
 405                 * the memory
 406                 */
 407                in_suspend = 0;
 408
 409                sleep_cpu = -EINVAL;
 410                __cpu_suspend_exit();
 411
 412                /*
 413                 * Just in case the boot kernel did turn the SSBD
 414                 * mitigation off behind our back, let's set the state
 415                 * to what we expect it to be.
 416                 */
 417                spectre_v4_enable_mitigation(NULL);
 418        }
 419
 420        local_daif_restore(flags);
 421
 422        return ret;
 423}
 424
 425/*
 426 * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
 427 *
 428 * Memory allocated by get_safe_page() will be dealt with by the hibernate code,
 429 * we don't need to free it here.
 430 */
 431int swsusp_arch_resume(void)
 432{
 433        int rc;
 434        void *zero_page;
 435        size_t exit_size;
 436        pgd_t *tmp_pg_dir;
 437        void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
 438                                          void *, phys_addr_t, phys_addr_t);
 439        struct trans_pgd_info trans_info = {
 440                .trans_alloc_page       = hibernate_page_alloc,
 441                .trans_alloc_arg        = (void *)GFP_ATOMIC,
 442        };
 443
 444        /*
 445         * Restoring the memory image will overwrite the ttbr1 page tables.
 446         * Create a second copy of just the linear map, and use this when
 447         * restoring.
 448         */
 449        rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
 450                                   PAGE_END);
 451        if (rc)
 452                return rc;
 453
 454        /*
 455         * We need a zero page that is zero before & after resume in order to
 456         * to break before make on the ttbr1 page tables.
 457         */
 458        zero_page = (void *)get_safe_page(GFP_ATOMIC);
 459        if (!zero_page) {
 460                pr_err("Failed to allocate zero page.\n");
 461                return -ENOMEM;
 462        }
 463
 464        exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
 465        /*
 466         * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
 467         * a new set of ttbr0 page tables and load them.
 468         */
 469        rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
 470                                   (phys_addr_t *)&hibernate_exit);
 471        if (rc) {
 472                pr_err("Failed to create safe executable page for hibernate_exit code.\n");
 473                return rc;
 474        }
 475
 476        /*
 477         * The hibernate exit text contains a set of el2 vectors, that will
 478         * be executed at el2 with the mmu off in order to reload hyp-stub.
 479         */
 480        dcache_clean_inval_poc((unsigned long)hibernate_exit,
 481                            (unsigned long)hibernate_exit + exit_size);
 482
 483        /*
 484         * KASLR will cause the el2 vectors to be in a different location in
 485         * the resumed kernel. Load hibernate's temporary copy into el2.
 486         *
 487         * We can skip this step if we booted at EL1, or are running with VHE.
 488         */
 489        if (el2_reset_needed()) {
 490                phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
 491                el2_vectors += hibernate_el2_vectors -
 492                               __hibernate_exit_text_start;     /* offset */
 493
 494                __hyp_set_vectors(el2_vectors);
 495        }
 496
 497        hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
 498                       resume_hdr.reenter_kernel, restore_pblist,
 499                       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
 500
 501        return 0;
 502}
 503
 504int hibernate_resume_nonboot_cpu_disable(void)
 505{
 506        if (sleep_cpu < 0) {
 507                pr_err("Failing to resume from hibernate on an unknown CPU.\n");
 508                return -ENODEV;
 509        }
 510
 511        return freeze_secondary_cpus(sleep_cpu);
 512}
 513