linux/arch/arm64/kernel/hibernate.c
<<
>>
Prefs
   1/*
   2 * Hibernate support specific for ARM64
   3 *
   4 * Derived from work on ARM hibernation support by:
   5 *
   6 * Ubuntu project, hibernation support for mach-dove
   7 * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
   8 * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
   9 *  https://lkml.org/lkml/2010/6/18/4
  10 *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
  11 *  https://patchwork.kernel.org/patch/96442/
  12 *
  13 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
  14 *
  15 * License terms: GNU General Public License (GPL) version 2
  16 */
  17#define pr_fmt(x) "hibernate: " x
  18#include <linux/cpu.h>
  19#include <linux/kvm_host.h>
  20#include <linux/mm.h>
  21#include <linux/pm.h>
  22#include <linux/sched.h>
  23#include <linux/suspend.h>
  24#include <linux/utsname.h>
  25#include <linux/version.h>
  26
  27#include <asm/barrier.h>
  28#include <asm/cacheflush.h>
  29#include <asm/cputype.h>
  30#include <asm/daifflags.h>
  31#include <asm/irqflags.h>
  32#include <asm/kexec.h>
  33#include <asm/memory.h>
  34#include <asm/mmu_context.h>
  35#include <asm/pgalloc.h>
  36#include <asm/pgtable.h>
  37#include <asm/pgtable-hwdef.h>
  38#include <asm/sections.h>
  39#include <asm/smp.h>
  40#include <asm/smp_plat.h>
  41#include <asm/suspend.h>
  42#include <asm/sysreg.h>
  43#include <asm/virt.h>
  44
  45/*
  46 * Hibernate core relies on this value being 0 on resume, and marks it
  47 * __nosavedata assuming it will keep the resume kernel's '0' value. This
  48 * doesn't happen with KASLR, so swsusp_arch_suspend() clears it by hand.
  49 *
  50 * defined as "__visible int in_suspend __nosavedata" in
  51 * kernel/power/hibernate.c
  52 */
  53extern int in_suspend;
  54
  /*
   * Do we need to reset el2? Only when hyp mode is available and the kernel
   * itself is not running in hyp mode.
   */
  static inline bool el2_reset_needed(void)
  {
          return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
  }
  57
  58/* temporary el2 vectors in the __hibernate_exit_text section. */
  59extern char hibernate_el2_vectors[];
  60
  61/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
  62extern char __hyp_stub_vectors[];
  63
  64/*
  65 * The logical cpu number we should resume on, initialised to a non-cpu
  66 * number.
  67 */
  68static int sleep_cpu = -EINVAL;
  69
  70/*
  71 * Values that may not change over hibernate/resume. We put the build number
  72 * and date in here so that we guarantee not to resume with a different
  73 * kernel.
  74 */
  75struct arch_hibernate_hdr_invariants {
  76        char            uts_version[__NEW_UTS_LEN + 1];
  77};
  78
  79/* These values need to be know across a hibernate/restore. */
  80static struct arch_hibernate_hdr {
  81        struct arch_hibernate_hdr_invariants invariants;
  82
  83        /* These are needed to find the relocated kernel if built with kaslr */
  84        phys_addr_t     ttbr1_el1;
  85        void            (*reenter_kernel)(void);
  86
  87        /*
  88         * We need to know where the __hyp_stub_vectors are after restore to
  89         * re-configure el2.
  90         */
  91        phys_addr_t     __hyp_stub_vectors;
  92
  93        u64             sleep_cpu_mpidr;
  94} resume_hdr;
  95
  96static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
  97{
  98        memset(i, 0, sizeof(*i));
  99        memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
 100}
 101
 102int pfn_is_nosave(unsigned long pfn)
 103{
 104        unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
 105        unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);
 106
 107        return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
 108                crash_is_nosave(pfn);
 109}
 110
 111void notrace save_processor_state(void)
 112{
 113        WARN_ON(num_online_cpus() != 1);
 114}
 115
 116void notrace restore_processor_state(void)
 117{
 118}
 119
 120int arch_hibernation_header_save(void *addr, unsigned int max_size)
 121{
 122        struct arch_hibernate_hdr *hdr = addr;
 123
 124        if (max_size < sizeof(*hdr))
 125                return -EOVERFLOW;
 126
 127        arch_hdr_invariants(&hdr->invariants);
 128        hdr->ttbr1_el1          = __pa_symbol(swapper_pg_dir);
 129        hdr->reenter_kernel     = _cpu_resume;
 130
 131        /* We can't use __hyp_get_vectors() because kvm may still be loaded */
 132        if (el2_reset_needed())
 133                hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors);
 134        else
 135                hdr->__hyp_stub_vectors = 0;
 136
 137        /* Save the mpidr of the cpu we called cpu_suspend() on... */
 138        if (sleep_cpu < 0) {
 139                pr_err("Failing to hibernate on an unknown CPU.\n");
 140                return -ENODEV;
 141        }
 142        hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
 143        pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
 144                hdr->sleep_cpu_mpidr);
 145
 146        return 0;
 147}
 148EXPORT_SYMBOL(arch_hibernation_header_save);
 149
 150int arch_hibernation_header_restore(void *addr)
 151{
 152        int ret;
 153        struct arch_hibernate_hdr_invariants invariants;
 154        struct arch_hibernate_hdr *hdr = addr;
 155
 156        arch_hdr_invariants(&invariants);
 157        if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
 158                pr_crit("Hibernate image not generated by this kernel!\n");
 159                return -EINVAL;
 160        }
 161
 162        sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr);
 163        pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
 164                hdr->sleep_cpu_mpidr);
 165        if (sleep_cpu < 0) {
 166                pr_crit("Hibernated on a CPU not known to this kernel!\n");
 167                sleep_cpu = -EINVAL;
 168                return -EINVAL;
 169        }
 170        if (!cpu_online(sleep_cpu)) {
 171                pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
 172                ret = cpu_up(sleep_cpu);
 173                if (ret) {
 174                        pr_err("Failed to bring hibernate-CPU up!\n");
 175                        sleep_cpu = -EINVAL;
 176                        return ret;
 177                }
 178        }
 179
 180        resume_hdr = *hdr;
 181
 182        return 0;
 183}
 184EXPORT_SYMBOL(arch_hibernation_header_restore);
 185
 186/*
 187 * Copies length bytes, starting at src_start into an new page,
 188 * perform cache maintentance, then maps it at the specified address low
 189 * address as executable.
 190 *
 191 * This is used by hibernate to copy the code it needs to execute when
 192 * overwriting the kernel text. This function generates a new set of page
 193 * tables, which it loads into ttbr0.
 194 *
 195 * Length is provided as we probably only want 4K of data, even on a 64K
 196 * page system.
 197 */
 198static int create_safe_exec_page(void *src_start, size_t length,
 199                                 unsigned long dst_addr,
 200                                 phys_addr_t *phys_dst_addr,
 201                                 void *(*allocator)(gfp_t mask),
 202                                 gfp_t mask)
 203{
 204        int rc = 0;
 205        pgd_t *pgdp;
 206        pud_t *pudp;
 207        pmd_t *pmdp;
 208        pte_t *ptep;
 209        unsigned long dst = (unsigned long)allocator(mask);
 210
 211        if (!dst) {
 212                rc = -ENOMEM;
 213                goto out;
 214        }
 215
 216        memcpy((void *)dst, src_start, length);
 217        flush_icache_range(dst, dst + length);
 218
 219        pgdp = pgd_offset_raw(allocator(mask), dst_addr);
 220        if (pgd_none(READ_ONCE(*pgdp))) {
 221                pudp = allocator(mask);
 222                if (!pudp) {
 223                        rc = -ENOMEM;
 224                        goto out;
 225                }
 226                pgd_populate(&init_mm, pgdp, pudp);
 227        }
 228
 229        pudp = pud_offset(pgdp, dst_addr);
 230        if (pud_none(READ_ONCE(*pudp))) {
 231                pmdp = allocator(mask);
 232                if (!pmdp) {
 233                        rc = -ENOMEM;
 234                        goto out;
 235                }
 236                pud_populate(&init_mm, pudp, pmdp);
 237        }
 238
 239        pmdp = pmd_offset(pudp, dst_addr);
 240        if (pmd_none(READ_ONCE(*pmdp))) {
 241                ptep = allocator(mask);
 242                if (!ptep) {
 243                        rc = -ENOMEM;
 244                        goto out;
 245                }
 246                pmd_populate_kernel(&init_mm, pmdp, ptep);
 247        }
 248
 249        ptep = pte_offset_kernel(pmdp, dst_addr);
 250        set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 251
 252        /*
 253         * Load our new page tables. A strict BBM approach requires that we
 254         * ensure that TLBs are free of any entries that may overlap with the
 255         * global mappings we are about to install.
 256         *
 257         * For a real hibernate/resume cycle TTBR0 currently points to a zero
 258         * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
 259         * runtime services), while for a userspace-driven test_resume cycle it
 260         * points to userspace page tables (and we must point it at a zero page
 261         * ourselves). Elsewhere we only (un)install the idmap with preemption
 262         * disabled, so T0SZ should be as required regardless.
 263         */
 264        cpu_set_reserved_ttbr0();
 265        local_flush_tlb_all();
 266        write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
 267        isb();
 268
 269        *phys_dst_addr = virt_to_phys((void *)dst);
 270
 271out:
 272        return rc;
 273}
 274
 /*
  * Clean the range [start, end) by handing its base and length to
  * __flush_dcache_area(). The arguments are parenthesized so that compound
  * expressions keep their meaning; note that start is evaluated twice.
  */
 #define dcache_clean_range(start, end)  __flush_dcache_area((start), ((end) - (start)))
 276
 277int swsusp_arch_suspend(void)
 278{
 279        int ret = 0;
 280        unsigned long flags;
 281        struct sleep_stack_data state;
 282
 283        if (cpus_are_stuck_in_kernel()) {
 284                pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
 285                return -EBUSY;
 286        }
 287
 288        flags = local_daif_save();
 289
 290        if (__cpu_suspend_enter(&state)) {
 291                /* make the crash dump kernel image visible/saveable */
 292                crash_prepare_suspend();
 293
 294                sleep_cpu = smp_processor_id();
 295                ret = swsusp_save();
 296        } else {
 297                /* Clean kernel core startup/idle code to PoC*/
 298                dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
 299                dcache_clean_range(__idmap_text_start, __idmap_text_end);
 300
 301                /* Clean kvm setup code to PoC? */
 302                if (el2_reset_needed())
 303                        dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
 304
 305                /* make the crash dump kernel image protected again */
 306                crash_post_resume();
 307
 308                /*
 309                 * Tell the hibernation core that we've just restored
 310                 * the memory
 311                 */
 312                in_suspend = 0;
 313
 314                sleep_cpu = -EINVAL;
 315                __cpu_suspend_exit();
 316        }
 317
 318        local_daif_restore(flags);
 319
 320        return ret;
 321}
 322
 323static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 324{
 325        pte_t pte = READ_ONCE(*src_ptep);
 326
 327        if (pte_valid(pte)) {
 328                /*
 329                 * Resume will overwrite areas that may be marked
 330                 * read only (code, rodata). Clear the RDONLY bit from
 331                 * the temporary mappings we use during restore.
 332                 */
 333                set_pte(dst_ptep, pte_mkwrite(pte));
 334        } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
 335                /*
 336                 * debug_pagealloc will removed the PTE_VALID bit if
 337                 * the page isn't in use by the resume kernel. It may have
 338                 * been in use by the original kernel, in which case we need
 339                 * to put it back in our copy to do the restore.
 340                 *
 341                 * Before marking this entry valid, check the pfn should
 342                 * be mapped.
 343                 */
 344                BUG_ON(!pfn_valid(pte_pfn(pte)));
 345
 346                set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
 347        }
 348}
 349
 350static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
 351                    unsigned long end)
 352{
 353        pte_t *src_ptep;
 354        pte_t *dst_ptep;
 355        unsigned long addr = start;
 356
 357        dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
 358        if (!dst_ptep)
 359                return -ENOMEM;
 360        pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
 361        dst_ptep = pte_offset_kernel(dst_pmdp, start);
 362
 363        src_ptep = pte_offset_kernel(src_pmdp, start);
 364        do {
 365                _copy_pte(dst_ptep, src_ptep, addr);
 366        } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
 367
 368        return 0;
 369}
 370
 371static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
 372                    unsigned long end)
 373{
 374        pmd_t *src_pmdp;
 375        pmd_t *dst_pmdp;
 376        unsigned long next;
 377        unsigned long addr = start;
 378
 379        if (pud_none(READ_ONCE(*dst_pudp))) {
 380                dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
 381                if (!dst_pmdp)
 382                        return -ENOMEM;
 383                pud_populate(&init_mm, dst_pudp, dst_pmdp);
 384        }
 385        dst_pmdp = pmd_offset(dst_pudp, start);
 386
 387        src_pmdp = pmd_offset(src_pudp, start);
 388        do {
 389                pmd_t pmd = READ_ONCE(*src_pmdp);
 390
 391                next = pmd_addr_end(addr, end);
 392                if (pmd_none(pmd))
 393                        continue;
 394                if (pmd_table(pmd)) {
 395                        if (copy_pte(dst_pmdp, src_pmdp, addr, next))
 396                                return -ENOMEM;
 397                } else {
 398                        set_pmd(dst_pmdp,
 399                                __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
 400                }
 401        } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
 402
 403        return 0;
 404}
 405
 406static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
 407                    unsigned long end)
 408{
 409        pud_t *dst_pudp;
 410        pud_t *src_pudp;
 411        unsigned long next;
 412        unsigned long addr = start;
 413
 414        if (pgd_none(READ_ONCE(*dst_pgdp))) {
 415                dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
 416                if (!dst_pudp)
 417                        return -ENOMEM;
 418                pgd_populate(&init_mm, dst_pgdp, dst_pudp);
 419        }
 420        dst_pudp = pud_offset(dst_pgdp, start);
 421
 422        src_pudp = pud_offset(src_pgdp, start);
 423        do {
 424                pud_t pud = READ_ONCE(*src_pudp);
 425
 426                next = pud_addr_end(addr, end);
 427                if (pud_none(pud))
 428                        continue;
 429                if (pud_table(pud)) {
 430                        if (copy_pmd(dst_pudp, src_pudp, addr, next))
 431                                return -ENOMEM;
 432                } else {
 433                        set_pud(dst_pudp,
 434                                __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
 435                }
 436        } while (dst_pudp++, src_pudp++, addr = next, addr != end);
 437
 438        return 0;
 439}
 440
 441static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
 442                            unsigned long end)
 443{
 444        unsigned long next;
 445        unsigned long addr = start;
 446        pgd_t *src_pgdp = pgd_offset_k(start);
 447
 448        dst_pgdp = pgd_offset_raw(dst_pgdp, start);
 449        do {
 450                next = pgd_addr_end(addr, end);
 451                if (pgd_none(READ_ONCE(*src_pgdp)))
 452                        continue;
 453                if (copy_pud(dst_pgdp, src_pgdp, addr, next))
 454                        return -ENOMEM;
 455        } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
 456
 457        return 0;
 458}
 459
 460/*
 461 * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
 462 *
 463 * Memory allocated by get_safe_page() will be dealt with by the hibernate code,
 464 * we don't need to free it here.
 465 */
 466int swsusp_arch_resume(void)
 467{
 468        int rc = 0;
 469        void *zero_page;
 470        size_t exit_size;
 471        pgd_t *tmp_pg_dir;
 472        phys_addr_t phys_hibernate_exit;
 473        void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
 474                                          void *, phys_addr_t, phys_addr_t);
 475
 476        /*
 477         * Restoring the memory image will overwrite the ttbr1 page tables.
 478         * Create a second copy of just the linear map, and use this when
 479         * restoring.
 480         */
 481        tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
 482        if (!tmp_pg_dir) {
 483                pr_err("Failed to allocate memory for temporary page tables.\n");
 484                rc = -ENOMEM;
 485                goto out;
 486        }
 487        rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
 488        if (rc)
 489                goto out;
 490
 491        /*
 492         * We need a zero page that is zero before & after resume in order to
 493         * to break before make on the ttbr1 page tables.
 494         */
 495        zero_page = (void *)get_safe_page(GFP_ATOMIC);
 496        if (!zero_page) {
 497                pr_err("Failed to allocate zero page.\n");
 498                rc = -ENOMEM;
 499                goto out;
 500        }
 501
 502        /*
 503         * Locate the exit code in the bottom-but-one page, so that *NULL
 504         * still has disastrous affects.
 505         */
 506        hibernate_exit = (void *)PAGE_SIZE;
 507        exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
 508        /*
 509         * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
 510         * a new set of ttbr0 page tables and load them.
 511         */
 512        rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
 513                                   (unsigned long)hibernate_exit,
 514                                   &phys_hibernate_exit,
 515                                   (void *)get_safe_page, GFP_ATOMIC);
 516        if (rc) {
 517                pr_err("Failed to create safe executable page for hibernate_exit code.\n");
 518                goto out;
 519        }
 520
 521        /*
 522         * The hibernate exit text contains a set of el2 vectors, that will
 523         * be executed at el2 with the mmu off in order to reload hyp-stub.
 524         */
 525        __flush_dcache_area(hibernate_exit, exit_size);
 526
 527        /*
 528         * KASLR will cause the el2 vectors to be in a different location in
 529         * the resumed kernel. Load hibernate's temporary copy into el2.
 530         *
 531         * We can skip this step if we booted at EL1, or are running with VHE.
 532         */
 533        if (el2_reset_needed()) {
 534                phys_addr_t el2_vectors = phys_hibernate_exit;  /* base */
 535                el2_vectors += hibernate_el2_vectors -
 536                               __hibernate_exit_text_start;     /* offset */
 537
 538                __hyp_set_vectors(el2_vectors);
 539        }
 540
 541        hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
 542                       resume_hdr.reenter_kernel, restore_pblist,
 543                       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
 544
 545out:
 546        return rc;
 547}
 548
 549int hibernate_resume_nonboot_cpu_disable(void)
 550{
 551        if (sleep_cpu < 0) {
 552                pr_err("Failing to resume from hibernate on an unknown CPU.\n");
 553                return -ENODEV;
 554        }
 555
 556        return freeze_secondary_cpus(sleep_cpu);
 557}
 558