linux/kernel/power/snapshot.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * linux/kernel/power/snapshot.c
   4 *
   5 * This file provides system snapshot/restore functionality for swsusp.
   6 *
   7 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
   8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
   9 */
  10
  11#define pr_fmt(fmt) "PM: hibernation: " fmt
  12
  13#include <linux/version.h>
  14#include <linux/module.h>
  15#include <linux/mm.h>
  16#include <linux/suspend.h>
  17#include <linux/delay.h>
  18#include <linux/bitops.h>
  19#include <linux/spinlock.h>
  20#include <linux/kernel.h>
  21#include <linux/pm.h>
  22#include <linux/device.h>
  23#include <linux/init.h>
  24#include <linux/memblock.h>
  25#include <linux/nmi.h>
  26#include <linux/syscalls.h>
  27#include <linux/console.h>
  28#include <linux/highmem.h>
  29#include <linux/list.h>
  30#include <linux/slab.h>
  31#include <linux/compiler.h>
  32#include <linux/ktime.h>
  33#include <linux/set_memory.h>
  34
  35#include <linux/uaccess.h>
  36#include <asm/mmu_context.h>
  37#include <asm/tlbflush.h>
  38#include <asm/io.h>
  39
  40#include "power.h"
  41
  42#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
  43static bool hibernate_restore_protection;
  44static bool hibernate_restore_protection_active;
  45
  46void enable_restore_image_protection(void)
  47{
  48        hibernate_restore_protection = true;
  49}
  50
  51static inline void hibernate_restore_protection_begin(void)
  52{
  53        hibernate_restore_protection_active = hibernate_restore_protection;
  54}
  55
  56static inline void hibernate_restore_protection_end(void)
  57{
  58        hibernate_restore_protection_active = false;
  59}
  60
  61static inline void hibernate_restore_protect_page(void *page_address)
  62{
  63        if (hibernate_restore_protection_active)
  64                set_memory_ro((unsigned long)page_address, 1);
  65}
  66
  67static inline void hibernate_restore_unprotect_page(void *page_address)
  68{
  69        if (hibernate_restore_protection_active)
  70                set_memory_rw((unsigned long)page_address, 1);
  71}
  72#else
  73static inline void hibernate_restore_protection_begin(void) {}
  74static inline void hibernate_restore_protection_end(void) {}
  75static inline void hibernate_restore_protect_page(void *page_address) {}
  76static inline void hibernate_restore_unprotect_page(void *page_address) {}
  77#endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
  78
  79static int swsusp_page_is_free(struct page *);
  80static void swsusp_set_page_forbidden(struct page *);
  81static void swsusp_unset_page_forbidden(struct page *);
  82
  83/*
  84 * Number of bytes to reserve for memory allocations made by device drivers
  85 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
  86 * cause image creation to fail (tunable via /sys/power/reserved_size).
  87 */
  88unsigned long reserved_size;
  89
  90void __init hibernate_reserved_size_init(void)
  91{
  92        reserved_size = SPARE_PAGES * PAGE_SIZE;
  93}
  94
  95/*
  96 * Preferred image size in bytes (tunable via /sys/power/image_size).
  97 * When it is set to N, swsusp will do its best to ensure the image
  98 * size will not exceed N bytes, but if that is impossible, it will
  99 * try to create the smallest image possible.
 100 */
 101unsigned long image_size;
 102
 103void __init hibernate_image_size_init(void)
 104{
 105        image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
 106}
 107
 108/*
 109 * List of PBEs needed for restoring the pages that were allocated before
 110 * the suspend and included in the suspend image, but have also been
 111 * allocated by the "resume" kernel, so their contents cannot be written
 112 * directly to their "original" page frames.
 113 */
 114struct pbe *restore_pblist;
 115
 116/* struct linked_page is used to build chains of pages */
 117
 118#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))
 119
 120struct linked_page {
 121        struct linked_page *next;
 122        char data[LINKED_PAGE_DATA_SIZE];
 123} __packed;
 124
 125/*
 126 * List of "safe" pages (ie. pages that were not used by the image kernel
 127 * before hibernation) that may be used as temporary storage for image kernel
 128 * memory contents.
 129 */
 130static struct linked_page *safe_pages_list;
 131
 132/* Pointer to an auxiliary buffer (1 page) */
 133static void *buffer;
 134
 135#define PG_ANY          0
 136#define PG_SAFE         1
 137#define PG_UNSAFE_CLEAR 1
 138#define PG_UNSAFE_KEEP  0
 139
 140static unsigned int allocated_unsafe_pages;
 141
 142/**
 143 * get_image_page - Allocate a page for a hibernation image.
 144 * @gfp_mask: GFP mask for the allocation.
 145 * @safe_needed: Get pages that were not used before hibernation (restore only)
 146 *
 147 * During image restoration, for storing the PBE list and the image data, we can
 148 * only use memory pages that do not conflict with the pages used before
 149 * hibernation.  The "unsafe" pages have PageNosaveFree set and we count them
 150 * using allocated_unsafe_pages.
 151 *
 152 * Each allocated image page is marked as PageNosave and PageNosaveFree so that
 153 * swsusp_free() can release it.
 154 */
 155static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 156{
 157        void *res;
 158
 159        res = (void *)get_zeroed_page(gfp_mask);
 160        if (safe_needed)
 161                while (res && swsusp_page_is_free(virt_to_page(res))) {
 162                        /* The page is unsafe, mark it for swsusp_free() */
 163                        swsusp_set_page_forbidden(virt_to_page(res));
 164                        allocated_unsafe_pages++;
 165                        res = (void *)get_zeroed_page(gfp_mask);
 166                }
 167        if (res) {
 168                swsusp_set_page_forbidden(virt_to_page(res));
 169                swsusp_set_page_free(virt_to_page(res));
 170        }
 171        return res;
 172}
 173
 174static void *__get_safe_page(gfp_t gfp_mask)
 175{
 176        if (safe_pages_list) {
 177                void *ret = safe_pages_list;
 178
 179                safe_pages_list = safe_pages_list->next;
 180                memset(ret, 0, PAGE_SIZE);
 181                return ret;
 182        }
 183        return get_image_page(gfp_mask, PG_SAFE);
 184}
 185
 186unsigned long get_safe_page(gfp_t gfp_mask)
 187{
 188        return (unsigned long)__get_safe_page(gfp_mask);
 189}
 190
 191static struct page *alloc_image_page(gfp_t gfp_mask)
 192{
 193        struct page *page;
 194
 195        page = alloc_page(gfp_mask);
 196        if (page) {
 197                swsusp_set_page_forbidden(page);
 198                swsusp_set_page_free(page);
 199        }
 200        return page;
 201}
 202
 203static void recycle_safe_page(void *page_address)
 204{
 205        struct linked_page *lp = page_address;
 206
 207        lp->next = safe_pages_list;
 208        safe_pages_list = lp;
 209}
 210
 211/**
 212 * free_image_page - Free a page allocated for hibernation image.
 213 * @addr: Address of the page to free.
 214 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
 215 *
 216 * The page to free should have been allocated by get_image_page() (page flags
 217 * set by it are affected).
 218 */
 219static inline void free_image_page(void *addr, int clear_nosave_free)
 220{
 221        struct page *page;
 222
 223        BUG_ON(!virt_addr_valid(addr));
 224
 225        page = virt_to_page(addr);
 226
 227        swsusp_unset_page_forbidden(page);
 228        if (clear_nosave_free)
 229                swsusp_unset_page_free(page);
 230
 231        __free_page(page);
 232}
 233
 234static inline void free_list_of_pages(struct linked_page *list,
 235                                      int clear_page_nosave)
 236{
 237        while (list) {
 238                struct linked_page *lp = list->next;
 239
 240                free_image_page(list, clear_page_nosave);
 241                list = lp;
 242        }
 243}
 244
 245/*
 246 * struct chain_allocator is used for allocating small objects out of
 247 * a linked list of pages called 'the chain'.
 248 *
 249 * The chain grows each time when there is no room for a new object in
 250 * the current page.  The allocated objects cannot be freed individually.
 251 * It is only possible to free them all at once, by freeing the entire
 252 * chain.
 253 *
 254 * NOTE: The chain allocator may be inefficient if the allocated objects
 255 * are not much smaller than PAGE_SIZE.
 256 */
 257struct chain_allocator {
 258        struct linked_page *chain;      /* the chain */
 259        unsigned int used_space;        /* total size of objects allocated out
 260                                           of the current page */
 261        gfp_t gfp_mask;         /* mask for allocating pages */
 262        int safe_needed;        /* if set, only "safe" pages are allocated */
 263};
 264
 265static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
 266                       int safe_needed)
 267{
 268        ca->chain = NULL;
 269        ca->used_space = LINKED_PAGE_DATA_SIZE;
 270        ca->gfp_mask = gfp_mask;
 271        ca->safe_needed = safe_needed;
 272}
 273
 274static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 275{
 276        void *ret;
 277
 278        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 279                struct linked_page *lp;
 280
 281                lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
 282                                        get_image_page(ca->gfp_mask, PG_ANY);
 283                if (!lp)
 284                        return NULL;
 285
 286                lp->next = ca->chain;
 287                ca->chain = lp;
 288                ca->used_space = 0;
 289        }
 290        ret = ca->chain->data + ca->used_space;
 291        ca->used_space += size;
 292        return ret;
 293}
 294
 295/**
 296 * Data types related to memory bitmaps.
 297 *
 * Memory bitmap is a structure consisting of many linked lists of
 * objects.  The main list's elements are of type struct zone_bitmap
 * and each of them corresponds to one zone.  For each zone bitmap
 * object there is a list of objects of type struct bm_block that
 * represent each block of the bitmap in which information is stored.
 303 *
 304 * struct memory_bitmap contains a pointer to the main list of zone
 305 * bitmap objects, a struct bm_position used for browsing the bitmap,
 306 * and a pointer to the list of pages used for allocating all of the
 307 * zone bitmap objects and bitmap block objects.
 308 *
 309 * NOTE: It has to be possible to lay out the bitmap in memory
 310 * using only allocations of order 0.  Additionally, the bitmap is
 311 * designed to work with arbitrary number of zones (this is over the
 312 * top for now, but let's avoid making unnecessary assumptions ;-).
 313 *
 314 * struct zone_bitmap contains a pointer to a list of bitmap block
 315 * objects and a pointer to the bitmap block object that has been
 316 * most recently used for setting bits.  Additionally, it contains the
 317 * PFNs that correspond to the start and end of the represented zone.
 318 *
 319 * struct bm_block contains a pointer to the memory page in which
 320 * information is stored (in the form of a block of bitmap)
 321 * It also contains the pfns that correspond to the start and end of
 322 * the represented memory area.
 323 *
 324 * The memory bitmap is organized as a radix tree to guarantee fast random
 325 * access to the bits. There is one radix tree for each zone (as returned
 326 * from create_mem_extents).
 327 *
 * One radix tree is represented by one struct mem_zone_bm_rtree. There are
 * two linked lists for the nodes of the tree, one for the inner nodes and
 * one for the leaf nodes. The linked leaf nodes are used for fast linear
 * access of the memory bitmap.
 332 *
 333 * The struct rtree_node represents one node of the radix tree.
 334 */
 335
 336#define BM_END_OF_MAP   (~0UL)
 337
 338#define BM_BITS_PER_BLOCK       (PAGE_SIZE * BITS_PER_BYTE)
 339#define BM_BLOCK_SHIFT          (PAGE_SHIFT + 3)
 340#define BM_BLOCK_MASK           ((1UL << BM_BLOCK_SHIFT) - 1)
 341
 342/*
 343 * struct rtree_node is a wrapper struct to link the nodes
 344 * of the rtree together for easy linear iteration over
 345 * bits and easy freeing
 346 */
 347struct rtree_node {
 348        struct list_head list;
 349        unsigned long *data;
 350};
 351
 352/*
 353 * struct mem_zone_bm_rtree represents a bitmap used for one
 354 * populated memory zone.
 355 */
 356struct mem_zone_bm_rtree {
 357        struct list_head list;          /* Link Zones together         */
 358        struct list_head nodes;         /* Radix Tree inner nodes      */
 359        struct list_head leaves;        /* Radix Tree leaves           */
 360        unsigned long start_pfn;        /* Zone start page frame       */
 361        unsigned long end_pfn;          /* Zone end page frame + 1     */
 362        struct rtree_node *rtree;       /* Radix Tree Root             */
 363        int levels;                     /* Number of Radix Tree Levels */
 364        unsigned int blocks;            /* Number of Bitmap Blocks     */
 365};
 366
 367/* strcut bm_position is used for browsing memory bitmaps */
 368
 369struct bm_position {
 370        struct mem_zone_bm_rtree *zone;
 371        struct rtree_node *node;
 372        unsigned long node_pfn;
 373        int node_bit;
 374};
 375
 376struct memory_bitmap {
 377        struct list_head zones;
 378        struct linked_page *p_list;     /* list of pages used to store zone
 379                                           bitmap objects and bitmap block
 380                                           objects */
 381        struct bm_position cur; /* most recently used bit position */
 382};
 383
 384/* Functions that operate on memory bitmaps */
 385
 386#define BM_ENTRIES_PER_LEVEL    (PAGE_SIZE / sizeof(unsigned long))
 387#if BITS_PER_LONG == 32
 388#define BM_RTREE_LEVEL_SHIFT    (PAGE_SHIFT - 2)
 389#else
 390#define BM_RTREE_LEVEL_SHIFT    (PAGE_SHIFT - 3)
 391#endif
 392#define BM_RTREE_LEVEL_MASK     ((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
 393
 394/**
 395 * alloc_rtree_node - Allocate a new node and add it to the radix tree.
 396 *
 397 * This function is used to allocate inner nodes as well as the
 398 * leave nodes of the radix tree. It also adds the node to the
 399 * corresponding linked list passed in by the *list parameter.
 400 */
 401static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
 402                                           struct chain_allocator *ca,
 403                                           struct list_head *list)
 404{
 405        struct rtree_node *node;
 406
 407        node = chain_alloc(ca, sizeof(struct rtree_node));
 408        if (!node)
 409                return NULL;
 410
 411        node->data = get_image_page(gfp_mask, safe_needed);
 412        if (!node->data)
 413                return NULL;
 414
 415        list_add_tail(&node->list, list);
 416
 417        return node;
 418}
 419
 420/**
 421 * add_rtree_block - Add a new leave node to the radix tree.
 422 *
 423 * The leave nodes need to be allocated in order to keep the leaves
 424 * linked list in order. This is guaranteed by the zone->blocks
 425 * counter.
 426 */
 427static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
 428                           int safe_needed, struct chain_allocator *ca)
 429{
 430        struct rtree_node *node, *block, **dst;
 431        unsigned int levels_needed, block_nr;
 432        int i;
 433
 434        block_nr = zone->blocks;
 435        levels_needed = 0;
 436
 437        /* How many levels do we need for this block nr? */
 438        while (block_nr) {
 439                levels_needed += 1;
 440                block_nr >>= BM_RTREE_LEVEL_SHIFT;
 441        }
 442
 443        /* Make sure the rtree has enough levels */
 444        for (i = zone->levels; i < levels_needed; i++) {
 445                node = alloc_rtree_node(gfp_mask, safe_needed, ca,
 446                                        &zone->nodes);
 447                if (!node)
 448                        return -ENOMEM;
 449
 450                node->data[0] = (unsigned long)zone->rtree;
 451                zone->rtree = node;
 452                zone->levels += 1;
 453        }
 454
 455        /* Allocate new block */
 456        block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
 457        if (!block)
 458                return -ENOMEM;
 459
 460        /* Now walk the rtree to insert the block */
 461        node = zone->rtree;
 462        dst = &zone->rtree;
 463        block_nr = zone->blocks;
 464        for (i = zone->levels; i > 0; i--) {
 465                int index;
 466
 467                if (!node) {
 468                        node = alloc_rtree_node(gfp_mask, safe_needed, ca,
 469                                                &zone->nodes);
 470                        if (!node)
 471                                return -ENOMEM;
 472                        *dst = node;
 473                }
 474
 475                index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
 476                index &= BM_RTREE_LEVEL_MASK;
 477                dst = (struct rtree_node **)&((*dst)->data[index]);
 478                node = *dst;
 479        }
 480
 481        zone->blocks += 1;
 482        *dst = block;
 483
 484        return 0;
 485}
 486
 487static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 488                               int clear_nosave_free);
 489
 490/**
 491 * create_zone_bm_rtree - Create a radix tree for one zone.
 492 *
 493 * Allocated the mem_zone_bm_rtree structure and initializes it.
 494 * This function also allocated and builds the radix tree for the
 495 * zone.
 496 */
 497static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
 498                                                      int safe_needed,
 499                                                      struct chain_allocator *ca,
 500                                                      unsigned long start,
 501                                                      unsigned long end)
 502{
 503        struct mem_zone_bm_rtree *zone;
 504        unsigned int i, nr_blocks;
 505        unsigned long pages;
 506
 507        pages = end - start;
 508        zone  = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
 509        if (!zone)
 510                return NULL;
 511
 512        INIT_LIST_HEAD(&zone->nodes);
 513        INIT_LIST_HEAD(&zone->leaves);
 514        zone->start_pfn = start;
 515        zone->end_pfn = end;
 516        nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
 517
 518        for (i = 0; i < nr_blocks; i++) {
 519                if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
 520                        free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
 521                        return NULL;
 522                }
 523        }
 524
 525        return zone;
 526}
 527
 528/**
 529 * free_zone_bm_rtree - Free the memory of the radix tree.
 530 *
 531 * Free all node pages of the radix tree. The mem_zone_bm_rtree
 532 * structure itself is not freed here nor are the rtree_node
 533 * structs.
 534 */
 535static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 536                               int clear_nosave_free)
 537{
 538        struct rtree_node *node;
 539
 540        list_for_each_entry(node, &zone->nodes, list)
 541                free_image_page(node->data, clear_nosave_free);
 542
 543        list_for_each_entry(node, &zone->leaves, list)
 544                free_image_page(node->data, clear_nosave_free);
 545}
 546
 547static void memory_bm_position_reset(struct memory_bitmap *bm)
 548{
 549        bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
 550                                  list);
 551        bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 552                                  struct rtree_node, list);
 553        bm->cur.node_pfn = 0;
 554        bm->cur.node_bit = 0;
 555}
 556
 557static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 558
 559struct mem_extent {
 560        struct list_head hook;
 561        unsigned long start;
 562        unsigned long end;
 563};
 564
 565/**
 566 * free_mem_extents - Free a list of memory extents.
 567 * @list: List of extents to free.
 568 */
 569static void free_mem_extents(struct list_head *list)
 570{
 571        struct mem_extent *ext, *aux;
 572
 573        list_for_each_entry_safe(ext, aux, list, hook) {
 574                list_del(&ext->hook);
 575                kfree(ext);
 576        }
 577}
 578
 579/**
 580 * create_mem_extents - Create a list of memory extents.
 581 * @list: List to put the extents into.
 582 * @gfp_mask: Mask to use for memory allocations.
 583 *
 584 * The extents represent contiguous ranges of PFNs.
 585 */
 586static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 587{
 588        struct zone *zone;
 589
 590        INIT_LIST_HEAD(list);
 591
 592        for_each_populated_zone(zone) {
 593                unsigned long zone_start, zone_end;
 594                struct mem_extent *ext, *cur, *aux;
 595
 596                zone_start = zone->zone_start_pfn;
 597                zone_end = zone_end_pfn(zone);
 598
 599                list_for_each_entry(ext, list, hook)
 600                        if (zone_start <= ext->end)
 601                                break;
 602
 603                if (&ext->hook == list || zone_end < ext->start) {
 604                        /* New extent is necessary */
 605                        struct mem_extent *new_ext;
 606
 607                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
 608                        if (!new_ext) {
 609                                free_mem_extents(list);
 610                                return -ENOMEM;
 611                        }
 612                        new_ext->start = zone_start;
 613                        new_ext->end = zone_end;
 614                        list_add_tail(&new_ext->hook, &ext->hook);
 615                        continue;
 616                }
 617
 618                /* Merge this zone's range of PFNs with the existing one */
 619                if (zone_start < ext->start)
 620                        ext->start = zone_start;
 621                if (zone_end > ext->end)
 622                        ext->end = zone_end;
 623
 624                /* More merging may be possible */
 625                cur = ext;
 626                list_for_each_entry_safe_continue(cur, aux, list, hook) {
 627                        if (zone_end < cur->start)
 628                                break;
 629                        if (zone_end < cur->end)
 630                                ext->end = cur->end;
 631                        list_del(&cur->hook);
 632                        kfree(cur);
 633                }
 634        }
 635
 636        return 0;
 637}
 638
 639/**
 640 * memory_bm_create - Allocate memory for a memory bitmap.
 641 */
 642static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
 643                            int safe_needed)
 644{
 645        struct chain_allocator ca;
 646        struct list_head mem_extents;
 647        struct mem_extent *ext;
 648        int error;
 649
 650        chain_init(&ca, gfp_mask, safe_needed);
 651        INIT_LIST_HEAD(&bm->zones);
 652
 653        error = create_mem_extents(&mem_extents, gfp_mask);
 654        if (error)
 655                return error;
 656
 657        list_for_each_entry(ext, &mem_extents, hook) {
 658                struct mem_zone_bm_rtree *zone;
 659
 660                zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
 661                                            ext->start, ext->end);
 662                if (!zone) {
 663                        error = -ENOMEM;
 664                        goto Error;
 665                }
 666                list_add_tail(&zone->list, &bm->zones);
 667        }
 668
 669        bm->p_list = ca.chain;
 670        memory_bm_position_reset(bm);
 671 Exit:
 672        free_mem_extents(&mem_extents);
 673        return error;
 674
 675 Error:
 676        bm->p_list = ca.chain;
 677        memory_bm_free(bm, PG_UNSAFE_CLEAR);
 678        goto Exit;
 679}
 680
 681/**
 682 * memory_bm_free - Free memory occupied by the memory bitmap.
 683 * @bm: Memory bitmap.
 684 */
 685static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 686{
 687        struct mem_zone_bm_rtree *zone;
 688
 689        list_for_each_entry(zone, &bm->zones, list)
 690                free_zone_bm_rtree(zone, clear_nosave_free);
 691
 692        free_list_of_pages(bm->p_list, clear_nosave_free);
 693
 694        INIT_LIST_HEAD(&bm->zones);
 695}
 696
 697/**
 698 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
 699 *
 700 * Find the bit in memory bitmap @bm that corresponds to the given PFN.
 701 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated.
 702 *
 703 * Walk the radix tree to find the page containing the bit that represents @pfn
 704 * and return the position of the bit in @addr and @bit_nr.
 705 */
 706static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 707                              void **addr, unsigned int *bit_nr)
 708{
 709        struct mem_zone_bm_rtree *curr, *zone;
 710        struct rtree_node *node;
 711        int i, block_nr;
 712
 713        zone = bm->cur.zone;
 714
 715        if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
 716                goto zone_found;
 717
 718        zone = NULL;
 719
 720        /* Find the right zone */
 721        list_for_each_entry(curr, &bm->zones, list) {
 722                if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
 723                        zone = curr;
 724                        break;
 725                }
 726        }
 727
 728        if (!zone)
 729                return -EFAULT;
 730
 731zone_found:
 732        /*
 733         * We have found the zone. Now walk the radix tree to find the leaf node
 734         * for our PFN.
 735         */
 736
 737        /*
 738         * If the zone we wish to scan is the current zone and the
 739         * pfn falls into the current node then we do not need to walk
 740         * the tree.
 741         */
 742        node = bm->cur.node;
 743        if (zone == bm->cur.zone &&
 744            ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
 745                goto node_found;
 746
 747        node      = zone->rtree;
 748        block_nr  = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;
 749
 750        for (i = zone->levels; i > 0; i--) {
 751                int index;
 752
 753                index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
 754                index &= BM_RTREE_LEVEL_MASK;
 755                BUG_ON(node->data[index] == 0);
 756                node = (struct rtree_node *)node->data[index];
 757        }
 758
 759node_found:
 760        /* Update last position */
 761        bm->cur.zone = zone;
 762        bm->cur.node = node;
 763        bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
 764
 765        /* Set return values */
 766        *addr = node->data;
 767        *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;
 768
 769        return 0;
 770}
 771
 772static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 773{
 774        void *addr;
 775        unsigned int bit;
 776        int error;
 777
 778        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 779        BUG_ON(error);
 780        set_bit(bit, addr);
 781}
 782
 783static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
 784{
 785        void *addr;
 786        unsigned int bit;
 787        int error;
 788
 789        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 790        if (!error)
 791                set_bit(bit, addr);
 792
 793        return error;
 794}
 795
 796static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 797{
 798        void *addr;
 799        unsigned int bit;
 800        int error;
 801
 802        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 803        BUG_ON(error);
 804        clear_bit(bit, addr);
 805}
 806
 807static void memory_bm_clear_current(struct memory_bitmap *bm)
 808{
 809        int bit;
 810
 811        bit = max(bm->cur.node_bit - 1, 0);
 812        clear_bit(bit, bm->cur.node->data);
 813}
 814
 815static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 816{
 817        void *addr;
 818        unsigned int bit;
 819        int error;
 820
 821        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 822        BUG_ON(error);
 823        return test_bit(bit, addr);
 824}
 825
 826static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
 827{
 828        void *addr;
 829        unsigned int bit;
 830
 831        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
 832}
 833
 834/*
 835 * rtree_next_node - Jump to the next leaf node.
 836 *
 837 * Set the position to the beginning of the next node in the
 838 * memory bitmap. This is either the next node in the current
 839 * zone's radix tree or the first node in the radix tree of the
 840 * next zone.
 841 *
 842 * Return true if there is a next node, false otherwise.
 843 */
 844static bool rtree_next_node(struct memory_bitmap *bm)
 845{
 846        if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
 847                bm->cur.node = list_entry(bm->cur.node->list.next,
 848                                          struct rtree_node, list);
 849                bm->cur.node_pfn += BM_BITS_PER_BLOCK;
 850                bm->cur.node_bit  = 0;
 851                touch_softlockup_watchdog();
 852                return true;
 853        }
 854
 855        /* No more nodes, goto next zone */
 856        if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
 857                bm->cur.zone = list_entry(bm->cur.zone->list.next,
 858                                  struct mem_zone_bm_rtree, list);
 859                bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 860                                          struct rtree_node, list);
 861                bm->cur.node_pfn = 0;
 862                bm->cur.node_bit = 0;
 863                return true;
 864        }
 865
 866        /* No more zones */
 867        return false;
 868}
 869
 870/**
 871 * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap.
 872 * @bm: Memory bitmap.
 873 *
 874 * Starting from the last returned position this function searches for the next
 875 * set bit in @bm and returns the PFN represented by it.  If no more bits are
 876 * set, BM_END_OF_MAP is returned.
 877 *
 878 * It is required to run memory_bm_position_reset() before the first call to
 879 * this function for the given memory bitmap.
 880 */
 881static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 882{
 883        unsigned long bits, pfn, pages;
 884        int bit;
 885
 886        do {
 887                pages     = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
 888                bits      = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
 889                bit       = find_next_bit(bm->cur.node->data, bits,
 890                                          bm->cur.node_bit);
 891                if (bit < bits) {
 892                        pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 893                        bm->cur.node_bit = bit + 1;
 894                        return pfn;
 895                }
 896        } while (rtree_next_node(bm));
 897
 898        return BM_END_OF_MAP;
 899}
 900
/*
 * This structure represents a range of page frames the contents of which
 * should not be saved during hibernation.
 */
struct nosave_region {
        struct list_head list;          /* Link in the nosave_regions list */
        unsigned long start_pfn;        /* First page frame of the range */
        unsigned long end_pfn;          /* Page frame following the range (exclusive) */
};

/* Registered nosave regions; new entries are appended at the tail. */
static LIST_HEAD(nosave_regions);
 912
 913static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
 914{
 915        struct rtree_node *node;
 916
 917        list_for_each_entry(node, &zone->nodes, list)
 918                recycle_safe_page(node->data);
 919
 920        list_for_each_entry(node, &zone->leaves, list)
 921                recycle_safe_page(node->data);
 922}
 923
 924static void memory_bm_recycle(struct memory_bitmap *bm)
 925{
 926        struct mem_zone_bm_rtree *zone;
 927        struct linked_page *p_list;
 928
 929        list_for_each_entry(zone, &bm->zones, list)
 930                recycle_zone_bm_rtree(zone);
 931
 932        p_list = bm->p_list;
 933        while (p_list) {
 934                struct linked_page *lp = p_list;
 935
 936                p_list = lp->next;
 937                recycle_safe_page(lp);
 938        }
 939}
 940
 941/**
 942 * register_nosave_region - Register a region of unsaveable memory.
 943 *
 944 * Register a range of page frames the contents of which should not be saved
 945 * during hibernation (to be used in the early initialization code).
 946 */
 947void __init __register_nosave_region(unsigned long start_pfn,
 948                                     unsigned long end_pfn, int use_kmalloc)
 949{
 950        struct nosave_region *region;
 951
 952        if (start_pfn >= end_pfn)
 953                return;
 954
 955        if (!list_empty(&nosave_regions)) {
 956                /* Try to extend the previous region (they should be sorted) */
 957                region = list_entry(nosave_regions.prev,
 958                                        struct nosave_region, list);
 959                if (region->end_pfn == start_pfn) {
 960                        region->end_pfn = end_pfn;
 961                        goto Report;
 962                }
 963        }
 964        if (use_kmalloc) {
 965                /* During init, this shouldn't fail */
 966                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
 967                BUG_ON(!region);
 968        } else {
 969                /* This allocation cannot fail */
 970                region = memblock_alloc(sizeof(struct nosave_region),
 971                                        SMP_CACHE_BYTES);
 972                if (!region)
 973                        panic("%s: Failed to allocate %zu bytes\n", __func__,
 974                              sizeof(struct nosave_region));
 975        }
 976        region->start_pfn = start_pfn;
 977        region->end_pfn = end_pfn;
 978        list_add_tail(&region->list, &nosave_regions);
 979 Report:
 980        pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n",
 981                (unsigned long long) start_pfn << PAGE_SHIFT,
 982                ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
 983}
 984
/*
 * Set bits in this map correspond to the page frames the contents of which
 * should not be saved during the suspend.
 */
static struct memory_bitmap *forbidden_pages_map;

/* Set bits in this map correspond to free page frames. */
static struct memory_bitmap *free_pages_map;

/*
 * Each page frame allocated for creating the image is marked by setting the
 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously.
 *
 * Both pointers are set by create_basic_memory_bitmaps() and reset to NULL by
 * free_basic_memory_bitmaps().
 */
 998
 999void swsusp_set_page_free(struct page *page)
1000{
1001        if (free_pages_map)
1002                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
1003}
1004
1005static int swsusp_page_is_free(struct page *page)
1006{
1007        return free_pages_map ?
1008                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
1009}
1010
1011void swsusp_unset_page_free(struct page *page)
1012{
1013        if (free_pages_map)
1014                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
1015}
1016
1017static void swsusp_set_page_forbidden(struct page *page)
1018{
1019        if (forbidden_pages_map)
1020                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
1021}
1022
1023int swsusp_page_is_forbidden(struct page *page)
1024{
1025        return forbidden_pages_map ?
1026                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
1027}
1028
1029static void swsusp_unset_page_forbidden(struct page *page)
1030{
1031        if (forbidden_pages_map)
1032                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
1033}
1034
1035/**
1036 * mark_nosave_pages - Mark pages that should not be saved.
1037 * @bm: Memory bitmap.
1038 *
1039 * Set the bits in @bm that correspond to the page frames the contents of which
1040 * should not be saved.
1041 */
1042static void mark_nosave_pages(struct memory_bitmap *bm)
1043{
1044        struct nosave_region *region;
1045
1046        if (list_empty(&nosave_regions))
1047                return;
1048
1049        list_for_each_entry(region, &nosave_regions, list) {
1050                unsigned long pfn;
1051
1052                pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n",
1053                         (unsigned long long) region->start_pfn << PAGE_SHIFT,
1054                         ((unsigned long long) region->end_pfn << PAGE_SHIFT)
1055                                - 1);
1056
1057                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
1058                        if (pfn_valid(pfn)) {
1059                                /*
1060                                 * It is safe to ignore the result of
1061                                 * mem_bm_set_bit_check() here, since we won't
1062                                 * touch the PFNs for which the error is
1063                                 * returned anyway.
1064                                 */
1065                                mem_bm_set_bit_check(bm, pfn);
1066                        }
1067        }
1068}
1069
1070/**
1071 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
1072 *
1073 * Create bitmaps needed for marking page frames that should not be saved and
1074 * free page frames.  The forbidden_pages_map and free_pages_map pointers are
1075 * only modified if everything goes well, because we don't want the bits to be
1076 * touched before both bitmaps are set up.
1077 */
1078int create_basic_memory_bitmaps(void)
1079{
1080        struct memory_bitmap *bm1, *bm2;
1081        int error = 0;
1082
1083        if (forbidden_pages_map && free_pages_map)
1084                return 0;
1085        else
1086                BUG_ON(forbidden_pages_map || free_pages_map);
1087
1088        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1089        if (!bm1)
1090                return -ENOMEM;
1091
1092        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
1093        if (error)
1094                goto Free_first_object;
1095
1096        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1097        if (!bm2)
1098                goto Free_first_bitmap;
1099
1100        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
1101        if (error)
1102                goto Free_second_object;
1103
1104        forbidden_pages_map = bm1;
1105        free_pages_map = bm2;
1106        mark_nosave_pages(forbidden_pages_map);
1107
1108        pr_debug("Basic memory bitmaps created\n");
1109
1110        return 0;
1111
1112 Free_second_object:
1113        kfree(bm2);
1114 Free_first_bitmap:
1115        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1116 Free_first_object:
1117        kfree(bm1);
1118        return -ENOMEM;
1119}
1120
1121/**
1122 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
1123 *
1124 * Free memory bitmaps allocated by create_basic_memory_bitmaps().  The
1125 * auxiliary pointers are necessary so that the bitmaps themselves are not
1126 * referred to while they are being freed.
1127 */
1128void free_basic_memory_bitmaps(void)
1129{
1130        struct memory_bitmap *bm1, *bm2;
1131
1132        if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
1133                return;
1134
1135        bm1 = forbidden_pages_map;
1136        bm2 = free_pages_map;
1137        forbidden_pages_map = NULL;
1138        free_pages_map = NULL;
1139        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1140        kfree(bm1);
1141        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
1142        kfree(bm2);
1143
1144        pr_debug("Basic memory bitmaps freed\n");
1145}
1146
1147void clear_free_pages(void)
1148{
1149        struct memory_bitmap *bm = free_pages_map;
1150        unsigned long pfn;
1151
1152        if (WARN_ON(!(free_pages_map)))
1153                return;
1154
1155        if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) {
1156                memory_bm_position_reset(bm);
1157                pfn = memory_bm_next_pfn(bm);
1158                while (pfn != BM_END_OF_MAP) {
1159                        if (pfn_valid(pfn))
1160                                clear_highpage(pfn_to_page(pfn));
1161
1162                        pfn = memory_bm_next_pfn(bm);
1163                }
1164                memory_bm_position_reset(bm);
1165                pr_info("free pages cleared after restore\n");
1166        }
1167}
1168
1169/**
1170 * snapshot_additional_pages - Estimate the number of extra pages needed.
1171 * @zone: Memory zone to carry out the computation for.
1172 *
1173 * Estimate the number of additional pages needed for setting up a hibernation
1174 * image data structures for @zone (usually, the returned value is greater than
1175 * the exact number).
1176 */
1177unsigned int snapshot_additional_pages(struct zone *zone)
1178{
1179        unsigned int rtree, nodes;
1180
1181        rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
1182        rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
1183                              LINKED_PAGE_DATA_SIZE);
1184        while (nodes > 1) {
1185                nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
1186                rtree += nodes;
1187        }
1188
1189        return 2 * rtree;
1190}
1191
1192#ifdef CONFIG_HIGHMEM
1193/**
1194 * count_free_highmem_pages - Compute the total number of free highmem pages.
1195 *
1196 * The returned number is system-wide.
1197 */
1198static unsigned int count_free_highmem_pages(void)
1199{
1200        struct zone *zone;
1201        unsigned int cnt = 0;
1202
1203        for_each_populated_zone(zone)
1204                if (is_highmem(zone))
1205                        cnt += zone_page_state(zone, NR_FREE_PAGES);
1206
1207        return cnt;
1208}
1209
1210/**
1211 * saveable_highmem_page - Check if a highmem page is saveable.
1212 *
1213 * Determine whether a highmem page should be included in a hibernation image.
1214 *
1215 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
1216 * and it isn't part of a free chunk of pages.
1217 */
1218static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
1219{
1220        struct page *page;
1221
1222        if (!pfn_valid(pfn))
1223                return NULL;
1224
1225        page = pfn_to_online_page(pfn);
1226        if (!page || page_zone(page) != zone)
1227                return NULL;
1228
1229        BUG_ON(!PageHighMem(page));
1230
1231        if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page))
1232                return NULL;
1233
1234        if (PageReserved(page) || PageOffline(page))
1235                return NULL;
1236
1237        if (page_is_guard(page))
1238                return NULL;
1239
1240        return page;
1241}
1242
1243/**
1244 * count_highmem_pages - Compute the total number of saveable highmem pages.
1245 */
1246static unsigned int count_highmem_pages(void)
1247{
1248        struct zone *zone;
1249        unsigned int n = 0;
1250
1251        for_each_populated_zone(zone) {
1252                unsigned long pfn, max_zone_pfn;
1253
1254                if (!is_highmem(zone))
1255                        continue;
1256
1257                mark_free_pages(zone);
1258                max_zone_pfn = zone_end_pfn(zone);
1259                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1260                        if (saveable_highmem_page(zone, pfn))
1261                                n++;
1262        }
1263        return n;
1264}
1265#else
/*
 * Stub used when CONFIG_HIGHMEM is not set: no page frame is ever reported as
 * a saveable highmem page.
 */
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
        return NULL;
}
1270#endif /* CONFIG_HIGHMEM */
1271
1272/**
1273 * saveable_page - Check if the given page is saveable.
1274 *
1275 * Determine whether a non-highmem page should be included in a hibernation
1276 * image.
1277 *
1278 * We should save the page if it isn't Nosave, and is not in the range
1279 * of pages statically defined as 'unsaveable', and it isn't part of
1280 * a free chunk of pages.
1281 */
1282static struct page *saveable_page(struct zone *zone, unsigned long pfn)
1283{
1284        struct page *page;
1285
1286        if (!pfn_valid(pfn))
1287                return NULL;
1288
1289        page = pfn_to_online_page(pfn);
1290        if (!page || page_zone(page) != zone)
1291                return NULL;
1292
1293        BUG_ON(PageHighMem(page));
1294
1295        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
1296                return NULL;
1297
1298        if (PageOffline(page))
1299                return NULL;
1300
1301        if (PageReserved(page)
1302            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
1303                return NULL;
1304
1305        if (page_is_guard(page))
1306                return NULL;
1307
1308        return page;
1309}
1310
1311/**
1312 * count_data_pages - Compute the total number of saveable non-highmem pages.
1313 */
1314static unsigned int count_data_pages(void)
1315{
1316        struct zone *zone;
1317        unsigned long pfn, max_zone_pfn;
1318        unsigned int n = 0;
1319
1320        for_each_populated_zone(zone) {
1321                if (is_highmem(zone))
1322                        continue;
1323
1324                mark_free_pages(zone);
1325                max_zone_pfn = zone_end_pfn(zone);
1326                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1327                        if (saveable_page(zone, pfn))
1328                                n++;
1329        }
1330        return n;
1331}
1332
1333/*
1334 * This is needed, because copy_page and memcpy are not usable for copying
1335 * task structs.
1336 */
1337static inline void do_copy_page(long *dst, long *src)
1338{
1339        int n;
1340
1341        for (n = PAGE_SIZE / sizeof(long); n; n--)
1342                *dst++ = *src++;
1343}
1344
1345/**
1346 * safe_copy_page - Copy a page in a safe way.
1347 *
1348 * Check if the page we are going to copy is marked as present in the kernel
1349 * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
1350 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
1351 * always returns 'true'.
1352 */
1353static void safe_copy_page(void *dst, struct page *s_page)
1354{
1355        if (kernel_page_present(s_page)) {
1356                do_copy_page(dst, page_address(s_page));
1357        } else {
1358                kernel_map_pages(s_page, 1, 1);
1359                do_copy_page(dst, page_address(s_page));
1360                kernel_map_pages(s_page, 1, 0);
1361        }
1362}
1363
1364#ifdef CONFIG_HIGHMEM
/*
 * Dispatch to the saveability check matching the type of @zone:
 * saveable_highmem_page() for highmem zones, saveable_page() otherwise.
 */
static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
{
        if (is_highmem(zone))
                return saveable_highmem_page(zone, pfn);

        return saveable_page(zone, pfn);
}
1370
1371static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1372{
1373        struct page *s_page, *d_page;
1374        void *src, *dst;
1375
1376        s_page = pfn_to_page(src_pfn);
1377        d_page = pfn_to_page(dst_pfn);
1378        if (PageHighMem(s_page)) {
1379                src = kmap_atomic(s_page);
1380                dst = kmap_atomic(d_page);
1381                do_copy_page(dst, src);
1382                kunmap_atomic(dst);
1383                kunmap_atomic(src);
1384        } else {
1385                if (PageHighMem(d_page)) {
1386                        /*
1387                         * The page pointed to by src may contain some kernel
1388                         * data modified by kmap_atomic()
1389                         */
1390                        safe_copy_page(buffer, s_page);
1391                        dst = kmap_atomic(d_page);
1392                        copy_page(dst, buffer);
1393                        kunmap_atomic(dst);
1394                } else {
1395                        safe_copy_page(page_address(d_page), s_page);
1396                }
1397        }
1398}
1399#else
/* Without CONFIG_HIGHMEM every zone is checked with saveable_page(). */
#define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)
1401
/*
 * Copy the contents of page frame @src_pfn to page frame @dst_pfn using
 * safe_copy_page().
 */
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        struct page *d_page = pfn_to_page(dst_pfn);
        struct page *s_page = pfn_to_page(src_pfn);

        safe_copy_page(page_address(d_page), s_page);
}
1407#endif /* CONFIG_HIGHMEM */
1408
1409static void copy_data_pages(struct memory_bitmap *copy_bm,
1410                            struct memory_bitmap *orig_bm)
1411{
1412        struct zone *zone;
1413        unsigned long pfn;
1414
1415        for_each_populated_zone(zone) {
1416                unsigned long max_zone_pfn;
1417
1418                mark_free_pages(zone);
1419                max_zone_pfn = zone_end_pfn(zone);
1420                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1421                        if (page_is_saveable(zone, pfn))
1422                                memory_bm_set_bit(orig_bm, pfn);
1423        }
1424        memory_bm_position_reset(orig_bm);
1425        memory_bm_position_reset(copy_bm);
1426        for(;;) {
1427                pfn = memory_bm_next_pfn(orig_bm);
1428                if (unlikely(pfn == BM_END_OF_MAP))
1429                        break;
1430                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1431        }
1432}
1433
/* Total number of image pages */
static unsigned int nr_copy_pages;
/* Number of pages needed for saving the original pfns of the image pages */
static unsigned int nr_meta_pages;
/*
 * Numbers of normal and highmem page frames allocated for hibernation image
 * before suspending devices.  Incremented by preallocate_image_pages(),
 * decremented by free_unnecessary_pages() and reset by swsusp_free().
 */
static unsigned int alloc_normal, alloc_highmem;
/*
 * Memory bitmap used for marking saveable pages (during hibernation) or
 * hibernation image pages (during restore)
 */
static struct memory_bitmap orig_bm;
/*
 * Memory bitmap used during hibernation for marking allocated page frames that
 * will contain copies of saveable pages.  During restore it is initially used
 * for marking hibernation image pages, but then the set bits from it are
 * duplicated in @orig_bm and it is released.  On highmem systems it is next
 * used for marking "safe" highmem pages, but it has to be reinitialized for
 * this purpose.
 */
static struct memory_bitmap copy_bm;
1457
1458/**
1459 * swsusp_free - Free pages allocated for hibernation image.
1460 *
1461 * Image pages are alocated before snapshot creation, so they need to be
1462 * released after resume.
1463 */
1464void swsusp_free(void)
1465{
1466        unsigned long fb_pfn, fr_pfn;
1467
1468        if (!forbidden_pages_map || !free_pages_map)
1469                goto out;
1470
1471        memory_bm_position_reset(forbidden_pages_map);
1472        memory_bm_position_reset(free_pages_map);
1473
1474loop:
1475        fr_pfn = memory_bm_next_pfn(free_pages_map);
1476        fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1477
1478        /*
1479         * Find the next bit set in both bitmaps. This is guaranteed to
1480         * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
1481         */
1482        do {
1483                if (fb_pfn < fr_pfn)
1484                        fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1485                if (fr_pfn < fb_pfn)
1486                        fr_pfn = memory_bm_next_pfn(free_pages_map);
1487        } while (fb_pfn != fr_pfn);
1488
1489        if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
1490                struct page *page = pfn_to_page(fr_pfn);
1491
1492                memory_bm_clear_current(forbidden_pages_map);
1493                memory_bm_clear_current(free_pages_map);
1494                hibernate_restore_unprotect_page(page_address(page));
1495                __free_page(page);
1496                goto loop;
1497        }
1498
1499out:
1500        nr_copy_pages = 0;
1501        nr_meta_pages = 0;
1502        restore_pblist = NULL;
1503        buffer = NULL;
1504        alloc_normal = 0;
1505        alloc_highmem = 0;
1506        hibernate_restore_protection_end();
1507}
1508
/* Helper functions used for the shrinking of memory. */

/* Allocation mask used for image pages: GFP_KERNEL with __GFP_NOWARN added. */
#define GFP_IMAGE       (GFP_KERNEL | __GFP_NOWARN)
1512
1513/**
1514 * preallocate_image_pages - Allocate a number of pages for hibernation image.
1515 * @nr_pages: Number of page frames to allocate.
1516 * @mask: GFP flags to use for the allocation.
1517 *
1518 * Return value: Number of page frames actually allocated
1519 */
1520static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1521{
1522        unsigned long nr_alloc = 0;
1523
1524        while (nr_pages > 0) {
1525                struct page *page;
1526
1527                page = alloc_image_page(mask);
1528                if (!page)
1529                        break;
1530                memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1531                if (PageHighMem(page))
1532                        alloc_highmem++;
1533                else
1534                        alloc_normal++;
1535                nr_pages--;
1536                nr_alloc++;
1537        }
1538
1539        return nr_alloc;
1540}
1541
1542static unsigned long preallocate_image_memory(unsigned long nr_pages,
1543                                              unsigned long avail_normal)
1544{
1545        unsigned long alloc;
1546
1547        if (avail_normal <= alloc_normal)
1548                return 0;
1549
1550        alloc = avail_normal - alloc_normal;
1551        if (nr_pages < alloc)
1552                alloc = nr_pages;
1553
1554        return preallocate_image_pages(alloc, GFP_IMAGE);
1555}
1556
1557#ifdef CONFIG_HIGHMEM
1558static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1559{
1560        return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1561}
1562
1563/**
1564 *  __fraction - Compute (an approximation of) x * (multiplier / base).
1565 */
1566static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1567{
1568        return div64_u64(x * multiplier, base);
1569}
1570
1571static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1572                                                  unsigned long highmem,
1573                                                  unsigned long total)
1574{
1575        unsigned long alloc = __fraction(nr_pages, highmem, total);
1576
1577        return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1578}
1579#else /* CONFIG_HIGHMEM */
/* Stub used when CONFIG_HIGHMEM is not set: no page is ever allocated. */
static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
        return 0;
}
1584
/* Stub used when CONFIG_HIGHMEM is not set: no page is ever allocated. */
static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
                                                         unsigned long highmem,
                                                         unsigned long total)
{
        return 0;
}
1591#endif /* CONFIG_HIGHMEM */
1592
1593/**
1594 * free_unnecessary_pages - Release preallocated pages not needed for the image.
1595 */
1596static unsigned long free_unnecessary_pages(void)
1597{
1598        unsigned long save, to_free_normal, to_free_highmem, free;
1599
1600        save = count_data_pages();
1601        if (alloc_normal >= save) {
1602                to_free_normal = alloc_normal - save;
1603                save = 0;
1604        } else {
1605                to_free_normal = 0;
1606                save -= alloc_normal;
1607        }
1608        save += count_highmem_pages();
1609        if (alloc_highmem >= save) {
1610                to_free_highmem = alloc_highmem - save;
1611        } else {
1612                to_free_highmem = 0;
1613                save -= alloc_highmem;
1614                if (to_free_normal > save)
1615                        to_free_normal -= save;
1616                else
1617                        to_free_normal = 0;
1618        }
1619        free = to_free_normal + to_free_highmem;
1620
1621        memory_bm_position_reset(&copy_bm);
1622
1623        while (to_free_normal > 0 || to_free_highmem > 0) {
1624                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1625                struct page *page = pfn_to_page(pfn);
1626
1627                if (PageHighMem(page)) {
1628                        if (!to_free_highmem)
1629                                continue;
1630                        to_free_highmem--;
1631                        alloc_highmem--;
1632                } else {
1633                        if (!to_free_normal)
1634                                continue;
1635                        to_free_normal--;
1636                        alloc_normal--;
1637                }
1638                memory_bm_clear_bit(&copy_bm, pfn);
1639                swsusp_unset_page_forbidden(page);
1640                swsusp_unset_page_free(page);
1641                __free_page(page);
1642        }
1643
1644        return free;
1645}
1646
1647/**
1648 * minimum_image_size - Estimate the minimum acceptable size of an image.
1649 * @saveable: Number of saveable pages in the system.
1650 *
1651 * We want to avoid attempting to free too much memory too hard, so estimate the
1652 * minimum acceptable size of a hibernation image to use as the lower limit for
1653 * preallocating memory.
1654 *
1655 * We assume that the minimum image size should be proportional to
1656 *
1657 * [number of saveable pages] - [number of pages that can be freed in theory]
1658 *
1659 * where the second term is the sum of (1) reclaimable slab pages, (2) active
1660 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages.
1661 */
1662static unsigned long minimum_image_size(unsigned long saveable)
1663{
1664        unsigned long size;
1665
1666        size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
1667                + global_node_page_state(NR_ACTIVE_ANON)
1668                + global_node_page_state(NR_INACTIVE_ANON)
1669                + global_node_page_state(NR_ACTIVE_FILE)
1670                + global_node_page_state(NR_INACTIVE_FILE);
1671
1672        return saveable <= size ? 0 : saveable - size;
1673}
1674
1675/**
1676 * hibernate_preallocate_memory - Preallocate memory for hibernation image.
1677 *
1678 * To create a hibernation image it is necessary to make a copy of every page
1679 * frame in use.  We also need a number of page frames to be free during
1680 * hibernation for allocations made while saving the image and for device
1681 * drivers, in case they need to allocate memory from their hibernation
1682 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
1683 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
1684 * /sys/power/reserved_size, respectively).  To make this happen, we compute the
1685 * total number of available page frames and allocate at least
1686 *
1687 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
1688 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
1689 *
1690 * of them, which corresponds to the maximum size of a hibernation image.
1691 *
1692 * If image_size is set below the number following from the above formula,
1693 * the preallocation of memory is continued until the total number of saveable
1694 * pages in the system is below the requested image size or the minimum
1695 * acceptable image size returned by minimum_image_size(), whichever is greater.
1696 */
1697int hibernate_preallocate_memory(void)
1698{
1699        struct zone *zone;
1700        unsigned long saveable, size, max_size, count, highmem, pages = 0;
1701        unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1702        ktime_t start, stop;
1703        int error;
1704
1705        pr_info("Preallocating image memory\n");
1706        start = ktime_get();
1707
1708        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1709        if (error) {
1710                pr_err("Cannot allocate original bitmap\n");
1711                goto err_out;
1712        }
1713
1714        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1715        if (error) {
1716                pr_err("Cannot allocate copy bitmap\n");
1717                goto err_out;
1718        }
1719
1720        alloc_normal = 0;
1721        alloc_highmem = 0;
1722
1723        /* Count the number of saveable data pages. */
1724        save_highmem = count_highmem_pages();
1725        saveable = count_data_pages();
1726
1727        /*
1728         * Compute the total number of page frames we can use (count) and the
1729         * number of pages needed for image metadata (size).
1730         */
1731        count = saveable;
1732        saveable += save_highmem;
1733        highmem = save_highmem;
1734        size = 0;
1735        for_each_populated_zone(zone) {
1736                size += snapshot_additional_pages(zone);
1737                if (is_highmem(zone))
1738                        highmem += zone_page_state(zone, NR_FREE_PAGES);
1739                else
1740                        count += zone_page_state(zone, NR_FREE_PAGES);
1741        }
1742        avail_normal = count;
1743        count += highmem;
1744        count -= totalreserve_pages;
1745
1746        /* Compute the maximum number of saveable pages to leave in memory. */
1747        max_size = (count - (size + PAGES_FOR_IO)) / 2
1748                        - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
1749        /* Compute the desired number of image pages specified by image_size. */
1750        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1751        if (size > max_size)
1752                size = max_size;
1753        /*
1754         * If the desired number of image pages is at least as large as the
1755         * current number of saveable pages in memory, allocate page frames for
1756         * the image and we're done.
1757         */
1758        if (size >= saveable) {
1759                pages = preallocate_image_highmem(save_highmem);
1760                pages += preallocate_image_memory(saveable - pages, avail_normal);
1761                goto out;
1762        }
1763
1764        /* Estimate the minimum size of the image. */
1765        pages = minimum_image_size(saveable);
1766        /*
1767         * To avoid excessive pressure on the normal zone, leave room in it to
1768         * accommodate an image of the minimum size (unless it's already too
1769         * small, in which case don't preallocate pages from it at all).
1770         */
1771        if (avail_normal > pages)
1772                avail_normal -= pages;
1773        else
1774                avail_normal = 0;
1775        if (size < pages)
1776                size = min_t(unsigned long, pages, max_size);
1777
1778        /*
1779         * Let the memory management subsystem know that we're going to need a
1780         * large number of page frames to allocate and make it free some memory.
1781         * NOTE: If this is not done, performance will be hurt badly in some
1782         * test cases.
1783         */
1784        shrink_all_memory(saveable - size);
1785
1786        /*
1787         * The number of saveable pages in memory was too high, so apply some
1788         * pressure to decrease it.  First, make room for the largest possible
1789         * image and fail if that doesn't work.  Next, try to decrease the size
1790         * of the image as much as indicated by 'size' using allocations from
1791         * highmem and non-highmem zones separately.
1792         */
1793        pages_highmem = preallocate_image_highmem(highmem / 2);
1794        alloc = count - max_size;
1795        if (alloc > pages_highmem)
1796                alloc -= pages_highmem;
1797        else
1798                alloc = 0;
1799        pages = preallocate_image_memory(alloc, avail_normal);
1800        if (pages < alloc) {
1801                /* We have exhausted non-highmem pages, try highmem. */
1802                alloc -= pages;
1803                pages += pages_highmem;
1804                pages_highmem = preallocate_image_highmem(alloc);
1805                if (pages_highmem < alloc) {
1806                        pr_err("Image allocation is %lu pages short\n",
1807                                alloc - pages_highmem);
1808                        goto err_out;
1809                }
1810                pages += pages_highmem;
1811                /*
1812                 * size is the desired number of saveable pages to leave in
1813                 * memory, so try to preallocate (all memory - size) pages.
1814                 */
1815                alloc = (count - pages) - size;
1816                pages += preallocate_image_highmem(alloc);
1817        } else {
1818                /*
1819                 * There are approximately max_size saveable pages at this point
1820                 * and we want to reduce this number down to size.
1821                 */
1822                alloc = max_size - size;
1823                size = preallocate_highmem_fraction(alloc, highmem, count);
1824                pages_highmem += size;
1825                alloc -= size;
1826                size = preallocate_image_memory(alloc, avail_normal);
1827                pages_highmem += preallocate_image_highmem(alloc - size);
1828                pages += pages_highmem + size;
1829        }
1830
1831        /*
1832         * We only need as many page frames for the image as there are saveable
1833         * pages in memory, but we have allocated more.  Release the excessive
1834         * ones now.
1835         */
1836        pages -= free_unnecessary_pages();
1837
1838 out:
1839        stop = ktime_get();
1840        pr_info("Allocated %lu pages for snapshot\n", pages);
1841        swsusp_show_speed(start, stop, pages, "Allocated");
1842
1843        return 0;
1844
1845 err_out:
1846        swsusp_free();
1847        return -ENOMEM;
1848}
1849
1850#ifdef CONFIG_HIGHMEM
1851/**
1852 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
1853 *
1854 * Compute the number of non-highmem pages that will be necessary for creating
1855 * copies of highmem pages.
1856 */
1857static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1858{
1859        unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1860
1861        if (free_highmem >= nr_highmem)
1862                nr_highmem = 0;
1863        else
1864                nr_highmem -= free_highmem;
1865
1866        return nr_highmem;
1867}
1868#else
/* CONFIG_HIGHMEM is not set: always report that no extra pages are needed. */
static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
	return 0;
}
1870#endif /* CONFIG_HIGHMEM */
1871
1872/**
1873 * enough_free_mem - Check if there is enough free memory for the image.
1874 */
1875static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1876{
1877        struct zone *zone;
1878        unsigned int free = alloc_normal;
1879
1880        for_each_populated_zone(zone)
1881                if (!is_highmem(zone))
1882                        free += zone_page_state(zone, NR_FREE_PAGES);
1883
1884        nr_pages += count_pages_for_highmem(nr_highmem);
1885        pr_debug("Normal pages needed: %u + %u, available pages: %u\n",
1886                 nr_pages, PAGES_FOR_IO, free);
1887
1888        return free > nr_pages + PAGES_FOR_IO;
1889}
1890
1891#ifdef CONFIG_HIGHMEM
1892/**
1893 * get_highmem_buffer - Allocate a buffer for highmem pages.
1894 *
1895 * If there are some highmem pages in the hibernation image, we may need a
1896 * buffer to copy them and/or load their data.
1897 */
1898static inline int get_highmem_buffer(int safe_needed)
1899{
1900        buffer = get_image_page(GFP_ATOMIC, safe_needed);
1901        return buffer ? 0 : -ENOMEM;
1902}
1903
1904/**
1905 * alloc_highmem_image_pages - Allocate some highmem pages for the image.
1906 *
1907 * Try to allocate as many pages as needed, but if the number of free highmem
1908 * pages is less than that, allocate them all.
1909 */
1910static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
1911                                               unsigned int nr_highmem)
1912{
1913        unsigned int to_alloc = count_free_highmem_pages();
1914
1915        if (to_alloc > nr_highmem)
1916                to_alloc = nr_highmem;
1917
1918        nr_highmem -= to_alloc;
1919        while (to_alloc-- > 0) {
1920                struct page *page;
1921
1922                page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
1923                memory_bm_set_bit(bm, page_to_pfn(page));
1924        }
1925        return nr_highmem;
1926}
1927#else
/* CONFIG_HIGHMEM is not set: no highmem buffer is allocated. */
static inline int get_highmem_buffer(int safe_needed)
{
	return 0;
}

/* CONFIG_HIGHMEM is not set: nothing is allocated and nothing is missing. */
static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
					       unsigned int n)
{
	return 0;
}
1932#endif /* CONFIG_HIGHMEM */
1933
1934/**
1935 * swsusp_alloc - Allocate memory for hibernation image.
1936 *
1937 * We first try to allocate as many highmem pages as there are
1938 * saveable highmem pages in the system.  If that fails, we allocate
1939 * non-highmem pages for the copies of the remaining highmem ones.
1940 *
1941 * In this approach it is likely that the copies of highmem pages will
1942 * also be located in the high memory, because of the way in which
1943 * copy_data_pages() works.
1944 */
1945static int swsusp_alloc(struct memory_bitmap *copy_bm,
1946                        unsigned int nr_pages, unsigned int nr_highmem)
1947{
1948        if (nr_highmem > 0) {
1949                if (get_highmem_buffer(PG_ANY))
1950                        goto err_out;
1951                if (nr_highmem > alloc_highmem) {
1952                        nr_highmem -= alloc_highmem;
1953                        nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1954                }
1955        }
1956        if (nr_pages > alloc_normal) {
1957                nr_pages -= alloc_normal;
1958                while (nr_pages-- > 0) {
1959                        struct page *page;
1960
1961                        page = alloc_image_page(GFP_ATOMIC);
1962                        if (!page)
1963                                goto err_out;
1964                        memory_bm_set_bit(copy_bm, page_to_pfn(page));
1965                }
1966        }
1967
1968        return 0;
1969
1970 err_out:
1971        swsusp_free();
1972        return -ENOMEM;
1973}
1974
1975asmlinkage __visible int swsusp_save(void)
1976{
1977        unsigned int nr_pages, nr_highmem;
1978
1979        pr_info("Creating image:\n");
1980
1981        drain_local_pages(NULL);
1982        nr_pages = count_data_pages();
1983        nr_highmem = count_highmem_pages();
1984        pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
1985
1986        if (!enough_free_mem(nr_pages, nr_highmem)) {
1987                pr_err("Not enough free memory\n");
1988                return -ENOMEM;
1989        }
1990
1991        if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
1992                pr_err("Memory allocation failed\n");
1993                return -ENOMEM;
1994        }
1995
1996        /*
1997         * During allocating of suspend pagedir, new cold pages may appear.
1998         * Kill them.
1999         */
2000        drain_local_pages(NULL);
2001        copy_data_pages(&copy_bm, &orig_bm);
2002
2003        /*
2004         * End of critical section. From now on, we can write to memory,
2005         * but we should not touch disk. This specially means we must _not_
2006         * touch swap space! Except we must write out our image of course.
2007         */
2008
2009        nr_pages += nr_highmem;
2010        nr_copy_pages = nr_pages;
2011        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
2012
2013        pr_info("Image created (%d pages copied)\n", nr_pages);
2014
2015        return 0;
2016}
2017
2018#ifndef CONFIG_ARCH_HIBERNATION_HEADER
2019static int init_header_complete(struct swsusp_info *info)
2020{
2021        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
2022        info->version_code = LINUX_VERSION_CODE;
2023        return 0;
2024}
2025
2026static const char *check_image_kernel(struct swsusp_info *info)
2027{
2028        if (info->version_code != LINUX_VERSION_CODE)
2029                return "kernel version";
2030        if (strcmp(info->uts.sysname,init_utsname()->sysname))
2031                return "system type";
2032        if (strcmp(info->uts.release,init_utsname()->release))
2033                return "kernel release";
2034        if (strcmp(info->uts.version,init_utsname()->version))
2035                return "version";
2036        if (strcmp(info->uts.machine,init_utsname()->machine))
2037                return "machine";
2038        return NULL;
2039}
2040#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
2041
2042unsigned long snapshot_get_image_size(void)
2043{
2044        return nr_copy_pages + nr_meta_pages + 1;
2045}
2046
2047static int init_header(struct swsusp_info *info)
2048{
2049        memset(info, 0, sizeof(struct swsusp_info));
2050        info->num_physpages = get_num_physpages();
2051        info->image_pages = nr_copy_pages;
2052        info->pages = snapshot_get_image_size();
2053        info->size = info->pages;
2054        info->size <<= PAGE_SHIFT;
2055        return init_header_complete(info);
2056}
2057
2058/**
2059 * pack_pfns - Prepare PFNs for saving.
2060 * @bm: Memory bitmap.
2061 * @buf: Memory buffer to store the PFNs in.
2062 *
2063 * PFNs corresponding to set bits in @bm are stored in the area of memory
2064 * pointed to by @buf (1 page at a time).
2065 */
2066static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
2067{
2068        int j;
2069
2070        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2071                buf[j] = memory_bm_next_pfn(bm);
2072                if (unlikely(buf[j] == BM_END_OF_MAP))
2073                        break;
2074        }
2075}
2076
2077/**
2078 * snapshot_read_next - Get the address to read the next image page from.
2079 * @handle: Snapshot handle to be used for the reading.
2080 *
2081 * On the first call, @handle should point to a zeroed snapshot_handle
2082 * structure.  The structure gets populated then and a pointer to it should be
2083 * passed to this function every next time.
2084 *
2085 * On success, the function returns a positive number.  Then, the caller
2086 * is allowed to read up to the returned number of bytes from the memory
2087 * location computed by the data_of() macro.
2088 *
2089 * The function returns 0 to indicate the end of the data stream condition,
2090 * and negative numbers are returned on errors.  If that happens, the structure
2091 * pointed to by @handle is not updated and should not be used any more.
2092 */
2093int snapshot_read_next(struct snapshot_handle *handle)
2094{
2095        if (handle->cur > nr_meta_pages + nr_copy_pages)
2096                return 0;
2097
2098        if (!buffer) {
2099                /* This makes the buffer be freed by swsusp_free() */
2100                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2101                if (!buffer)
2102                        return -ENOMEM;
2103        }
2104        if (!handle->cur) {
2105                int error;
2106
2107                error = init_header((struct swsusp_info *)buffer);
2108                if (error)
2109                        return error;
2110                handle->buffer = buffer;
2111                memory_bm_position_reset(&orig_bm);
2112                memory_bm_position_reset(&copy_bm);
2113        } else if (handle->cur <= nr_meta_pages) {
2114                clear_page(buffer);
2115                pack_pfns(buffer, &orig_bm);
2116        } else {
2117                struct page *page;
2118
2119                page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
2120                if (PageHighMem(page)) {
2121                        /*
2122                         * Highmem pages are copied to the buffer,
2123                         * because we can't return with a kmapped
2124                         * highmem page (we may not be called again).
2125                         */
2126                        void *kaddr;
2127
2128                        kaddr = kmap_atomic(page);
2129                        copy_page(buffer, kaddr);
2130                        kunmap_atomic(kaddr);
2131                        handle->buffer = buffer;
2132                } else {
2133                        handle->buffer = page_address(page);
2134                }
2135        }
2136        handle->cur++;
2137        return PAGE_SIZE;
2138}
2139
2140static void duplicate_memory_bitmap(struct memory_bitmap *dst,
2141                                    struct memory_bitmap *src)
2142{
2143        unsigned long pfn;
2144
2145        memory_bm_position_reset(src);
2146        pfn = memory_bm_next_pfn(src);
2147        while (pfn != BM_END_OF_MAP) {
2148                memory_bm_set_bit(dst, pfn);
2149                pfn = memory_bm_next_pfn(src);
2150        }
2151}
2152
2153/**
2154 * mark_unsafe_pages - Mark pages that were used before hibernation.
2155 *
2156 * Mark the pages that cannot be used for storing the image during restoration,
2157 * because they conflict with the pages that had been used before hibernation.
2158 */
2159static void mark_unsafe_pages(struct memory_bitmap *bm)
2160{
2161        unsigned long pfn;
2162
2163        /* Clear the "free"/"unsafe" bit for all PFNs */
2164        memory_bm_position_reset(free_pages_map);
2165        pfn = memory_bm_next_pfn(free_pages_map);
2166        while (pfn != BM_END_OF_MAP) {
2167                memory_bm_clear_current(free_pages_map);
2168                pfn = memory_bm_next_pfn(free_pages_map);
2169        }
2170
2171        /* Mark pages that correspond to the "original" PFNs as "unsafe" */
2172        duplicate_memory_bitmap(free_pages_map, bm);
2173
2174        allocated_unsafe_pages = 0;
2175}
2176
2177static int check_header(struct swsusp_info *info)
2178{
2179        const char *reason;
2180
2181        reason = check_image_kernel(info);
2182        if (!reason && info->num_physpages != get_num_physpages())
2183                reason = "memory size";
2184        if (reason) {
2185                pr_err("Image mismatch: %s\n", reason);
2186                return -EPERM;
2187        }
2188        return 0;
2189}
2190
2191/**
2192 * load header - Check the image header and copy the data from it.
2193 */
2194static int load_header(struct swsusp_info *info)
2195{
2196        int error;
2197
2198        restore_pblist = NULL;
2199        error = check_header(info);
2200        if (!error) {
2201                nr_copy_pages = info->image_pages;
2202                nr_meta_pages = info->pages - info->image_pages - 1;
2203        }
2204        return error;
2205}
2206
2207/**
2208 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
2209 * @bm: Memory bitmap.
2210 * @buf: Area of memory containing the PFNs.
2211 *
2212 * For each element of the array pointed to by @buf (1 page at a time), set the
2213 * corresponding bit in @bm.
2214 */
2215static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
2216{
2217        int j;
2218
2219        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2220                if (unlikely(buf[j] == BM_END_OF_MAP))
2221                        break;
2222
2223                if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
2224                        memory_bm_set_bit(bm, buf[j]);
2225                else
2226                        return -EFAULT;
2227        }
2228
2229        return 0;
2230}
2231
2232#ifdef CONFIG_HIGHMEM
/*
 * struct highmem_pbe is used for creating the list of highmem pages that
 * should be restored atomically during the resume from disk, because the page
 * frames they have occupied before the suspend are in use.
 */
struct highmem_pbe {
	/* data is here now */
	struct page *copy_page;
	/* data was here before the suspend */
	struct page *orig_page;
	/* next entry of the singly linked list */
	struct highmem_pbe *next;
};

/*
 * List of highmem PBEs needed for restoring the highmem pages that were
 * allocated before the suspend and included in the suspend image, but have
 * also been allocated by the "resume" kernel, so their contents cannot be
 * written directly to their "original" page frames.
 */
static struct highmem_pbe *highmem_pblist;
2251
2252/**
2253 * count_highmem_image_pages - Compute the number of highmem pages in the image.
2254 * @bm: Memory bitmap.
2255 *
2256 * The bits in @bm that correspond to image pages are assumed to be set.
2257 */
2258static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
2259{
2260        unsigned long pfn;
2261        unsigned int cnt = 0;
2262
2263        memory_bm_position_reset(bm);
2264        pfn = memory_bm_next_pfn(bm);
2265        while (pfn != BM_END_OF_MAP) {
2266                if (PageHighMem(pfn_to_page(pfn)))
2267                        cnt++;
2268
2269                pfn = memory_bm_next_pfn(bm);
2270        }
2271        return cnt;
2272}
2273
2274static unsigned int safe_highmem_pages;
2275
2276static struct memory_bitmap *safe_highmem_bm;
2277
2278/**
2279 * prepare_highmem_image - Allocate memory for loading highmem data from image.
2280 * @bm: Pointer to an uninitialized memory bitmap structure.
2281 * @nr_highmem_p: Pointer to the number of highmem image pages.
2282 *
2283 * Try to allocate as many highmem pages as there are highmem image pages
2284 * (@nr_highmem_p points to the variable containing the number of highmem image
2285 * pages).  The pages that are "safe" (ie. will not be overwritten when the
2286 * hibernation image is restored entirely) have the corresponding bits set in
2287 * @bm (it must be unitialized).
2288 *
2289 * NOTE: This function should not be called if there are no highmem image pages.
2290 */
2291static int prepare_highmem_image(struct memory_bitmap *bm,
2292                                 unsigned int *nr_highmem_p)
2293{
2294        unsigned int to_alloc;
2295
2296        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
2297                return -ENOMEM;
2298
2299        if (get_highmem_buffer(PG_SAFE))
2300                return -ENOMEM;
2301
2302        to_alloc = count_free_highmem_pages();
2303        if (to_alloc > *nr_highmem_p)
2304                to_alloc = *nr_highmem_p;
2305        else
2306                *nr_highmem_p = to_alloc;
2307
2308        safe_highmem_pages = 0;
2309        while (to_alloc-- > 0) {
2310                struct page *page;
2311
2312                page = alloc_page(__GFP_HIGHMEM);
2313                if (!swsusp_page_is_free(page)) {
2314                        /* The page is "safe", set its bit the bitmap */
2315                        memory_bm_set_bit(bm, page_to_pfn(page));
2316                        safe_highmem_pages++;
2317                }
2318                /* Mark the page as allocated */
2319                swsusp_set_page_forbidden(page);
2320                swsusp_set_page_free(page);
2321        }
2322        memory_bm_position_reset(bm);
2323        safe_highmem_bm = bm;
2324        return 0;
2325}
2326
2327static struct page *last_highmem_page;
2328
2329/**
2330 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
2331 *
2332 * For a given highmem image page get a buffer that suspend_write_next() should
2333 * return to its caller to write to.
2334 *
2335 * If the page is to be saved to its "original" page frame or a copy of
2336 * the page is to be made in the highmem, @buffer is returned.  Otherwise,
2337 * the copy of the page is to be made in normal memory, so the address of
2338 * the copy is returned.
2339 *
2340 * If @buffer is returned, the caller of suspend_write_next() will write
2341 * the page's contents to @buffer, so they will have to be copied to the
2342 * right location on the next call to suspend_write_next() and it is done
2343 * with the help of copy_last_highmem_page().  For this purpose, if
2344 * @buffer is returned, @last_highmem_page is set to the page to which
2345 * the data will have to be copied from @buffer.
2346 */
2347static void *get_highmem_page_buffer(struct page *page,
2348                                     struct chain_allocator *ca)
2349{
2350        struct highmem_pbe *pbe;
2351        void *kaddr;
2352
2353        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
2354                /*
2355                 * We have allocated the "original" page frame and we can
2356                 * use it directly to store the loaded page.
2357                 */
2358                last_highmem_page = page;
2359                return buffer;
2360        }
2361        /*
2362         * The "original" page frame has not been allocated and we have to
2363         * use a "safe" page frame to store the loaded page.
2364         */
2365        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
2366        if (!pbe) {
2367                swsusp_free();
2368                return ERR_PTR(-ENOMEM);
2369        }
2370        pbe->orig_page = page;
2371        if (safe_highmem_pages > 0) {
2372                struct page *tmp;
2373
2374                /* Copy of the page will be stored in high memory */
2375                kaddr = buffer;
2376                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
2377                safe_highmem_pages--;
2378                last_highmem_page = tmp;
2379                pbe->copy_page = tmp;
2380        } else {
2381                /* Copy of the page will be stored in normal memory */
2382                kaddr = safe_pages_list;
2383                safe_pages_list = safe_pages_list->next;
2384                pbe->copy_page = virt_to_page(kaddr);
2385        }
2386        pbe->next = highmem_pblist;
2387        highmem_pblist = pbe;
2388        return kaddr;
2389}
2390
2391/**
2392 * copy_last_highmem_page - Copy most the most recent highmem image page.
2393 *
2394 * Copy the contents of a highmem image from @buffer, where the caller of
2395 * snapshot_write_next() has stored them, to the right location represented by
2396 * @last_highmem_page .
2397 */
2398static void copy_last_highmem_page(void)
2399{
2400        if (last_highmem_page) {
2401                void *dst;
2402
2403                dst = kmap_atomic(last_highmem_page);
2404                copy_page(dst, buffer);
2405                kunmap_atomic(dst);
2406                last_highmem_page = NULL;
2407        }
2408}
2409
2410static inline int last_highmem_page_copied(void)
2411{
2412        return !last_highmem_page;
2413}
2414
2415static inline void free_highmem_data(void)
2416{
2417        if (safe_highmem_bm)
2418                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2419
2420        if (buffer)
2421                free_image_page(buffer, PG_UNSAFE_CLEAR);
2422}
2423#else
2424static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2425
2426static inline int prepare_highmem_image(struct memory_bitmap *bm,
2427                                        unsigned int *nr_highmem_p) { return 0; }
2428
2429static inline void *get_highmem_page_buffer(struct page *page,
2430                                            struct chain_allocator *ca)
2431{
2432        return ERR_PTR(-EINVAL);
2433}
2434
2435static inline void copy_last_highmem_page(void) {}
2436static inline int last_highmem_page_copied(void) { return 1; }
2437static inline void free_highmem_data(void) {}
2438#endif /* CONFIG_HIGHMEM */
2439
2440#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2441
2442/**
2443 * prepare_image - Make room for loading hibernation image.
2444 * @new_bm: Unitialized memory bitmap structure.
2445 * @bm: Memory bitmap with unsafe pages marked.
2446 *
2447 * Use @bm to mark the pages that will be overwritten in the process of
2448 * restoring the system memory state from the suspend image ("unsafe" pages)
2449 * and allocate memory for the image.
2450 *
2451 * The idea is to allocate a new memory bitmap first and then allocate
2452 * as many pages as needed for image data, but without specifying what those
2453 * pages will be used for just yet.  Instead, we mark them all as allocated and
2454 * create a lists of "safe" pages to be used later.  On systems with high
2455 * memory a list of "safe" highmem pages is created too.
2456 */
2457static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2458{
2459        unsigned int nr_pages, nr_highmem;
2460        struct linked_page *lp;
2461        int error;
2462
2463        /* If there is no highmem, the buffer will not be necessary */
2464        free_image_page(buffer, PG_UNSAFE_CLEAR);
2465        buffer = NULL;
2466
2467        nr_highmem = count_highmem_image_pages(bm);
2468        mark_unsafe_pages(bm);
2469
2470        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2471        if (error)
2472                goto Free;
2473
2474        duplicate_memory_bitmap(new_bm, bm);
2475        memory_bm_free(bm, PG_UNSAFE_KEEP);
2476        if (nr_highmem > 0) {
2477                error = prepare_highmem_image(bm, &nr_highmem);
2478                if (error)
2479                        goto Free;
2480        }
2481        /*
2482         * Reserve some safe pages for potential later use.
2483         *
2484         * NOTE: This way we make sure there will be enough safe pages for the
2485         * chain_alloc() in get_buffer().  It is a bit wasteful, but
2486         * nr_copy_pages cannot be greater than 50% of the memory anyway.
2487         *
2488         * nr_copy_pages cannot be less than allocated_unsafe_pages too.
2489         */
2490        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2491        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2492        while (nr_pages > 0) {
2493                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2494                if (!lp) {
2495                        error = -ENOMEM;
2496                        goto Free;
2497                }
2498                lp->next = safe_pages_list;
2499                safe_pages_list = lp;
2500                nr_pages--;
2501        }
2502        /* Preallocate memory for the image */
2503        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2504        while (nr_pages > 0) {
2505                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2506                if (!lp) {
2507                        error = -ENOMEM;
2508                        goto Free;
2509                }
2510                if (!swsusp_page_is_free(virt_to_page(lp))) {
2511                        /* The page is "safe", add it to the list */
2512                        lp->next = safe_pages_list;
2513                        safe_pages_list = lp;
2514                }
2515                /* Mark the page as allocated */
2516                swsusp_set_page_forbidden(virt_to_page(lp));
2517                swsusp_set_page_free(virt_to_page(lp));
2518                nr_pages--;
2519        }
2520        return 0;
2521
2522 Free:
2523        swsusp_free();
2524        return error;
2525}
2526
2527/**
2528 * get_buffer - Get the address to store the next image data page.
2529 *
2530 * Get the address that snapshot_write_next() should return to its caller to
2531 * write to.
2532 */
2533static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2534{
2535        struct pbe *pbe;
2536        struct page *page;
2537        unsigned long pfn = memory_bm_next_pfn(bm);
2538
2539        if (pfn == BM_END_OF_MAP)
2540                return ERR_PTR(-EFAULT);
2541
2542        page = pfn_to_page(pfn);
2543        if (PageHighMem(page))
2544                return get_highmem_page_buffer(page, ca);
2545
2546        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2547                /*
2548                 * We have allocated the "original" page frame and we can
2549                 * use it directly to store the loaded page.
2550                 */
2551                return page_address(page);
2552
2553        /*
2554         * The "original" page frame has not been allocated and we have to
2555         * use a "safe" page frame to store the loaded page.
2556         */
2557        pbe = chain_alloc(ca, sizeof(struct pbe));
2558        if (!pbe) {
2559                swsusp_free();
2560                return ERR_PTR(-ENOMEM);
2561        }
2562        pbe->orig_address = page_address(page);
2563        pbe->address = safe_pages_list;
2564        safe_pages_list = safe_pages_list->next;
2565        pbe->next = restore_pblist;
2566        restore_pblist = pbe;
2567        return pbe->address;
2568}
2569
2570/**
2571 * snapshot_write_next - Get the address to store the next image page.
2572 * @handle: Snapshot handle structure to guide the writing.
2573 *
2574 * On the first call, @handle should point to a zeroed snapshot_handle
2575 * structure.  The structure gets populated then and a pointer to it should be
2576 * passed to this function every next time.
2577 *
2578 * On success, the function returns a positive number.  Then, the caller
2579 * is allowed to write up to the returned number of bytes to the memory
2580 * location computed by the data_of() macro.
2581 *
2582 * The function returns 0 to indicate the "end of file" condition.  Negative
2583 * numbers are returned on errors, in which cases the structure pointed to by
2584 * @handle is not updated and should not be used any more.
2585 */
2586int snapshot_write_next(struct snapshot_handle *handle)
2587{
2588        static struct chain_allocator ca;
2589        int error = 0;
2590
2591        /* Check if we have already loaded the entire image */
2592        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2593                return 0;
2594
2595        handle->sync_read = 1;
2596
2597        if (!handle->cur) {
2598                if (!buffer)
2599                        /* This makes the buffer be freed by swsusp_free() */
2600                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2601
2602                if (!buffer)
2603                        return -ENOMEM;
2604
2605                handle->buffer = buffer;
2606        } else if (handle->cur == 1) {
2607                error = load_header(buffer);
2608                if (error)
2609                        return error;
2610
2611                safe_pages_list = NULL;
2612
2613                error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2614                if (error)
2615                        return error;
2616
2617                hibernate_restore_protection_begin();
2618        } else if (handle->cur <= nr_meta_pages + 1) {
2619                error = unpack_orig_pfns(buffer, &copy_bm);
2620                if (error)
2621                        return error;
2622
2623                if (handle->cur == nr_meta_pages + 1) {
2624                        error = prepare_image(&orig_bm, &copy_bm);
2625                        if (error)
2626                                return error;
2627
2628                        chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2629                        memory_bm_position_reset(&orig_bm);
2630                        restore_pblist = NULL;
2631                        handle->buffer = get_buffer(&orig_bm, &ca);
2632                        handle->sync_read = 0;
2633                        if (IS_ERR(handle->buffer))
2634                                return PTR_ERR(handle->buffer);
2635                }
2636        } else {
2637                copy_last_highmem_page();
2638                hibernate_restore_protect_page(handle->buffer);
2639                handle->buffer = get_buffer(&orig_bm, &ca);
2640                if (IS_ERR(handle->buffer))
2641                        return PTR_ERR(handle->buffer);
2642                if (handle->buffer != buffer)
2643                        handle->sync_read = 0;
2644        }
2645        handle->cur++;
2646        return PAGE_SIZE;
2647}
2648
2649/**
2650 * snapshot_write_finalize - Complete the loading of a hibernation image.
2651 *
2652 * Must be called after the last call to snapshot_write_next() in case the last
2653 * page in the image happens to be a highmem page and its contents should be
2654 * stored in highmem.  Additionally, it recycles bitmap memory that's not
2655 * necessary any more.
2656 */
2657void snapshot_write_finalize(struct snapshot_handle *handle)
2658{
2659        copy_last_highmem_page();
2660        hibernate_restore_protect_page(handle->buffer);
2661        /* Do that only if we have loaded the image entirely */
2662        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2663                memory_bm_recycle(&orig_bm);
2664                free_highmem_data();
2665        }
2666}
2667
2668int snapshot_image_loaded(struct snapshot_handle *handle)
2669{
2670        return !(!nr_copy_pages || !last_highmem_page_copied() ||
2671                        handle->cur <= nr_meta_pages + nr_copy_pages);
2672}
2673
2674#ifdef CONFIG_HIGHMEM
/*
 * Exchange the contents of pages @p1 and @p2, using @buf as scratch space.
 *
 * The caller must make sure that @buf is ready for use and points to a "safe"
 * page.
 */
static inline void swap_two_pages_data(struct page *p1, struct page *p2,
                                       void *buf)
{
        void *first = kmap_atomic(p1);
        void *second = kmap_atomic(p2);

        /* Three-way rotation: p1 -> buf, p2 -> p1, buf -> p2. */
        copy_page(buf, first);
        copy_page(first, second);
        copy_page(second, buf);

        /* Drop the mappings in the reverse order of their creation. */
        kunmap_atomic(second);
        kunmap_atomic(first);
}
2689
2690/**
2691 * restore_highmem - Put highmem image pages into their original locations.
2692 *
2693 * For each highmem page that was in use before hibernation and is included in
2694 * the image, and also has been allocated by the "restore" kernel, swap its
2695 * current contents with the previous (ie. "before hibernation") ones.
2696 *
2697 * If the restore eventually fails, we can call this function once again and
2698 * restore the highmem state as seen by the restore kernel.
2699 */
2700int restore_highmem(void)
2701{
2702        struct highmem_pbe *pbe = highmem_pblist;
2703        void *buf;
2704
2705        if (!pbe)
2706                return 0;
2707
2708        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2709        if (!buf)
2710                return -ENOMEM;
2711
2712        while (pbe) {
2713                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2714                pbe = pbe->next;
2715        }
2716        free_image_page(buf, PG_UNSAFE_CLEAR);
2717        return 0;
2718}
2719#endif /* CONFIG_HIGHMEM */
2720