linux/kernel/power/snapshot.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * linux/kernel/power/snapshot.c
   4 *
   5 * This file provides system snapshot/restore functionality for swsusp.
   6 *
   7 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
   8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
   9 */
  10
  11#define pr_fmt(fmt) "PM: hibernation: " fmt
  12
  13#include <linux/version.h>
  14#include <linux/module.h>
  15#include <linux/mm.h>
  16#include <linux/suspend.h>
  17#include <linux/delay.h>
  18#include <linux/bitops.h>
  19#include <linux/spinlock.h>
  20#include <linux/kernel.h>
  21#include <linux/pm.h>
  22#include <linux/device.h>
  23#include <linux/init.h>
  24#include <linux/memblock.h>
  25#include <linux/nmi.h>
  26#include <linux/syscalls.h>
  27#include <linux/console.h>
  28#include <linux/highmem.h>
  29#include <linux/list.h>
  30#include <linux/slab.h>
  31#include <linux/compiler.h>
  32#include <linux/ktime.h>
  33#include <linux/set_memory.h>
  34
  35#include <linux/uaccess.h>
  36#include <asm/mmu_context.h>
  37#include <asm/tlbflush.h>
  38#include <asm/io.h>
  39
  40#include "power.h"
  41
  42#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
  43static bool hibernate_restore_protection;
  44static bool hibernate_restore_protection_active;
  45
  46void enable_restore_image_protection(void)
  47{
  48        hibernate_restore_protection = true;
  49}
  50
  51static inline void hibernate_restore_protection_begin(void)
  52{
  53        hibernate_restore_protection_active = hibernate_restore_protection;
  54}
  55
  56static inline void hibernate_restore_protection_end(void)
  57{
  58        hibernate_restore_protection_active = false;
  59}
  60
  61static inline void hibernate_restore_protect_page(void *page_address)
  62{
  63        if (hibernate_restore_protection_active)
  64                set_memory_ro((unsigned long)page_address, 1);
  65}
  66
  67static inline void hibernate_restore_unprotect_page(void *page_address)
  68{
  69        if (hibernate_restore_protection_active)
  70                set_memory_rw((unsigned long)page_address, 1);
  71}
  72#else
  73static inline void hibernate_restore_protection_begin(void) {}
  74static inline void hibernate_restore_protection_end(void) {}
  75static inline void hibernate_restore_protect_page(void *page_address) {}
  76static inline void hibernate_restore_unprotect_page(void *page_address) {}
  77#endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
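
/*
 * Usage sketch (illustrative only, not part of the original source):
 * the helpers above are meant to bracket image restoration roughly as
 *
 *	hibernate_restore_protection_begin();
 *	...
 *	hibernate_restore_protect_page(page_address);    (page populated)
 *	...
 *	hibernate_restore_unprotect_page(page_address);  (page reused/freed)
 *	...
 *	hibernate_restore_protection_end();
 *
 * When CONFIG_STRICT_KERNEL_RWX and CONFIG_ARCH_HAS_SET_MEMORY are set
 * and enable_restore_image_protection() has been called, the protect and
 * unprotect calls flip a single page between read-only and read-write;
 * otherwise they are empty stubs.
 */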
  78
  79
  80/*
  81 * The calls to set_direct_map_*() should not fail because remapping a page
  82 * here means that we only update protection bits in an existing PTE.
   83 * It is still worth having a warning here if something changes and this
   84 * is no longer the case.
  85 */
  86static inline void hibernate_map_page(struct page *page)
  87{
  88        if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
  89                int ret = set_direct_map_default_noflush(page);
  90
  91                if (ret)
  92                        pr_warn_once("Failed to remap page\n");
  93        } else {
  94                debug_pagealloc_map_pages(page, 1);
  95        }
  96}
  97
  98static inline void hibernate_unmap_page(struct page *page)
  99{
 100        if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
 101                unsigned long addr = (unsigned long)page_address(page);
 102                int ret  = set_direct_map_invalid_noflush(page);
 103
 104                if (ret)
 105                        pr_warn_once("Failed to remap page\n");
 106
 107                flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 108        } else {
 109                debug_pagealloc_unmap_pages(page, 1);
 110        }
 111}
 112
 113static int swsusp_page_is_free(struct page *);
 114static void swsusp_set_page_forbidden(struct page *);
 115static void swsusp_unset_page_forbidden(struct page *);
 116
 117/*
 118 * Number of bytes to reserve for memory allocations made by device drivers
 119 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
 120 * cause image creation to fail (tunable via /sys/power/reserved_size).
 121 */
 122unsigned long reserved_size;
 123
 124void __init hibernate_reserved_size_init(void)
 125{
 126        reserved_size = SPARE_PAGES * PAGE_SIZE;
 127}
 128
 129/*
 130 * Preferred image size in bytes (tunable via /sys/power/image_size).
 131 * When it is set to N, swsusp will do its best to ensure the image
 132 * size will not exceed N bytes, but if that is impossible, it will
 133 * try to create the smallest image possible.
 134 */
 135unsigned long image_size;
 136
 137void __init hibernate_image_size_init(void)
 138{
 139        image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
 140}
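
/*
 * Worked example: with 8 GiB of RAM (2097152 page frames of 4 KiB) the
 * default is ((2097152 * 2) / 5) * 4096 bytes, i.e. roughly 3.2 GiB,
 * which user space may override via /sys/power/image_size.
 */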
 141
 142/*
 143 * List of PBEs needed for restoring the pages that were allocated before
 144 * the suspend and included in the suspend image, but have also been
 145 * allocated by the "resume" kernel, so their contents cannot be written
 146 * directly to their "original" page frames.
 147 */
 148struct pbe *restore_pblist;
 149
 150/* struct linked_page is used to build chains of pages */
 151
 152#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))
 153
 154struct linked_page {
 155        struct linked_page *next;
 156        char data[LINKED_PAGE_DATA_SIZE];
 157} __packed;
 158
 159/*
 160 * List of "safe" pages (ie. pages that were not used by the image kernel
 161 * before hibernation) that may be used as temporary storage for image kernel
 162 * memory contents.
 163 */
 164static struct linked_page *safe_pages_list;
 165
 166/* Pointer to an auxiliary buffer (1 page) */
 167static void *buffer;
 168
 169#define PG_ANY          0
 170#define PG_SAFE         1
 171#define PG_UNSAFE_CLEAR 1
 172#define PG_UNSAFE_KEEP  0
 173
 174static unsigned int allocated_unsafe_pages;
 175
 176/**
 177 * get_image_page - Allocate a page for a hibernation image.
 178 * @gfp_mask: GFP mask for the allocation.
 179 * @safe_needed: Get pages that were not used before hibernation (restore only)
 180 *
 181 * During image restoration, for storing the PBE list and the image data, we can
 182 * only use memory pages that do not conflict with the pages used before
 183 * hibernation.  The "unsafe" pages have PageNosaveFree set and we count them
 184 * using allocated_unsafe_pages.
 185 *
 186 * Each allocated image page is marked as PageNosave and PageNosaveFree so that
 187 * swsusp_free() can release it.
 188 */
 189static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 190{
 191        void *res;
 192
 193        res = (void *)get_zeroed_page(gfp_mask);
 194        if (safe_needed)
 195                while (res && swsusp_page_is_free(virt_to_page(res))) {
 196                        /* The page is unsafe, mark it for swsusp_free() */
 197                        swsusp_set_page_forbidden(virt_to_page(res));
 198                        allocated_unsafe_pages++;
 199                        res = (void *)get_zeroed_page(gfp_mask);
 200                }
 201        if (res) {
 202                swsusp_set_page_forbidden(virt_to_page(res));
 203                swsusp_set_page_free(virt_to_page(res));
 204        }
 205        return res;
 206}
 207
 208static void *__get_safe_page(gfp_t gfp_mask)
 209{
 210        if (safe_pages_list) {
 211                void *ret = safe_pages_list;
 212
 213                safe_pages_list = safe_pages_list->next;
 214                memset(ret, 0, PAGE_SIZE);
 215                return ret;
 216        }
 217        return get_image_page(gfp_mask, PG_SAFE);
 218}
 219
 220unsigned long get_safe_page(gfp_t gfp_mask)
 221{
 222        return (unsigned long)__get_safe_page(gfp_mask);
 223}
 224
 225static struct page *alloc_image_page(gfp_t gfp_mask)
 226{
 227        struct page *page;
 228
 229        page = alloc_page(gfp_mask);
 230        if (page) {
 231                swsusp_set_page_forbidden(page);
 232                swsusp_set_page_free(page);
 233        }
 234        return page;
 235}
 236
 237static void recycle_safe_page(void *page_address)
 238{
 239        struct linked_page *lp = page_address;
 240
 241        lp->next = safe_pages_list;
 242        safe_pages_list = lp;
 243}
 244
 245/**
 246 * free_image_page - Free a page allocated for hibernation image.
 247 * @addr: Address of the page to free.
 248 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
 249 *
 250 * The page to free should have been allocated by get_image_page() (page flags
 251 * set by it are affected).
 252 */
 253static inline void free_image_page(void *addr, int clear_nosave_free)
 254{
 255        struct page *page;
 256
 257        BUG_ON(!virt_addr_valid(addr));
 258
 259        page = virt_to_page(addr);
 260
 261        swsusp_unset_page_forbidden(page);
 262        if (clear_nosave_free)
 263                swsusp_unset_page_free(page);
 264
 265        __free_page(page);
 266}
 267
 268static inline void free_list_of_pages(struct linked_page *list,
 269                                      int clear_page_nosave)
 270{
 271        while (list) {
 272                struct linked_page *lp = list->next;
 273
 274                free_image_page(list, clear_page_nosave);
 275                list = lp;
 276        }
 277}
 278
 279/*
 280 * struct chain_allocator is used for allocating small objects out of
 281 * a linked list of pages called 'the chain'.
 282 *
  283 * The chain grows each time there is no room for a new object in
 284 * the current page.  The allocated objects cannot be freed individually.
 285 * It is only possible to free them all at once, by freeing the entire
 286 * chain.
 287 *
 288 * NOTE: The chain allocator may be inefficient if the allocated objects
 289 * are not much smaller than PAGE_SIZE.
 290 */
 291struct chain_allocator {
 292        struct linked_page *chain;      /* the chain */
 293        unsigned int used_space;        /* total size of objects allocated out
 294                                           of the current page */
 295        gfp_t gfp_mask;         /* mask for allocating pages */
 296        int safe_needed;        /* if set, only "safe" pages are allocated */
 297};
 298
 299static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
 300                       int safe_needed)
 301{
 302        ca->chain = NULL;
 303        ca->used_space = LINKED_PAGE_DATA_SIZE;
 304        ca->gfp_mask = gfp_mask;
 305        ca->safe_needed = safe_needed;
 306}
 307
 308static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 309{
 310        void *ret;
 311
 312        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 313                struct linked_page *lp;
 314
 315                lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
 316                                        get_image_page(ca->gfp_mask, PG_ANY);
 317                if (!lp)
 318                        return NULL;
 319
 320                lp->next = ca->chain;
 321                ca->chain = lp;
 322                ca->used_space = 0;
 323        }
 324        ret = ca->chain->data + ca->used_space;
 325        ca->used_space += size;
 326        return ret;
 327}
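
/*
 * Usage sketch (illustrative): this is the pattern memory_bm_create()
 * below follows - set up the allocator, carve small objects out of it,
 * and release everything at once through the page chain:
 *
 *	struct chain_allocator ca;
 *	struct rtree_node *node;
 *
 *	chain_init(&ca, GFP_KERNEL, PG_ANY);
 *	node = chain_alloc(&ca, sizeof(struct rtree_node));
 *	...
 *	free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);
 */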
 328
 329/**
 330 * Data types related to memory bitmaps.
 331 *
  332 * The memory bitmap is a structure consisting of many linked lists of
  333 * objects.  The main list's elements are of type struct zone_bitmap
  334 * and each of them corresponds to one zone.  For each zone bitmap
  335 * object there is a list of objects of type struct bm_block that
  336 * represent the blocks of the bitmap in which information is stored.
 337 *
 338 * struct memory_bitmap contains a pointer to the main list of zone
 339 * bitmap objects, a struct bm_position used for browsing the bitmap,
 340 * and a pointer to the list of pages used for allocating all of the
 341 * zone bitmap objects and bitmap block objects.
 342 *
 343 * NOTE: It has to be possible to lay out the bitmap in memory
 344 * using only allocations of order 0.  Additionally, the bitmap is
  345 * designed to work with an arbitrary number of zones (this is over the
 346 * top for now, but let's avoid making unnecessary assumptions ;-).
 347 *
 348 * struct zone_bitmap contains a pointer to a list of bitmap block
 349 * objects and a pointer to the bitmap block object that has been
 350 * most recently used for setting bits.  Additionally, it contains the
 351 * PFNs that correspond to the start and end of the represented zone.
 352 *
 353 * struct bm_block contains a pointer to the memory page in which
  354 * information is stored (in the form of a block of the bitmap).
 355 * It also contains the pfns that correspond to the start and end of
 356 * the represented memory area.
 357 *
 358 * The memory bitmap is organized as a radix tree to guarantee fast random
 359 * access to the bits. There is one radix tree for each zone (as returned
 360 * from create_mem_extents).
 361 *
 362 * One radix tree is represented by one struct mem_zone_bm_rtree. There are
 363 * two linked lists for the nodes of the tree, one for the inner nodes and
  364 * one for the leaf nodes. The linked leaf nodes are used for fast linear
 365 * access of the memory bitmap.
 366 *
 367 * The struct rtree_node represents one node of the radix tree.
 368 */
 369
 370#define BM_END_OF_MAP   (~0UL)
 371
 372#define BM_BITS_PER_BLOCK       (PAGE_SIZE * BITS_PER_BYTE)
 373#define BM_BLOCK_SHIFT          (PAGE_SHIFT + 3)
 374#define BM_BLOCK_MASK           ((1UL << BM_BLOCK_SHIFT) - 1)
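
/*
 * Worked example (assuming 4 KiB pages): BM_BITS_PER_BLOCK is
 * 4096 * 8 = 32768, so a single bitmap page (one leaf node) covers
 * 32768 page frames, i.e. 128 MiB of memory.  BM_BLOCK_SHIFT is then
 * 12 + 3 = 15, and BM_BLOCK_MASK selects the low 15 bits of a
 * zone-relative PFN, which form the bit number inside the leaf.
 */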
 375
 376/*
 377 * struct rtree_node is a wrapper struct to link the nodes
 378 * of the rtree together for easy linear iteration over
 379 * bits and easy freeing
 380 */
 381struct rtree_node {
 382        struct list_head list;
 383        unsigned long *data;
 384};
 385
 386/*
 387 * struct mem_zone_bm_rtree represents a bitmap used for one
 388 * populated memory zone.
 389 */
 390struct mem_zone_bm_rtree {
 391        struct list_head list;          /* Link Zones together         */
 392        struct list_head nodes;         /* Radix Tree inner nodes      */
 393        struct list_head leaves;        /* Radix Tree leaves           */
 394        unsigned long start_pfn;        /* Zone start page frame       */
 395        unsigned long end_pfn;          /* Zone end page frame + 1     */
 396        struct rtree_node *rtree;       /* Radix Tree Root             */
 397        int levels;                     /* Number of Radix Tree Levels */
 398        unsigned int blocks;            /* Number of Bitmap Blocks     */
 399};
 400
  401/* struct bm_position is used for browsing memory bitmaps */
 402
 403struct bm_position {
 404        struct mem_zone_bm_rtree *zone;
 405        struct rtree_node *node;
 406        unsigned long node_pfn;
 407        int node_bit;
 408};
 409
 410struct memory_bitmap {
 411        struct list_head zones;
 412        struct linked_page *p_list;     /* list of pages used to store zone
 413                                           bitmap objects and bitmap block
 414                                           objects */
 415        struct bm_position cur; /* most recently used bit position */
 416};
 417
 418/* Functions that operate on memory bitmaps */
 419
 420#define BM_ENTRIES_PER_LEVEL    (PAGE_SIZE / sizeof(unsigned long))
 421#if BITS_PER_LONG == 32
 422#define BM_RTREE_LEVEL_SHIFT    (PAGE_SHIFT - 2)
 423#else
 424#define BM_RTREE_LEVEL_SHIFT    (PAGE_SHIFT - 3)
 425#endif
 426#define BM_RTREE_LEVEL_MASK     ((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
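
/*
 * Worked example (assuming 4 KiB pages): on 64-bit a node page holds
 * 4096 / 8 = 512 pointers, so BM_RTREE_LEVEL_SHIFT is 12 - 3 = 9 and
 * every inner level resolves 9 bits of the block number; on 32-bit it
 * is 4096 / 4 = 1024 entries and 10 bits per level.
 */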
 427
 428/**
 429 * alloc_rtree_node - Allocate a new node and add it to the radix tree.
 430 *
 431 * This function is used to allocate inner nodes as well as the
  432 * leaf nodes of the radix tree. It also adds the node to the
 433 * corresponding linked list passed in by the *list parameter.
 434 */
 435static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
 436                                           struct chain_allocator *ca,
 437                                           struct list_head *list)
 438{
 439        struct rtree_node *node;
 440
 441        node = chain_alloc(ca, sizeof(struct rtree_node));
 442        if (!node)
 443                return NULL;
 444
 445        node->data = get_image_page(gfp_mask, safe_needed);
 446        if (!node->data)
 447                return NULL;
 448
 449        list_add_tail(&node->list, list);
 450
 451        return node;
 452}
 453
 454/**
  455 * add_rtree_block - Add a new leaf node to the radix tree.
  456 *
  457 * The leaf nodes need to be allocated in order to keep the leaves
 458 * linked list in order. This is guaranteed by the zone->blocks
 459 * counter.
 460 */
 461static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
 462                           int safe_needed, struct chain_allocator *ca)
 463{
 464        struct rtree_node *node, *block, **dst;
 465        unsigned int levels_needed, block_nr;
 466        int i;
 467
 468        block_nr = zone->blocks;
 469        levels_needed = 0;
 470
 471        /* How many levels do we need for this block nr? */
 472        while (block_nr) {
 473                levels_needed += 1;
 474                block_nr >>= BM_RTREE_LEVEL_SHIFT;
 475        }
 476
 477        /* Make sure the rtree has enough levels */
 478        for (i = zone->levels; i < levels_needed; i++) {
 479                node = alloc_rtree_node(gfp_mask, safe_needed, ca,
 480                                        &zone->nodes);
 481                if (!node)
 482                        return -ENOMEM;
 483
 484                node->data[0] = (unsigned long)zone->rtree;
 485                zone->rtree = node;
 486                zone->levels += 1;
 487        }
 488
 489        /* Allocate new block */
 490        block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
 491        if (!block)
 492                return -ENOMEM;
 493
 494        /* Now walk the rtree to insert the block */
 495        node = zone->rtree;
 496        dst = &zone->rtree;
 497        block_nr = zone->blocks;
 498        for (i = zone->levels; i > 0; i--) {
 499                int index;
 500
 501                if (!node) {
 502                        node = alloc_rtree_node(gfp_mask, safe_needed, ca,
 503                                                &zone->nodes);
 504                        if (!node)
 505                                return -ENOMEM;
 506                        *dst = node;
 507                }
 508
 509                index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
 510                index &= BM_RTREE_LEVEL_MASK;
 511                dst = (struct rtree_node **)&((*dst)->data[index]);
 512                node = *dst;
 513        }
 514
 515        zone->blocks += 1;
 516        *dst = block;
 517
 518        return 0;
 519}
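
/*
 * Illustration of the level bookkeeping above (4 KiB pages, 64-bit,
 * so BM_RTREE_LEVEL_SHIFT == 9): block 0 needs no inner node at all
 * (zone->rtree points at the leaf itself), blocks 1..511 need one
 * level, and a second level is added when block 512 arrives, which is
 * enough for up to 512 * 512 leaf blocks per zone.
 */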
 520
 521static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 522                               int clear_nosave_free);
 523
 524/**
 525 * create_zone_bm_rtree - Create a radix tree for one zone.
 526 *
  527 * Allocates the mem_zone_bm_rtree structure and initializes it.
  528 * This function also allocates and builds the radix tree for the
 529 * zone.
 530 */
 531static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
 532                                                      int safe_needed,
 533                                                      struct chain_allocator *ca,
 534                                                      unsigned long start,
 535                                                      unsigned long end)
 536{
 537        struct mem_zone_bm_rtree *zone;
 538        unsigned int i, nr_blocks;
 539        unsigned long pages;
 540
 541        pages = end - start;
 542        zone  = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
 543        if (!zone)
 544                return NULL;
 545
 546        INIT_LIST_HEAD(&zone->nodes);
 547        INIT_LIST_HEAD(&zone->leaves);
 548        zone->start_pfn = start;
 549        zone->end_pfn = end;
 550        nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
 551
 552        for (i = 0; i < nr_blocks; i++) {
 553                if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
 554                        free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
 555                        return NULL;
 556                }
 557        }
 558
 559        return zone;
 560}
 561
 562/**
 563 * free_zone_bm_rtree - Free the memory of the radix tree.
 564 *
 565 * Free all node pages of the radix tree. The mem_zone_bm_rtree
 566 * structure itself is not freed here nor are the rtree_node
 567 * structs.
 568 */
 569static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 570                               int clear_nosave_free)
 571{
 572        struct rtree_node *node;
 573
 574        list_for_each_entry(node, &zone->nodes, list)
 575                free_image_page(node->data, clear_nosave_free);
 576
 577        list_for_each_entry(node, &zone->leaves, list)
 578                free_image_page(node->data, clear_nosave_free);
 579}
 580
 581static void memory_bm_position_reset(struct memory_bitmap *bm)
 582{
 583        bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
 584                                  list);
 585        bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 586                                  struct rtree_node, list);
 587        bm->cur.node_pfn = 0;
 588        bm->cur.node_bit = 0;
 589}
 590
 591static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 592
 593struct mem_extent {
 594        struct list_head hook;
 595        unsigned long start;
 596        unsigned long end;
 597};
 598
 599/**
 600 * free_mem_extents - Free a list of memory extents.
 601 * @list: List of extents to free.
 602 */
 603static void free_mem_extents(struct list_head *list)
 604{
 605        struct mem_extent *ext, *aux;
 606
 607        list_for_each_entry_safe(ext, aux, list, hook) {
 608                list_del(&ext->hook);
 609                kfree(ext);
 610        }
 611}
 612
 613/**
 614 * create_mem_extents - Create a list of memory extents.
 615 * @list: List to put the extents into.
 616 * @gfp_mask: Mask to use for memory allocations.
 617 *
 618 * The extents represent contiguous ranges of PFNs.
 619 */
 620static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 621{
 622        struct zone *zone;
 623
 624        INIT_LIST_HEAD(list);
 625
 626        for_each_populated_zone(zone) {
 627                unsigned long zone_start, zone_end;
 628                struct mem_extent *ext, *cur, *aux;
 629
 630                zone_start = zone->zone_start_pfn;
 631                zone_end = zone_end_pfn(zone);
 632
 633                list_for_each_entry(ext, list, hook)
 634                        if (zone_start <= ext->end)
 635                                break;
 636
 637                if (&ext->hook == list || zone_end < ext->start) {
 638                        /* New extent is necessary */
 639                        struct mem_extent *new_ext;
 640
 641                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
 642                        if (!new_ext) {
 643                                free_mem_extents(list);
 644                                return -ENOMEM;
 645                        }
 646                        new_ext->start = zone_start;
 647                        new_ext->end = zone_end;
 648                        list_add_tail(&new_ext->hook, &ext->hook);
 649                        continue;
 650                }
 651
 652                /* Merge this zone's range of PFNs with the existing one */
 653                if (zone_start < ext->start)
 654                        ext->start = zone_start;
 655                if (zone_end > ext->end)
 656                        ext->end = zone_end;
 657
 658                /* More merging may be possible */
 659                cur = ext;
 660                list_for_each_entry_safe_continue(cur, aux, list, hook) {
 661                        if (zone_end < cur->start)
 662                                break;
 663                        if (zone_end < cur->end)
 664                                ext->end = cur->end;
 665                        list_del(&cur->hook);
 666                        kfree(cur);
 667                }
 668        }
 669
 670        return 0;
 671}
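
/*
 * Example (illustrative): with populated zones spanning PFNs
 * [0, 0x1000) and [0x800, 0x2000), the first iteration creates the
 * extent [0, 0x1000) and the second merges into it, leaving a single
 * extent [0, 0x2000).  Disjoint zones end up as separate extents,
 * kept sorted by start PFN.
 */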
 672
 673/**
 674 * memory_bm_create - Allocate memory for a memory bitmap.
 675 */
 676static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
 677                            int safe_needed)
 678{
 679        struct chain_allocator ca;
 680        struct list_head mem_extents;
 681        struct mem_extent *ext;
 682        int error;
 683
 684        chain_init(&ca, gfp_mask, safe_needed);
 685        INIT_LIST_HEAD(&bm->zones);
 686
 687        error = create_mem_extents(&mem_extents, gfp_mask);
 688        if (error)
 689                return error;
 690
 691        list_for_each_entry(ext, &mem_extents, hook) {
 692                struct mem_zone_bm_rtree *zone;
 693
 694                zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
 695                                            ext->start, ext->end);
 696                if (!zone) {
 697                        error = -ENOMEM;
 698                        goto Error;
 699                }
 700                list_add_tail(&zone->list, &bm->zones);
 701        }
 702
 703        bm->p_list = ca.chain;
 704        memory_bm_position_reset(bm);
 705 Exit:
 706        free_mem_extents(&mem_extents);
 707        return error;
 708
 709 Error:
 710        bm->p_list = ca.chain;
 711        memory_bm_free(bm, PG_UNSAFE_CLEAR);
 712        goto Exit;
 713}
 714
 715/**
 716 * memory_bm_free - Free memory occupied by the memory bitmap.
 717 * @bm: Memory bitmap.
 718 */
 719static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 720{
 721        struct mem_zone_bm_rtree *zone;
 722
 723        list_for_each_entry(zone, &bm->zones, list)
 724                free_zone_bm_rtree(zone, clear_nosave_free);
 725
 726        free_list_of_pages(bm->p_list, clear_nosave_free);
 727
 728        INIT_LIST_HEAD(&bm->zones);
 729}
 730
 731/**
 732 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
 733 *
 734 * Find the bit in memory bitmap @bm that corresponds to the given PFN.
  735 * The cur.zone, cur.node and cur.node_pfn members of @bm are updated.
 736 *
 737 * Walk the radix tree to find the page containing the bit that represents @pfn
 738 * and return the position of the bit in @addr and @bit_nr.
 739 */
 740static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 741                              void **addr, unsigned int *bit_nr)
 742{
 743        struct mem_zone_bm_rtree *curr, *zone;
 744        struct rtree_node *node;
 745        int i, block_nr;
 746
 747        zone = bm->cur.zone;
 748
 749        if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
 750                goto zone_found;
 751
 752        zone = NULL;
 753
 754        /* Find the right zone */
 755        list_for_each_entry(curr, &bm->zones, list) {
 756                if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
 757                        zone = curr;
 758                        break;
 759                }
 760        }
 761
 762        if (!zone)
 763                return -EFAULT;
 764
 765zone_found:
 766        /*
 767         * We have found the zone. Now walk the radix tree to find the leaf node
 768         * for our PFN.
 769         */
 770
 771        /*
 772         * If the zone we wish to scan is the current zone and the
 773         * pfn falls into the current node then we do not need to walk
 774         * the tree.
 775         */
 776        node = bm->cur.node;
 777        if (zone == bm->cur.zone &&
 778            ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
 779                goto node_found;
 780
 781        node      = zone->rtree;
 782        block_nr  = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;
 783
 784        for (i = zone->levels; i > 0; i--) {
 785                int index;
 786
 787                index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
 788                index &= BM_RTREE_LEVEL_MASK;
 789                BUG_ON(node->data[index] == 0);
 790                node = (struct rtree_node *)node->data[index];
 791        }
 792
 793node_found:
 794        /* Update last position */
 795        bm->cur.zone = zone;
 796        bm->cur.node = node;
 797        bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
 798
 799        /* Set return values */
 800        *addr = node->data;
 801        *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;
 802
 803        return 0;
 804}
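
/*
 * Worked example (4 KiB pages, 64-bit): for a zone starting at PFN
 * 0x100000 and pfn == 0x10ffff, the zone-relative PFN is 0xffff, so
 * block_nr = 0xffff >> 15 = 1 (the second leaf) and
 * *bit_nr = 0xffff & BM_BLOCK_MASK = 0x7fff (the last bit in that
 * leaf).  With a one-level tree the leaf is found at
 * zone->rtree->data[1].
 */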
 805
 806static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 807{
 808        void *addr;
 809        unsigned int bit;
 810        int error;
 811
 812        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 813        BUG_ON(error);
 814        set_bit(bit, addr);
 815}
 816
 817static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
 818{
 819        void *addr;
 820        unsigned int bit;
 821        int error;
 822
 823        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 824        if (!error)
 825                set_bit(bit, addr);
 826
 827        return error;
 828}
 829
 830static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 831{
 832        void *addr;
 833        unsigned int bit;
 834        int error;
 835
 836        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 837        BUG_ON(error);
 838        clear_bit(bit, addr);
 839}
 840
 841static void memory_bm_clear_current(struct memory_bitmap *bm)
 842{
 843        int bit;
 844
 845        bit = max(bm->cur.node_bit - 1, 0);
 846        clear_bit(bit, bm->cur.node->data);
 847}
 848
 849static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 850{
 851        void *addr;
 852        unsigned int bit;
 853        int error;
 854
 855        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 856        BUG_ON(error);
 857        return test_bit(bit, addr);
 858}
 859
 860static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
 861{
 862        void *addr;
 863        unsigned int bit;
 864
 865        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
 866}
 867
 868/*
 869 * rtree_next_node - Jump to the next leaf node.
 870 *
 871 * Set the position to the beginning of the next node in the
 872 * memory bitmap. This is either the next node in the current
 873 * zone's radix tree or the first node in the radix tree of the
 874 * next zone.
 875 *
 876 * Return true if there is a next node, false otherwise.
 877 */
 878static bool rtree_next_node(struct memory_bitmap *bm)
 879{
 880        if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
 881                bm->cur.node = list_entry(bm->cur.node->list.next,
 882                                          struct rtree_node, list);
 883                bm->cur.node_pfn += BM_BITS_PER_BLOCK;
 884                bm->cur.node_bit  = 0;
 885                touch_softlockup_watchdog();
 886                return true;
 887        }
 888
 889        /* No more nodes, goto next zone */
 890        if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
 891                bm->cur.zone = list_entry(bm->cur.zone->list.next,
 892                                  struct mem_zone_bm_rtree, list);
 893                bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 894                                          struct rtree_node, list);
 895                bm->cur.node_pfn = 0;
 896                bm->cur.node_bit = 0;
 897                return true;
 898        }
 899
 900        /* No more zones */
 901        return false;
 902}
 903
 904/**
  905 * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
 906 * @bm: Memory bitmap.
 907 *
 908 * Starting from the last returned position this function searches for the next
 909 * set bit in @bm and returns the PFN represented by it.  If no more bits are
 910 * set, BM_END_OF_MAP is returned.
 911 *
 912 * It is required to run memory_bm_position_reset() before the first call to
 913 * this function for the given memory bitmap.
 914 */
 915static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 916{
 917        unsigned long bits, pfn, pages;
 918        int bit;
 919
 920        do {
 921                pages     = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
 922                bits      = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
 923                bit       = find_next_bit(bm->cur.node->data, bits,
 924                                          bm->cur.node_bit);
 925                if (bit < bits) {
 926                        pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 927                        bm->cur.node_bit = bit + 1;
 928                        return pfn;
 929                }
 930        } while (rtree_next_node(bm));
 931
 932        return BM_END_OF_MAP;
 933}
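
/*
 * Usage sketch (illustrative) - the iteration pattern used by
 * clear_or_poison_free_pages() and swsusp_free() below:
 *
 *	memory_bm_position_reset(bm);
 *	pfn = memory_bm_next_pfn(bm);
 *	while (pfn != BM_END_OF_MAP) {
 *		...	(act on the set bit for pfn)
 *		pfn = memory_bm_next_pfn(bm);
 *	}
 */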
 934
 935/*
 936 * This structure represents a range of page frames the contents of which
 937 * should not be saved during hibernation.
 938 */
 939struct nosave_region {
 940        struct list_head list;
 941        unsigned long start_pfn;
 942        unsigned long end_pfn;
 943};
 944
 945static LIST_HEAD(nosave_regions);
 946
 947static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
 948{
 949        struct rtree_node *node;
 950
 951        list_for_each_entry(node, &zone->nodes, list)
 952                recycle_safe_page(node->data);
 953
 954        list_for_each_entry(node, &zone->leaves, list)
 955                recycle_safe_page(node->data);
 956}
 957
 958static void memory_bm_recycle(struct memory_bitmap *bm)
 959{
 960        struct mem_zone_bm_rtree *zone;
 961        struct linked_page *p_list;
 962
 963        list_for_each_entry(zone, &bm->zones, list)
 964                recycle_zone_bm_rtree(zone);
 965
 966        p_list = bm->p_list;
 967        while (p_list) {
 968                struct linked_page *lp = p_list;
 969
 970                p_list = lp->next;
 971                recycle_safe_page(lp);
 972        }
 973}
 974
 975/**
 976 * register_nosave_region - Register a region of unsaveable memory.
 977 *
 978 * Register a range of page frames the contents of which should not be saved
 979 * during hibernation (to be used in the early initialization code).
 980 */
 981void __init __register_nosave_region(unsigned long start_pfn,
 982                                     unsigned long end_pfn, int use_kmalloc)
 983{
 984        struct nosave_region *region;
 985
 986        if (start_pfn >= end_pfn)
 987                return;
 988
 989        if (!list_empty(&nosave_regions)) {
 990                /* Try to extend the previous region (they should be sorted) */
 991                region = list_entry(nosave_regions.prev,
 992                                        struct nosave_region, list);
 993                if (region->end_pfn == start_pfn) {
 994                        region->end_pfn = end_pfn;
 995                        goto Report;
 996                }
 997        }
 998        if (use_kmalloc) {
 999                /* During init, this shouldn't fail */
1000                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
1001                BUG_ON(!region);
1002        } else {
1003                /* This allocation cannot fail */
1004                region = memblock_alloc(sizeof(struct nosave_region),
1005                                        SMP_CACHE_BYTES);
1006                if (!region)
1007                        panic("%s: Failed to allocate %zu bytes\n", __func__,
1008                              sizeof(struct nosave_region));
1009        }
1010        region->start_pfn = start_pfn;
1011        region->end_pfn = end_pfn;
1012        list_add_tail(&region->list, &nosave_regions);
1013 Report:
1014        pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n",
1015                (unsigned long long) start_pfn << PAGE_SHIFT,
1016                ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
1017}
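
/*
 * Example (illustrative): early architecture or platform code excludes
 * a firmware-reserved range from the image with something like
 *
 *	register_nosave_region(start_pfn, end_pfn);
 *
 * where start_pfn/end_pfn are placeholders and register_nosave_region()
 * is assumed to be the <linux/suspend.h> wrapper that forwards here with
 * use_kmalloc == 0.  Regions registered this way are later set in the
 * forbidden bitmap by mark_nosave_pages().
 */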
1018
1019/*
1020 * Set bits in this map correspond to the page frames the contents of which
1021 * should not be saved during the suspend.
1022 */
1023static struct memory_bitmap *forbidden_pages_map;
1024
1025/* Set bits in this map correspond to free page frames. */
1026static struct memory_bitmap *free_pages_map;
1027
1028/*
1029 * Each page frame allocated for creating the image is marked by setting the
1030 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
1031 */
1032
1033void swsusp_set_page_free(struct page *page)
1034{
1035        if (free_pages_map)
1036                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
1037}
1038
1039static int swsusp_page_is_free(struct page *page)
1040{
1041        return free_pages_map ?
1042                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
1043}
1044
1045void swsusp_unset_page_free(struct page *page)
1046{
1047        if (free_pages_map)
1048                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
1049}
1050
1051static void swsusp_set_page_forbidden(struct page *page)
1052{
1053        if (forbidden_pages_map)
1054                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
1055}
1056
1057int swsusp_page_is_forbidden(struct page *page)
1058{
1059        return forbidden_pages_map ?
1060                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
1061}
1062
1063static void swsusp_unset_page_forbidden(struct page *page)
1064{
1065        if (forbidden_pages_map)
1066                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
1067}
1068
1069/**
1070 * mark_nosave_pages - Mark pages that should not be saved.
1071 * @bm: Memory bitmap.
1072 *
1073 * Set the bits in @bm that correspond to the page frames the contents of which
1074 * should not be saved.
1075 */
1076static void mark_nosave_pages(struct memory_bitmap *bm)
1077{
1078        struct nosave_region *region;
1079
1080        if (list_empty(&nosave_regions))
1081                return;
1082
1083        list_for_each_entry(region, &nosave_regions, list) {
1084                unsigned long pfn;
1085
1086                pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n",
1087                         (unsigned long long) region->start_pfn << PAGE_SHIFT,
1088                         ((unsigned long long) region->end_pfn << PAGE_SHIFT)
1089                                - 1);
1090
1091                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
1092                        if (pfn_valid(pfn)) {
1093                                /*
1094                                 * It is safe to ignore the result of
1095                                 * mem_bm_set_bit_check() here, since we won't
1096                                 * touch the PFNs for which the error is
1097                                 * returned anyway.
1098                                 */
1099                                mem_bm_set_bit_check(bm, pfn);
1100                        }
1101        }
1102}
1103
1104/**
1105 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
1106 *
1107 * Create bitmaps needed for marking page frames that should not be saved and
1108 * free page frames.  The forbidden_pages_map and free_pages_map pointers are
1109 * only modified if everything goes well, because we don't want the bits to be
1110 * touched before both bitmaps are set up.
1111 */
1112int create_basic_memory_bitmaps(void)
1113{
1114        struct memory_bitmap *bm1, *bm2;
1115        int error = 0;
1116
1117        if (forbidden_pages_map && free_pages_map)
1118                return 0;
1119        else
1120                BUG_ON(forbidden_pages_map || free_pages_map);
1121
1122        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1123        if (!bm1)
1124                return -ENOMEM;
1125
1126        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
1127        if (error)
1128                goto Free_first_object;
1129
1130        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1131        if (!bm2)
1132                goto Free_first_bitmap;
1133
1134        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
1135        if (error)
1136                goto Free_second_object;
1137
1138        forbidden_pages_map = bm1;
1139        free_pages_map = bm2;
1140        mark_nosave_pages(forbidden_pages_map);
1141
1142        pr_debug("Basic memory bitmaps created\n");
1143
1144        return 0;
1145
1146 Free_second_object:
1147        kfree(bm2);
1148 Free_first_bitmap:
1149        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1150 Free_first_object:
1151        kfree(bm1);
1152        return -ENOMEM;
1153}
1154
1155/**
1156 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
1157 *
1158 * Free memory bitmaps allocated by create_basic_memory_bitmaps().  The
1159 * auxiliary pointers are necessary so that the bitmaps themselves are not
1160 * referred to while they are being freed.
1161 */
1162void free_basic_memory_bitmaps(void)
1163{
1164        struct memory_bitmap *bm1, *bm2;
1165
1166        if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
1167                return;
1168
1169        bm1 = forbidden_pages_map;
1170        bm2 = free_pages_map;
1171        forbidden_pages_map = NULL;
1172        free_pages_map = NULL;
1173        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1174        kfree(bm1);
1175        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
1176        kfree(bm2);
1177
1178        pr_debug("Basic memory bitmaps freed\n");
1179}
1180
1181static void clear_or_poison_free_page(struct page *page)
1182{
1183        if (page_poisoning_enabled_static())
1184                __kernel_poison_pages(page, 1);
1185        else if (want_init_on_free())
1186                clear_highpage(page);
1187}
1188
1189void clear_or_poison_free_pages(void)
1190{
1191        struct memory_bitmap *bm = free_pages_map;
1192        unsigned long pfn;
1193
1194        if (WARN_ON(!(free_pages_map)))
1195                return;
1196
1197        if (page_poisoning_enabled() || want_init_on_free()) {
1198                memory_bm_position_reset(bm);
1199                pfn = memory_bm_next_pfn(bm);
1200                while (pfn != BM_END_OF_MAP) {
1201                        if (pfn_valid(pfn))
1202                                clear_or_poison_free_page(pfn_to_page(pfn));
1203
1204                        pfn = memory_bm_next_pfn(bm);
1205                }
1206                memory_bm_position_reset(bm);
1207                pr_info("free pages cleared after restore\n");
1208        }
1209}
1210
1211/**
1212 * snapshot_additional_pages - Estimate the number of extra pages needed.
1213 * @zone: Memory zone to carry out the computation for.
1214 *
 1215 * Estimate the number of additional pages needed for setting up the hibernation
1216 * image data structures for @zone (usually, the returned value is greater than
1217 * the exact number).
1218 */
1219unsigned int snapshot_additional_pages(struct zone *zone)
1220{
1221        unsigned int rtree, nodes;
1222
1223        rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
1224        rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
1225                              LINKED_PAGE_DATA_SIZE);
1226        while (nodes > 1) {
1227                nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
1228                rtree += nodes;
1229        }
1230
1231        return 2 * rtree;
1232}
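
/*
 * Worked example (4 KiB pages, 64-bit, assuming
 * sizeof(struct rtree_node) == 24): a zone spanning 262144 page frames
 * (1 GiB) needs 262144 / 32768 = 8 leaf pages, 1 extra page for the
 * rtree_node bookkeeping (8 * 24 bytes fits in one linked page) and
 * 1 inner node pointing at the 8 leaves, i.e. 10 pages per bitmap and
 * 20 in total, the factor of two accounting for the two bitmaps
 * (orig_bm and copy_bm) used while creating the image.
 */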
1233
1234#ifdef CONFIG_HIGHMEM
1235/**
1236 * count_free_highmem_pages - Compute the total number of free highmem pages.
1237 *
1238 * The returned number is system-wide.
1239 */
1240static unsigned int count_free_highmem_pages(void)
1241{
1242        struct zone *zone;
1243        unsigned int cnt = 0;
1244
1245        for_each_populated_zone(zone)
1246                if (is_highmem(zone))
1247                        cnt += zone_page_state(zone, NR_FREE_PAGES);
1248
1249        return cnt;
1250}
1251
1252/**
1253 * saveable_highmem_page - Check if a highmem page is saveable.
1254 *
1255 * Determine whether a highmem page should be included in a hibernation image.
1256 *
 1257 * We should save the page if it isn't Nosave, NosaveFree or Reserved,
1258 * and it isn't part of a free chunk of pages.
1259 */
1260static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
1261{
1262        struct page *page;
1263
1264        if (!pfn_valid(pfn))
1265                return NULL;
1266
1267        page = pfn_to_online_page(pfn);
1268        if (!page || page_zone(page) != zone)
1269                return NULL;
1270
1271        BUG_ON(!PageHighMem(page));
1272
1273        if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page))
1274                return NULL;
1275
1276        if (PageReserved(page) || PageOffline(page))
1277                return NULL;
1278
1279        if (page_is_guard(page))
1280                return NULL;
1281
1282        return page;
1283}
1284
1285/**
1286 * count_highmem_pages - Compute the total number of saveable highmem pages.
1287 */
1288static unsigned int count_highmem_pages(void)
1289{
1290        struct zone *zone;
1291        unsigned int n = 0;
1292
1293        for_each_populated_zone(zone) {
1294                unsigned long pfn, max_zone_pfn;
1295
1296                if (!is_highmem(zone))
1297                        continue;
1298
1299                mark_free_pages(zone);
1300                max_zone_pfn = zone_end_pfn(zone);
1301                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1302                        if (saveable_highmem_page(zone, pfn))
1303                                n++;
1304        }
1305        return n;
1306}
1307#else
1308static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
1309{
1310        return NULL;
1311}
1312#endif /* CONFIG_HIGHMEM */
1313
1314/**
1315 * saveable_page - Check if the given page is saveable.
1316 *
1317 * Determine whether a non-highmem page should be included in a hibernation
1318 * image.
1319 *
1320 * We should save the page if it isn't Nosave, and is not in the range
1321 * of pages statically defined as 'unsaveable', and it isn't part of
1322 * a free chunk of pages.
1323 */
1324static struct page *saveable_page(struct zone *zone, unsigned long pfn)
1325{
1326        struct page *page;
1327
1328        if (!pfn_valid(pfn))
1329                return NULL;
1330
1331        page = pfn_to_online_page(pfn);
1332        if (!page || page_zone(page) != zone)
1333                return NULL;
1334
1335        BUG_ON(PageHighMem(page));
1336
1337        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
1338                return NULL;
1339
1340        if (PageOffline(page))
1341                return NULL;
1342
1343        if (PageReserved(page)
1344            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
1345                return NULL;
1346
1347        if (page_is_guard(page))
1348                return NULL;
1349
1350        return page;
1351}
1352
1353/**
1354 * count_data_pages - Compute the total number of saveable non-highmem pages.
1355 */
1356static unsigned int count_data_pages(void)
1357{
1358        struct zone *zone;
1359        unsigned long pfn, max_zone_pfn;
1360        unsigned int n = 0;
1361
1362        for_each_populated_zone(zone) {
1363                if (is_highmem(zone))
1364                        continue;
1365
1366                mark_free_pages(zone);
1367                max_zone_pfn = zone_end_pfn(zone);
1368                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1369                        if (saveable_page(zone, pfn))
1370                                n++;
1371        }
1372        return n;
1373}
1374
1375/*
1376 * This is needed, because copy_page and memcpy are not usable for copying
1377 * task structs.
1378 */
1379static inline void do_copy_page(long *dst, long *src)
1380{
1381        int n;
1382
1383        for (n = PAGE_SIZE / sizeof(long); n; n--)
1384                *dst++ = *src++;
1385}
1386
1387/**
1388 * safe_copy_page - Copy a page in a safe way.
1389 *
1390 * Check if the page we are going to copy is marked as present in the kernel
 1391 * page tables. This is always the case if CONFIG_DEBUG_PAGEALLOC or
1392 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
1393 * always returns 'true'.
1394 */
1395static void safe_copy_page(void *dst, struct page *s_page)
1396{
1397        if (kernel_page_present(s_page)) {
1398                do_copy_page(dst, page_address(s_page));
1399        } else {
1400                hibernate_map_page(s_page);
1401                do_copy_page(dst, page_address(s_page));
1402                hibernate_unmap_page(s_page);
1403        }
1404}
1405
1406#ifdef CONFIG_HIGHMEM
1407static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
1408{
1409        return is_highmem(zone) ?
1410                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
1411}
1412
1413static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1414{
1415        struct page *s_page, *d_page;
1416        void *src, *dst;
1417
1418        s_page = pfn_to_page(src_pfn);
1419        d_page = pfn_to_page(dst_pfn);
1420        if (PageHighMem(s_page)) {
1421                src = kmap_atomic(s_page);
1422                dst = kmap_atomic(d_page);
1423                do_copy_page(dst, src);
1424                kunmap_atomic(dst);
1425                kunmap_atomic(src);
1426        } else {
1427                if (PageHighMem(d_page)) {
1428                        /*
1429                         * The page pointed to by src may contain some kernel
1430                         * data modified by kmap_atomic()
1431                         */
1432                        safe_copy_page(buffer, s_page);
1433                        dst = kmap_atomic(d_page);
1434                        copy_page(dst, buffer);
1435                        kunmap_atomic(dst);
1436                } else {
1437                        safe_copy_page(page_address(d_page), s_page);
1438                }
1439        }
1440}
1441#else
1442#define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)
1443
1444static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1445{
1446        safe_copy_page(page_address(pfn_to_page(dst_pfn)),
1447                                pfn_to_page(src_pfn));
1448}
1449#endif /* CONFIG_HIGHMEM */
1450
1451static void copy_data_pages(struct memory_bitmap *copy_bm,
1452                            struct memory_bitmap *orig_bm)
1453{
1454        struct zone *zone;
1455        unsigned long pfn;
1456
1457        for_each_populated_zone(zone) {
1458                unsigned long max_zone_pfn;
1459
1460                mark_free_pages(zone);
1461                max_zone_pfn = zone_end_pfn(zone);
1462                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1463                        if (page_is_saveable(zone, pfn))
1464                                memory_bm_set_bit(orig_bm, pfn);
1465        }
1466        memory_bm_position_reset(orig_bm);
1467        memory_bm_position_reset(copy_bm);
1468        for(;;) {
1469                pfn = memory_bm_next_pfn(orig_bm);
1470                if (unlikely(pfn == BM_END_OF_MAP))
1471                        break;
1472                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1473        }
1474}
1475
1476/* Total number of image pages */
1477static unsigned int nr_copy_pages;
1478/* Number of pages needed for saving the original pfns of the image pages */
1479static unsigned int nr_meta_pages;
1480/*
1481 * Numbers of normal and highmem page frames allocated for hibernation image
1482 * before suspending devices.
1483 */
1484static unsigned int alloc_normal, alloc_highmem;
1485/*
1486 * Memory bitmap used for marking saveable pages (during hibernation) or
1487 * hibernation image pages (during restore)
1488 */
1489static struct memory_bitmap orig_bm;
1490/*
1491 * Memory bitmap used during hibernation for marking allocated page frames that
1492 * will contain copies of saveable pages.  During restore it is initially used
1493 * for marking hibernation image pages, but then the set bits from it are
1494 * duplicated in @orig_bm and it is released.  On highmem systems it is next
1495 * used for marking "safe" highmem pages, but it has to be reinitialized for
1496 * this purpose.
1497 */
1498static struct memory_bitmap copy_bm;
1499
1500/**
1501 * swsusp_free - Free pages allocated for hibernation image.
1502 *
 1503 * Image pages are allocated before snapshot creation, so they need to be
1504 * released after resume.
1505 */
1506void swsusp_free(void)
1507{
1508        unsigned long fb_pfn, fr_pfn;
1509
1510        if (!forbidden_pages_map || !free_pages_map)
1511                goto out;
1512
1513        memory_bm_position_reset(forbidden_pages_map);
1514        memory_bm_position_reset(free_pages_map);
1515
1516loop:
1517        fr_pfn = memory_bm_next_pfn(free_pages_map);
1518        fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1519
1520        /*
1521         * Find the next bit set in both bitmaps. This is guaranteed to
1522         * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
1523         */
1524        do {
1525                if (fb_pfn < fr_pfn)
1526                        fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1527                if (fr_pfn < fb_pfn)
1528                        fr_pfn = memory_bm_next_pfn(free_pages_map);
1529        } while (fb_pfn != fr_pfn);
1530
1531        if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
1532                struct page *page = pfn_to_page(fr_pfn);
1533
1534                memory_bm_clear_current(forbidden_pages_map);
1535                memory_bm_clear_current(free_pages_map);
1536                hibernate_restore_unprotect_page(page_address(page));
1537                __free_page(page);
1538                goto loop;
1539        }
1540
1541out:
1542        nr_copy_pages = 0;
1543        nr_meta_pages = 0;
1544        restore_pblist = NULL;
1545        buffer = NULL;
1546        alloc_normal = 0;
1547        alloc_highmem = 0;
1548        hibernate_restore_protection_end();
1549}
1550
1551/* Helper functions used for the shrinking of memory. */
1552
1553#define GFP_IMAGE       (GFP_KERNEL | __GFP_NOWARN)
1554
1555/**
1556 * preallocate_image_pages - Allocate a number of pages for hibernation image.
1557 * @nr_pages: Number of page frames to allocate.
1558 * @mask: GFP flags to use for the allocation.
1559 *
1560 * Return value: Number of page frames actually allocated
1561 */
1562static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1563{
1564        unsigned long nr_alloc = 0;
1565
1566        while (nr_pages > 0) {
1567                struct page *page;
1568
1569                page = alloc_image_page(mask);
1570                if (!page)
1571                        break;
1572                memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1573                if (PageHighMem(page))
1574                        alloc_highmem++;
1575                else
1576                        alloc_normal++;
1577                nr_pages--;
1578                nr_alloc++;
1579        }
1580
1581        return nr_alloc;
1582}
1583
1584static unsigned long preallocate_image_memory(unsigned long nr_pages,
1585                                              unsigned long avail_normal)
1586{
1587        unsigned long alloc;
1588
1589        if (avail_normal <= alloc_normal)
1590                return 0;
1591
1592        alloc = avail_normal - alloc_normal;
1593        if (nr_pages < alloc)
1594                alloc = nr_pages;
1595
1596        return preallocate_image_pages(alloc, GFP_IMAGE);
1597}
1598
1599#ifdef CONFIG_HIGHMEM
1600static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1601{
1602        return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1603}
1604
1605/**
1606 *  __fraction - Compute (an approximation of) x * (multiplier / base).
1607 */
1608static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1609{
1610        return div64_u64(x * multiplier, base);
1611}
1612
1613static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1614                                                  unsigned long highmem,
1615                                                  unsigned long total)
1616{
1617        unsigned long alloc = __fraction(nr_pages, highmem, total);
1618
1619        return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1620}
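/*
 * A worked example with illustrative numbers: for nr_pages = 1000 extra
 * pages to preallocate, highmem = 262144 free highmem page frames and
 * total = 1048576 page frames overall, __fraction() yields
 * 1000 * 262144 / 1048576 = 250, i.e. roughly a quarter of the extra
 * pages come from highmem, matching its share of total memory.
 */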
1621#else /* CONFIG_HIGHMEM */
1622static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
1623{
1624        return 0;
1625}
1626
1627static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1628                                                         unsigned long highmem,
1629                                                         unsigned long total)
1630{
1631        return 0;
1632}
1633#endif /* CONFIG_HIGHMEM */
1634
1635/**
1636 * free_unnecessary_pages - Release preallocated pages not needed for the image.
1637 */
1638static unsigned long free_unnecessary_pages(void)
1639{
1640        unsigned long save, to_free_normal, to_free_highmem, free;
1641
1642        save = count_data_pages();
1643        if (alloc_normal >= save) {
1644                to_free_normal = alloc_normal - save;
1645                save = 0;
1646        } else {
1647                to_free_normal = 0;
1648                save -= alloc_normal;
1649        }
1650        save += count_highmem_pages();
1651        if (alloc_highmem >= save) {
1652                to_free_highmem = alloc_highmem - save;
1653        } else {
1654                to_free_highmem = 0;
1655                save -= alloc_highmem;
1656                if (to_free_normal > save)
1657                        to_free_normal -= save;
1658                else
1659                        to_free_normal = 0;
1660        }
1661        free = to_free_normal + to_free_highmem;
1662
1663        memory_bm_position_reset(&copy_bm);
1664
1665        while (to_free_normal > 0 || to_free_highmem > 0) {
1666                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1667                struct page *page = pfn_to_page(pfn);
1668
1669                if (PageHighMem(page)) {
1670                        if (!to_free_highmem)
1671                                continue;
1672                        to_free_highmem--;
1673                        alloc_highmem--;
1674                } else {
1675                        if (!to_free_normal)
1676                                continue;
1677                        to_free_normal--;
1678                        alloc_normal--;
1679                }
1680                memory_bm_clear_bit(&copy_bm, pfn);
1681                swsusp_unset_page_forbidden(page);
1682                swsusp_unset_page_free(page);
1683                __free_page(page);
1684        }
1685
1686        return free;
1687}
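/*
 * A worked example with illustrative numbers: with alloc_normal = 600,
 * alloc_highmem = 200, count_data_pages() = 500 and
 * count_highmem_pages() = 150, the first branch gives
 * to_free_normal = 600 - 500 = 100 and save = 0; adding the highmem
 * count makes save = 150, and since alloc_highmem >= save,
 * to_free_highmem = 200 - 150 = 50.  The loop above then releases
 * 100 normal and 50 highmem preallocated pages and the function
 * returns free = 150.
 */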
1688
1689/**
1690 * minimum_image_size - Estimate the minimum acceptable size of an image.
1691 * @saveable: Number of saveable pages in the system.
1692 *
 1693 * We want to avoid trying too hard to free too much memory, so estimate the
1694 * minimum acceptable size of a hibernation image to use as the lower limit for
1695 * preallocating memory.
1696 *
1697 * We assume that the minimum image size should be proportional to
1698 *
1699 * [number of saveable pages] - [number of pages that can be freed in theory]
1700 *
1701 * where the second term is the sum of (1) reclaimable slab pages, (2) active
1702 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages.
1703 */
1704static unsigned long minimum_image_size(unsigned long saveable)
1705{
1706        unsigned long size;
1707
1708        size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
1709                + global_node_page_state(NR_ACTIVE_ANON)
1710                + global_node_page_state(NR_INACTIVE_ANON)
1711                + global_node_page_state(NR_ACTIVE_FILE)
1712                + global_node_page_state(NR_INACTIVE_FILE);
1713
1714        return saveable <= size ? 0 : saveable - size;
1715}
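/*
 * A worked example with illustrative numbers: if there are 800,000
 * saveable pages and the reclaimable slab, anonymous and file LRU
 * counters add up to 500,000 pages, minimum_image_size() returns
 * 300,000, so the preallocation below will not try to shrink the image
 * under 300,000 pages.  If the reclaimable total meets or exceeds the
 * saveable count, 0 is returned and only image_size limits the
 * preallocation.
 */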
1716
1717/**
1718 * hibernate_preallocate_memory - Preallocate memory for hibernation image.
1719 *
1720 * To create a hibernation image it is necessary to make a copy of every page
1721 * frame in use.  We also need a number of page frames to be free during
1722 * hibernation for allocations made while saving the image and for device
1723 * drivers, in case they need to allocate memory from their hibernation
 1724 * callbacks.  These two numbers are given by PAGES_FOR_IO (which is a rough
 1725 * estimate) and by reserved_size divided by PAGE_SIZE (which is tunable through
 1726 * /sys/power/reserved_size), respectively.  To make this happen, we compute the
1727 * total number of available page frames and allocate at least
1728 *
1729 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
1730 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
1731 *
1732 * of them, which corresponds to the maximum size of a hibernation image.
1733 *
1734 * If image_size is set below the number following from the above formula,
1735 * the preallocation of memory is continued until the total number of saveable
1736 * pages in the system is below the requested image size or the minimum
1737 * acceptable image size returned by minimum_image_size(), whichever is greater.
1738 */
1739int hibernate_preallocate_memory(void)
1740{
1741        struct zone *zone;
1742        unsigned long saveable, size, max_size, count, highmem, pages = 0;
1743        unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1744        ktime_t start, stop;
1745        int error;
1746
1747        pr_info("Preallocating image memory\n");
1748        start = ktime_get();
1749
1750        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1751        if (error) {
1752                pr_err("Cannot allocate original bitmap\n");
1753                goto err_out;
1754        }
1755
1756        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1757        if (error) {
1758                pr_err("Cannot allocate copy bitmap\n");
1759                goto err_out;
1760        }
1761
1762        alloc_normal = 0;
1763        alloc_highmem = 0;
1764
1765        /* Count the number of saveable data pages. */
1766        save_highmem = count_highmem_pages();
1767        saveable = count_data_pages();
1768
1769        /*
1770         * Compute the total number of page frames we can use (count) and the
1771         * number of pages needed for image metadata (size).
1772         */
1773        count = saveable;
1774        saveable += save_highmem;
1775        highmem = save_highmem;
1776        size = 0;
1777        for_each_populated_zone(zone) {
1778                size += snapshot_additional_pages(zone);
1779                if (is_highmem(zone))
1780                        highmem += zone_page_state(zone, NR_FREE_PAGES);
1781                else
1782                        count += zone_page_state(zone, NR_FREE_PAGES);
1783        }
1784        avail_normal = count;
1785        count += highmem;
1786        count -= totalreserve_pages;
1787
1788        /* Compute the maximum number of saveable pages to leave in memory. */
1789        max_size = (count - (size + PAGES_FOR_IO)) / 2
1790                        - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
1791        /* Compute the desired number of image pages specified by image_size. */
1792        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1793        if (size > max_size)
1794                size = max_size;
1795        /*
1796         * If the desired number of image pages is at least as large as the
1797         * current number of saveable pages in memory, allocate page frames for
1798         * the image and we're done.
1799         */
1800        if (size >= saveable) {
1801                pages = preallocate_image_highmem(save_highmem);
1802                pages += preallocate_image_memory(saveable - pages, avail_normal);
1803                goto out;
1804        }
1805
1806        /* Estimate the minimum size of the image. */
1807        pages = minimum_image_size(saveable);
1808        /*
1809         * To avoid excessive pressure on the normal zone, leave room in it to
1810         * accommodate an image of the minimum size (unless it's already too
1811         * small, in which case don't preallocate pages from it at all).
1812         */
1813        if (avail_normal > pages)
1814                avail_normal -= pages;
1815        else
1816                avail_normal = 0;
1817        if (size < pages)
1818                size = min_t(unsigned long, pages, max_size);
1819
1820        /*
1821         * Let the memory management subsystem know that we're going to need a
1822         * large number of page frames to allocate and make it free some memory.
1823         * NOTE: If this is not done, performance will be hurt badly in some
1824         * test cases.
1825         */
1826        shrink_all_memory(saveable - size);
1827
1828        /*
1829         * The number of saveable pages in memory was too high, so apply some
1830         * pressure to decrease it.  First, make room for the largest possible
1831         * image and fail if that doesn't work.  Next, try to decrease the size
1832         * of the image as much as indicated by 'size' using allocations from
1833         * highmem and non-highmem zones separately.
1834         */
1835        pages_highmem = preallocate_image_highmem(highmem / 2);
1836        alloc = count - max_size;
1837        if (alloc > pages_highmem)
1838                alloc -= pages_highmem;
1839        else
1840                alloc = 0;
1841        pages = preallocate_image_memory(alloc, avail_normal);
1842        if (pages < alloc) {
1843                /* We have exhausted non-highmem pages, try highmem. */
1844                alloc -= pages;
1845                pages += pages_highmem;
1846                pages_highmem = preallocate_image_highmem(alloc);
1847                if (pages_highmem < alloc) {
1848                        pr_err("Image allocation is %lu pages short\n",
1849                                alloc - pages_highmem);
1850                        goto err_out;
1851                }
1852                pages += pages_highmem;
1853                /*
1854                 * size is the desired number of saveable pages to leave in
1855                 * memory, so try to preallocate (all memory - size) pages.
1856                 */
1857                alloc = (count - pages) - size;
1858                pages += preallocate_image_highmem(alloc);
1859        } else {
1860                /*
1861                 * There are approximately max_size saveable pages at this point
1862                 * and we want to reduce this number down to size.
1863                 */
1864                alloc = max_size - size;
1865                size = preallocate_highmem_fraction(alloc, highmem, count);
1866                pages_highmem += size;
1867                alloc -= size;
1868                size = preallocate_image_memory(alloc, avail_normal);
1869                pages_highmem += preallocate_image_highmem(alloc - size);
1870                pages += pages_highmem + size;
1871        }
1872
1873        /*
1874         * We only need as many page frames for the image as there are saveable
1875         * pages in memory, but we have allocated more.  Release the excessive
1876         * ones now.
1877         */
1878        pages -= free_unnecessary_pages();
1879
1880 out:
1881        stop = ktime_get();
1882        pr_info("Allocated %lu pages for snapshot\n", pages);
1883        swsusp_show_speed(start, stop, pages, "Allocated");
1884
1885        return 0;
1886
1887 err_out:
1888        swsusp_free();
1889        return -ENOMEM;
1890}
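/*
 * A worked example of the formulas above (the PAGES_FOR_IO and
 * reserved_size values are assumed here for illustration only): with
 * count = 1,000,000 usable page frames, size = 2,000 metadata pages,
 * PAGES_FOR_IO = 1,024 and reserved_size rounding up to 256 pages,
 *
 *   max_size = (1,000,000 - (2,000 + 1,024)) / 2 - 2 * 256 = 497,976
 *
 * saveable pages may stay in memory, so at least
 * 1,000,000 - 497,976 = 502,024 page frames are preallocated, which is
 * exactly ([total] + PAGES_FOR_IO + [metadata]) / 2 + 2 * 256, matching
 * the formula in the kernel-doc above.
 */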
1891
1892#ifdef CONFIG_HIGHMEM
1893/**
1894 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
1895 *
1896 * Compute the number of non-highmem pages that will be necessary for creating
1897 * copies of highmem pages.
1898 */
1899static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1900{
1901        unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1902
1903        if (free_highmem >= nr_highmem)
1904                nr_highmem = 0;
1905        else
1906                nr_highmem -= free_highmem;
1907
1908        return nr_highmem;
1909}
1910#else
1911static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1912#endif /* CONFIG_HIGHMEM */
1913
1914/**
1915 * enough_free_mem - Check if there is enough free memory for the image.
1916 */
1917static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1918{
1919        struct zone *zone;
1920        unsigned int free = alloc_normal;
1921
1922        for_each_populated_zone(zone)
1923                if (!is_highmem(zone))
1924                        free += zone_page_state(zone, NR_FREE_PAGES);
1925
1926        nr_pages += count_pages_for_highmem(nr_highmem);
1927        pr_debug("Normal pages needed: %u + %u, available pages: %u\n",
1928                 nr_pages, PAGES_FOR_IO, free);
1929
1930        return free > nr_pages + PAGES_FOR_IO;
1931}
1932
1933#ifdef CONFIG_HIGHMEM
1934/**
1935 * get_highmem_buffer - Allocate a buffer for highmem pages.
1936 *
1937 * If there are some highmem pages in the hibernation image, we may need a
1938 * buffer to copy them and/or load their data.
1939 */
1940static inline int get_highmem_buffer(int safe_needed)
1941{
1942        buffer = get_image_page(GFP_ATOMIC, safe_needed);
1943        return buffer ? 0 : -ENOMEM;
1944}
1945
1946/**
 1947 * alloc_highmem_pages - Allocate some highmem pages for the image.
1948 *
1949 * Try to allocate as many pages as needed, but if the number of free highmem
1950 * pages is less than that, allocate them all.
1951 */
1952static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
1953                                               unsigned int nr_highmem)
1954{
1955        unsigned int to_alloc = count_free_highmem_pages();
1956
1957        if (to_alloc > nr_highmem)
1958                to_alloc = nr_highmem;
1959
1960        nr_highmem -= to_alloc;
1961        while (to_alloc-- > 0) {
1962                struct page *page;
1963
1964                page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
1965                memory_bm_set_bit(bm, page_to_pfn(page));
1966        }
1967        return nr_highmem;
1968}
1969#else
1970static inline int get_highmem_buffer(int safe_needed) { return 0; }
1971
1972static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
1973                                               unsigned int n) { return 0; }
1974#endif /* CONFIG_HIGHMEM */
1975
1976/**
1977 * swsusp_alloc - Allocate memory for hibernation image.
1978 *
1979 * We first try to allocate as many highmem pages as there are
1980 * saveable highmem pages in the system.  If that fails, we allocate
1981 * non-highmem pages for the copies of the remaining highmem ones.
1982 *
1983 * In this approach it is likely that the copies of highmem pages will
1984 * also be located in the high memory, because of the way in which
1985 * copy_data_pages() works.
1986 */
1987static int swsusp_alloc(struct memory_bitmap *copy_bm,
1988                        unsigned int nr_pages, unsigned int nr_highmem)
1989{
1990        if (nr_highmem > 0) {
1991                if (get_highmem_buffer(PG_ANY))
1992                        goto err_out;
1993                if (nr_highmem > alloc_highmem) {
1994                        nr_highmem -= alloc_highmem;
1995                        nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1996                }
1997        }
1998        if (nr_pages > alloc_normal) {
1999                nr_pages -= alloc_normal;
2000                while (nr_pages-- > 0) {
2001                        struct page *page;
2002
2003                        page = alloc_image_page(GFP_ATOMIC);
2004                        if (!page)
2005                                goto err_out;
2006                        memory_bm_set_bit(copy_bm, page_to_pfn(page));
2007                }
2008        }
2009
2010        return 0;
2011
2012 err_out:
2013        swsusp_free();
2014        return -ENOMEM;
2015}
2016
2017asmlinkage __visible int swsusp_save(void)
2018{
2019        unsigned int nr_pages, nr_highmem;
2020
2021        pr_info("Creating image:\n");
2022
2023        drain_local_pages(NULL);
2024        nr_pages = count_data_pages();
2025        nr_highmem = count_highmem_pages();
2026        pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
2027
2028        if (!enough_free_mem(nr_pages, nr_highmem)) {
2029                pr_err("Not enough free memory\n");
2030                return -ENOMEM;
2031        }
2032
2033        if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
2034                pr_err("Memory allocation failed\n");
2035                return -ENOMEM;
2036        }
2037
2038        /*
 2039         * While the suspend pagedir was being allocated, new cold pages may
 2040         * have appeared.  Kill them.
2041         */
2042        drain_local_pages(NULL);
2043        copy_data_pages(&copy_bm, &orig_bm);
2044
2045        /*
2046         * End of critical section. From now on, we can write to memory,
 2047         * but we should not touch disk.  In particular, we must _not_ touch
 2048         * swap space, except of course to write out the image itself.
2049         */
2050
2051        nr_pages += nr_highmem;
2052        nr_copy_pages = nr_pages;
2053        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
2054
 2055        pr_info("Image created (%u pages copied)\n", nr_pages);
2056
2057        return 0;
2058}
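/*
 * A worked example of the metadata sizing above, assuming 64-bit longs
 * and 4 KiB pages: an image of 100,000 copied pages needs
 * 100,000 * 8 = 800,000 bytes of PFN metadata, which DIV_ROUND_UP()
 * turns into 196 metadata pages (800,000 / 4,096 = 195.3, rounded up).
 */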
2059
2060#ifndef CONFIG_ARCH_HIBERNATION_HEADER
2061static int init_header_complete(struct swsusp_info *info)
2062{
2063        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
2064        info->version_code = LINUX_VERSION_CODE;
2065        return 0;
2066}
2067
2068static const char *check_image_kernel(struct swsusp_info *info)
2069{
2070        if (info->version_code != LINUX_VERSION_CODE)
2071                return "kernel version";
 2072        if (strcmp(info->uts.sysname, init_utsname()->sysname))
 2073                return "system type";
 2074        if (strcmp(info->uts.release, init_utsname()->release))
 2075                return "kernel release";
 2076        if (strcmp(info->uts.version, init_utsname()->version))
 2077                return "version";
 2078        if (strcmp(info->uts.machine, init_utsname()->machine))
2079                return "machine";
2080        return NULL;
2081}
2082#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
2083
2084unsigned long snapshot_get_image_size(void)
2085{
2086        return nr_copy_pages + nr_meta_pages + 1;
2087}
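/*
 * The "+ 1" above accounts for the header page, so the image produced by
 * snapshot_read_next() is laid out as one header page (struct
 * swsusp_info), followed by nr_meta_pages pages of packed PFNs and then
 * nr_copy_pages pages of data.
 */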
2088
2089static int init_header(struct swsusp_info *info)
2090{
2091        memset(info, 0, sizeof(struct swsusp_info));
2092        info->num_physpages = get_num_physpages();
2093        info->image_pages = nr_copy_pages;
2094        info->pages = snapshot_get_image_size();
2095        info->size = info->pages;
2096        info->size <<= PAGE_SHIFT;
2097        return init_header_complete(info);
2098}
2099
2100/**
2101 * pack_pfns - Prepare PFNs for saving.
2102 * @bm: Memory bitmap.
2103 * @buf: Memory buffer to store the PFNs in.
2104 *
2105 * PFNs corresponding to set bits in @bm are stored in the area of memory
2106 * pointed to by @buf (1 page at a time).
2107 */
2108static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
2109{
2110        int j;
2111
2112        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2113                buf[j] = memory_bm_next_pfn(bm);
2114                if (unlikely(buf[j] == BM_END_OF_MAP))
2115                        break;
2116        }
2117}
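/*
 * Assuming 64-bit longs and 4 KiB pages, each call above packs up to 512
 * PFNs into one page; if the bitmap is exhausted first, the first unused
 * slot holds BM_END_OF_MAP, which is what unpack_orig_pfns() stops on
 * when the image is loaded back.
 */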
2118
2119/**
2120 * snapshot_read_next - Get the address to read the next image page from.
2121 * @handle: Snapshot handle to be used for the reading.
2122 *
2123 * On the first call, @handle should point to a zeroed snapshot_handle
2124 * structure.  The structure gets populated then and a pointer to it should be
2125 * passed to this function every next time.
2126 *
2127 * On success, the function returns a positive number.  Then, the caller
2128 * is allowed to read up to the returned number of bytes from the memory
2129 * location computed by the data_of() macro.
2130 *
2131 * The function returns 0 to indicate the end of the data stream condition,
2132 * and negative numbers are returned on errors.  If that happens, the structure
2133 * pointed to by @handle is not updated and should not be used any more.
2134 */
2135int snapshot_read_next(struct snapshot_handle *handle)
2136{
2137        if (handle->cur > nr_meta_pages + nr_copy_pages)
2138                return 0;
2139
2140        if (!buffer) {
2141                /* This makes the buffer be freed by swsusp_free() */
2142                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2143                if (!buffer)
2144                        return -ENOMEM;
2145        }
2146        if (!handle->cur) {
2147                int error;
2148
2149                error = init_header((struct swsusp_info *)buffer);
2150                if (error)
2151                        return error;
2152                handle->buffer = buffer;
2153                memory_bm_position_reset(&orig_bm);
2154                memory_bm_position_reset(&copy_bm);
2155        } else if (handle->cur <= nr_meta_pages) {
2156                clear_page(buffer);
2157                pack_pfns(buffer, &orig_bm);
2158        } else {
2159                struct page *page;
2160
2161                page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
2162                if (PageHighMem(page)) {
2163                        /*
2164                         * Highmem pages are copied to the buffer,
2165                         * because we can't return with a kmapped
2166                         * highmem page (we may not be called again).
2167                         */
2168                        void *kaddr;
2169
2170                        kaddr = kmap_atomic(page);
2171                        copy_page(buffer, kaddr);
2172                        kunmap_atomic(kaddr);
2173                        handle->buffer = buffer;
2174                } else {
2175                        handle->buffer = page_address(page);
2176                }
2177        }
2178        handle->cur++;
2179        return PAGE_SIZE;
2180}
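/*
 * A minimal sketch of how a consumer drives snapshot_read_next();
 * write_page_somewhere() is a hypothetical stand-in for the real I/O
 * path (the in-tree user lives in kernel/power/swap.c):
 */
#if 0	/* illustrative example, not part of the build */
static int save_image_sketch(struct snapshot_handle *handle)
{
	int ret;

	memset(handle, 0, sizeof(*handle));	/* must start out zeroed */
	while ((ret = snapshot_read_next(handle)) > 0) {
		/* data_of(*handle) points to up to 'ret' bytes to save */
		if (write_page_somewhere(data_of(*handle)))	/* hypothetical I/O */
			return -EIO;
	}
	return ret;	/* 0 at the end of the image, negative on error */
}
#endif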
2181
2182static void duplicate_memory_bitmap(struct memory_bitmap *dst,
2183                                    struct memory_bitmap *src)
2184{
2185        unsigned long pfn;
2186
2187        memory_bm_position_reset(src);
2188        pfn = memory_bm_next_pfn(src);
2189        while (pfn != BM_END_OF_MAP) {
2190                memory_bm_set_bit(dst, pfn);
2191                pfn = memory_bm_next_pfn(src);
2192        }
2193}
2194
2195/**
2196 * mark_unsafe_pages - Mark pages that were used before hibernation.
2197 *
2198 * Mark the pages that cannot be used for storing the image during restoration,
2199 * because they conflict with the pages that had been used before hibernation.
2200 */
2201static void mark_unsafe_pages(struct memory_bitmap *bm)
2202{
2203        unsigned long pfn;
2204
2205        /* Clear the "free"/"unsafe" bit for all PFNs */
2206        memory_bm_position_reset(free_pages_map);
2207        pfn = memory_bm_next_pfn(free_pages_map);
2208        while (pfn != BM_END_OF_MAP) {
2209                memory_bm_clear_current(free_pages_map);
2210                pfn = memory_bm_next_pfn(free_pages_map);
2211        }
2212
2213        /* Mark pages that correspond to the "original" PFNs as "unsafe" */
2214        duplicate_memory_bitmap(free_pages_map, bm);
2215
2216        allocated_unsafe_pages = 0;
2217}
2218
2219static int check_header(struct swsusp_info *info)
2220{
2221        const char *reason;
2222
2223        reason = check_image_kernel(info);
2224        if (!reason && info->num_physpages != get_num_physpages())
2225                reason = "memory size";
2226        if (reason) {
2227                pr_err("Image mismatch: %s\n", reason);
2228                return -EPERM;
2229        }
2230        return 0;
2231}
2232
2233/**
 2234 * load_header - Check the image header and copy the data from it.
2235 */
2236static int load_header(struct swsusp_info *info)
2237{
2238        int error;
2239
2240        restore_pblist = NULL;
2241        error = check_header(info);
2242        if (!error) {
2243                nr_copy_pages = info->image_pages;
2244                nr_meta_pages = info->pages - info->image_pages - 1;
2245        }
2246        return error;
2247}
2248
2249/**
2250 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
2251 * @bm: Memory bitmap.
2252 * @buf: Area of memory containing the PFNs.
2253 *
2254 * For each element of the array pointed to by @buf (1 page at a time), set the
2255 * corresponding bit in @bm.
2256 */
2257static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
2258{
2259        int j;
2260
2261        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2262                if (unlikely(buf[j] == BM_END_OF_MAP))
2263                        break;
2264
2265                if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
2266                        memory_bm_set_bit(bm, buf[j]);
2267                else
2268                        return -EFAULT;
2269        }
2270
2271        return 0;
2272}
2273
2274#ifdef CONFIG_HIGHMEM
2275/*
2276 * struct highmem_pbe is used for creating the list of highmem pages that
2277 * should be restored atomically during the resume from disk, because the page
2278 * frames they have occupied before the suspend are in use.
2279 */
2280struct highmem_pbe {
2281        struct page *copy_page; /* data is here now */
2282        struct page *orig_page; /* data was here before the suspend */
2283        struct highmem_pbe *next;
2284};
2285
2286/*
2287 * List of highmem PBEs needed for restoring the highmem pages that were
2288 * allocated before the suspend and included in the suspend image, but have
2289 * also been allocated by the "resume" kernel, so their contents cannot be
2290 * written directly to their "original" page frames.
2291 */
2292static struct highmem_pbe *highmem_pblist;
2293
2294/**
2295 * count_highmem_image_pages - Compute the number of highmem pages in the image.
2296 * @bm: Memory bitmap.
2297 *
2298 * The bits in @bm that correspond to image pages are assumed to be set.
2299 */
2300static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
2301{
2302        unsigned long pfn;
2303        unsigned int cnt = 0;
2304
2305        memory_bm_position_reset(bm);
2306        pfn = memory_bm_next_pfn(bm);
2307        while (pfn != BM_END_OF_MAP) {
2308                if (PageHighMem(pfn_to_page(pfn)))
2309                        cnt++;
2310
2311                pfn = memory_bm_next_pfn(bm);
2312        }
2313        return cnt;
2314}
2315
2316static unsigned int safe_highmem_pages;
2317
2318static struct memory_bitmap *safe_highmem_bm;
2319
2320/**
2321 * prepare_highmem_image - Allocate memory for loading highmem data from image.
2322 * @bm: Pointer to an uninitialized memory bitmap structure.
2323 * @nr_highmem_p: Pointer to the number of highmem image pages.
2324 *
2325 * Try to allocate as many highmem pages as there are highmem image pages
2326 * (@nr_highmem_p points to the variable containing the number of highmem image
 2327 * pages).  The pages that are "safe" (i.e. will not be overwritten when the
 2328 * hibernation image is restored entirely) have the corresponding bits set in
 2329 * @bm (it must be uninitialized).
2330 *
2331 * NOTE: This function should not be called if there are no highmem image pages.
2332 */
2333static int prepare_highmem_image(struct memory_bitmap *bm,
2334                                 unsigned int *nr_highmem_p)
2335{
2336        unsigned int to_alloc;
2337
2338        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
2339                return -ENOMEM;
2340
2341        if (get_highmem_buffer(PG_SAFE))
2342                return -ENOMEM;
2343
2344        to_alloc = count_free_highmem_pages();
2345        if (to_alloc > *nr_highmem_p)
2346                to_alloc = *nr_highmem_p;
2347        else
2348                *nr_highmem_p = to_alloc;
2349
2350        safe_highmem_pages = 0;
2351        while (to_alloc-- > 0) {
2352                struct page *page;
2353
2354                page = alloc_page(__GFP_HIGHMEM);
2355                if (!swsusp_page_is_free(page)) {
 2356                        /* The page is "safe", set its bit in the bitmap */
2357                        memory_bm_set_bit(bm, page_to_pfn(page));
2358                        safe_highmem_pages++;
2359                }
2360                /* Mark the page as allocated */
2361                swsusp_set_page_forbidden(page);
2362                swsusp_set_page_free(page);
2363        }
2364        memory_bm_position_reset(bm);
2365        safe_highmem_bm = bm;
2366        return 0;
2367}
2368
2369static struct page *last_highmem_page;
2370
2371/**
2372 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
2373 *
2374 * For a given highmem image page get a buffer that suspend_write_next() should
2375 * return to its caller to write to.
2376 *
2377 * If the page is to be saved to its "original" page frame or a copy of
2378 * the page is to be made in the highmem, @buffer is returned.  Otherwise,
2379 * the copy of the page is to be made in normal memory, so the address of
2380 * the copy is returned.
2381 *
2382 * If @buffer is returned, the caller of suspend_write_next() will write
2383 * the page's contents to @buffer, so they will have to be copied to the
2384 * right location on the next call to suspend_write_next() and it is done
2385 * with the help of copy_last_highmem_page().  For this purpose, if
2386 * @buffer is returned, @last_highmem_page is set to the page to which
2387 * the data will have to be copied from @buffer.
2388 */
2389static void *get_highmem_page_buffer(struct page *page,
2390                                     struct chain_allocator *ca)
2391{
2392        struct highmem_pbe *pbe;
2393        void *kaddr;
2394
2395        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
2396                /*
2397                 * We have allocated the "original" page frame and we can
2398                 * use it directly to store the loaded page.
2399                 */
2400                last_highmem_page = page;
2401                return buffer;
2402        }
2403        /*
2404         * The "original" page frame has not been allocated and we have to
2405         * use a "safe" page frame to store the loaded page.
2406         */
2407        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
2408        if (!pbe) {
2409                swsusp_free();
2410                return ERR_PTR(-ENOMEM);
2411        }
2412        pbe->orig_page = page;
2413        if (safe_highmem_pages > 0) {
2414                struct page *tmp;
2415
2416                /* Copy of the page will be stored in high memory */
2417                kaddr = buffer;
2418                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
2419                safe_highmem_pages--;
2420                last_highmem_page = tmp;
2421                pbe->copy_page = tmp;
2422        } else {
2423                /* Copy of the page will be stored in normal memory */
2424                kaddr = safe_pages_list;
2425                safe_pages_list = safe_pages_list->next;
2426                pbe->copy_page = virt_to_page(kaddr);
2427        }
2428        pbe->next = highmem_pblist;
2429        highmem_pblist = pbe;
2430        return kaddr;
2431}
2432
2433/**
 2434 * copy_last_highmem_page - Copy the most recent highmem image page.
2435 *
 2436 * Copy the contents of a highmem image page from @buffer, where the caller of
 2437 * snapshot_write_next() has stored them, to the right location represented by
 2438 * @last_highmem_page.
2439 */
2440static void copy_last_highmem_page(void)
2441{
2442        if (last_highmem_page) {
2443                void *dst;
2444
2445                dst = kmap_atomic(last_highmem_page);
2446                copy_page(dst, buffer);
2447                kunmap_atomic(dst);
2448                last_highmem_page = NULL;
2449        }
2450}
2451
2452static inline int last_highmem_page_copied(void)
2453{
2454        return !last_highmem_page;
2455}
2456
2457static inline void free_highmem_data(void)
2458{
2459        if (safe_highmem_bm)
2460                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2461
2462        if (buffer)
2463                free_image_page(buffer, PG_UNSAFE_CLEAR);
2464}
2465#else
2466static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2467
2468static inline int prepare_highmem_image(struct memory_bitmap *bm,
2469                                        unsigned int *nr_highmem_p) { return 0; }
2470
2471static inline void *get_highmem_page_buffer(struct page *page,
2472                                            struct chain_allocator *ca)
2473{
2474        return ERR_PTR(-EINVAL);
2475}
2476
2477static inline void copy_last_highmem_page(void) {}
2478static inline int last_highmem_page_copied(void) { return 1; }
2479static inline void free_highmem_data(void) {}
2480#endif /* CONFIG_HIGHMEM */
2481
2482#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
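/*
 * Assuming 64-bit pointers and 4 KiB pages, LINKED_PAGE_DATA_SIZE is
 * PAGE_SIZE minus one pointer (4,088 bytes) and struct pbe holds three
 * pointers (24 bytes), so PBES_PER_LINKED_PAGE is 170: prepare_image()
 * below reserves roughly one safe linked page per 170 non-highmem image
 * pages for the PBE chains built later by get_buffer().
 */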
2483
2484/**
2485 * prepare_image - Make room for loading hibernation image.
 2486 * @new_bm: Uninitialized memory bitmap structure.
2487 * @bm: Memory bitmap with unsafe pages marked.
2488 *
2489 * Use @bm to mark the pages that will be overwritten in the process of
2490 * restoring the system memory state from the suspend image ("unsafe" pages)
2491 * and allocate memory for the image.
2492 *
2493 * The idea is to allocate a new memory bitmap first and then allocate
2494 * as many pages as needed for image data, but without specifying what those
2495 * pages will be used for just yet.  Instead, we mark them all as allocated and
 2496 * create a list of "safe" pages to be used later.  On systems with high
 2497 * memory, a list of "safe" highmem pages is created too.
2498 */
2499static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2500{
2501        unsigned int nr_pages, nr_highmem;
2502        struct linked_page *lp;
2503        int error;
2504
2505        /* If there is no highmem, the buffer will not be necessary */
2506        free_image_page(buffer, PG_UNSAFE_CLEAR);
2507        buffer = NULL;
2508
2509        nr_highmem = count_highmem_image_pages(bm);
2510        mark_unsafe_pages(bm);
2511
2512        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2513        if (error)
2514                goto Free;
2515
2516        duplicate_memory_bitmap(new_bm, bm);
2517        memory_bm_free(bm, PG_UNSAFE_KEEP);
2518        if (nr_highmem > 0) {
2519                error = prepare_highmem_image(bm, &nr_highmem);
2520                if (error)
2521                        goto Free;
2522        }
2523        /*
2524         * Reserve some safe pages for potential later use.
2525         *
2526         * NOTE: This way we make sure there will be enough safe pages for the
2527         * chain_alloc() in get_buffer().  It is a bit wasteful, but
2528         * nr_copy_pages cannot be greater than 50% of the memory anyway.
2529         *
 2530         * Also, nr_copy_pages cannot be less than allocated_unsafe_pages.
2531         */
2532        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2533        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2534        while (nr_pages > 0) {
2535                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2536                if (!lp) {
2537                        error = -ENOMEM;
2538                        goto Free;
2539                }
2540                lp->next = safe_pages_list;
2541                safe_pages_list = lp;
2542                nr_pages--;
2543        }
2544        /* Preallocate memory for the image */
2545        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2546        while (nr_pages > 0) {
2547                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2548                if (!lp) {
2549                        error = -ENOMEM;
2550                        goto Free;
2551                }
2552                if (!swsusp_page_is_free(virt_to_page(lp))) {
2553                        /* The page is "safe", add it to the list */
2554                        lp->next = safe_pages_list;
2555                        safe_pages_list = lp;
2556                }
2557                /* Mark the page as allocated */
2558                swsusp_set_page_forbidden(virt_to_page(lp));
2559                swsusp_set_page_free(virt_to_page(lp));
2560                nr_pages--;
2561        }
2562        return 0;
2563
2564 Free:
2565        swsusp_free();
2566        return error;
2567}
2568
2569/**
2570 * get_buffer - Get the address to store the next image data page.
2571 *
2572 * Get the address that snapshot_write_next() should return to its caller to
2573 * write to.
2574 */
2575static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2576{
2577        struct pbe *pbe;
2578        struct page *page;
2579        unsigned long pfn = memory_bm_next_pfn(bm);
2580
2581        if (pfn == BM_END_OF_MAP)
2582                return ERR_PTR(-EFAULT);
2583
2584        page = pfn_to_page(pfn);
2585        if (PageHighMem(page))
2586                return get_highmem_page_buffer(page, ca);
2587
2588        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2589                /*
2590                 * We have allocated the "original" page frame and we can
2591                 * use it directly to store the loaded page.
2592                 */
2593                return page_address(page);
2594
2595        /*
2596         * The "original" page frame has not been allocated and we have to
2597         * use a "safe" page frame to store the loaded page.
2598         */
2599        pbe = chain_alloc(ca, sizeof(struct pbe));
2600        if (!pbe) {
2601                swsusp_free();
2602                return ERR_PTR(-ENOMEM);
2603        }
2604        pbe->orig_address = page_address(page);
2605        pbe->address = safe_pages_list;
2606        safe_pages_list = safe_pages_list->next;
2607        pbe->next = restore_pblist;
2608        restore_pblist = pbe;
2609        return pbe->address;
2610}
2611
2612/**
2613 * snapshot_write_next - Get the address to store the next image page.
2614 * @handle: Snapshot handle structure to guide the writing.
2615 *
2616 * On the first call, @handle should point to a zeroed snapshot_handle
2617 * structure.  The structure gets populated then and a pointer to it should be
2618 * passed to this function every next time.
2619 *
2620 * On success, the function returns a positive number.  Then, the caller
2621 * is allowed to write up to the returned number of bytes to the memory
2622 * location computed by the data_of() macro.
2623 *
2624 * The function returns 0 to indicate the "end of file" condition.  Negative
2625 * numbers are returned on errors, in which cases the structure pointed to by
2626 * @handle is not updated and should not be used any more.
2627 */
2628int snapshot_write_next(struct snapshot_handle *handle)
2629{
2630        static struct chain_allocator ca;
2631        int error = 0;
2632
2633        /* Check if we have already loaded the entire image */
2634        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2635                return 0;
2636
2637        handle->sync_read = 1;
2638
2639        if (!handle->cur) {
2640                if (!buffer)
2641                        /* This makes the buffer be freed by swsusp_free() */
2642                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2643
2644                if (!buffer)
2645                        return -ENOMEM;
2646
2647                handle->buffer = buffer;
2648        } else if (handle->cur == 1) {
2649                error = load_header(buffer);
2650                if (error)
2651                        return error;
2652
2653                safe_pages_list = NULL;
2654
2655                error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2656                if (error)
2657                        return error;
2658
2659                hibernate_restore_protection_begin();
2660        } else if (handle->cur <= nr_meta_pages + 1) {
2661                error = unpack_orig_pfns(buffer, &copy_bm);
2662                if (error)
2663                        return error;
2664
2665                if (handle->cur == nr_meta_pages + 1) {
2666                        error = prepare_image(&orig_bm, &copy_bm);
2667                        if (error)
2668                                return error;
2669
2670                        chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2671                        memory_bm_position_reset(&orig_bm);
2672                        restore_pblist = NULL;
2673                        handle->buffer = get_buffer(&orig_bm, &ca);
2674                        handle->sync_read = 0;
2675                        if (IS_ERR(handle->buffer))
2676                                return PTR_ERR(handle->buffer);
2677                }
2678        } else {
2679                copy_last_highmem_page();
2680                hibernate_restore_protect_page(handle->buffer);
2681                handle->buffer = get_buffer(&orig_bm, &ca);
2682                if (IS_ERR(handle->buffer))
2683                        return PTR_ERR(handle->buffer);
2684                if (handle->buffer != buffer)
2685                        handle->sync_read = 0;
2686        }
2687        handle->cur++;
2688        return PAGE_SIZE;
2689}
2690
2691/**
2692 * snapshot_write_finalize - Complete the loading of a hibernation image.
2693 *
2694 * Must be called after the last call to snapshot_write_next() in case the last
2695 * page in the image happens to be a highmem page and its contents should be
2696 * stored in highmem.  Additionally, it recycles bitmap memory that's not
2697 * necessary any more.
2698 */
2699void snapshot_write_finalize(struct snapshot_handle *handle)
2700{
2701        copy_last_highmem_page();
2702        hibernate_restore_protect_page(handle->buffer);
2703        /* Do that only if we have loaded the image entirely */
2704        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2705                memory_bm_recycle(&orig_bm);
2706                free_highmem_data();
2707        }
2708}
2709
2710int snapshot_image_loaded(struct snapshot_handle *handle)
2711{
2712        return !(!nr_copy_pages || !last_highmem_page_copied() ||
2713                        handle->cur <= nr_meta_pages + nr_copy_pages);
2714}
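/*
 * A minimal sketch of the restore side: the loader repeatedly asks
 * snapshot_write_next() where to put the next page and finishes with
 * snapshot_write_finalize() and snapshot_image_loaded().
 * read_page_from_storage() is a hypothetical stand-in for the real I/O
 * path in kernel/power/swap.c:
 */
#if 0	/* illustrative example, not part of the build */
static int load_image_sketch(struct snapshot_handle *handle)
{
	int ret;

	memset(handle, 0, sizeof(*handle));	/* must start out zeroed */
	while ((ret = snapshot_write_next(handle)) > 0) {
		/* fill the buffer the snapshot code handed back to us */
		if (read_page_from_storage(data_of(*handle)))	/* hypothetical I/O */
			return -EIO;
	}
	if (ret < 0)
		return ret;

	snapshot_write_finalize(handle);
	return snapshot_image_loaded(handle) ? 0 : -ENODATA;
}
#endif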
2715
2716#ifdef CONFIG_HIGHMEM
2717/* Assumes that @buf is ready and points to a "safe" page */
2718static inline void swap_two_pages_data(struct page *p1, struct page *p2,
2719                                       void *buf)
2720{
2721        void *kaddr1, *kaddr2;
2722
2723        kaddr1 = kmap_atomic(p1);
2724        kaddr2 = kmap_atomic(p2);
2725        copy_page(buf, kaddr1);
2726        copy_page(kaddr1, kaddr2);
2727        copy_page(kaddr2, buf);
2728        kunmap_atomic(kaddr2);
2729        kunmap_atomic(kaddr1);
2730}
2731
2732/**
2733 * restore_highmem - Put highmem image pages into their original locations.
2734 *
2735 * For each highmem page that was in use before hibernation and is included in
2736 * the image, and also has been allocated by the "restore" kernel, swap its
2737 * current contents with the previous (ie. "before hibernation") ones.
2738 *
2739 * If the restore eventually fails, we can call this function once again and
2740 * restore the highmem state as seen by the restore kernel.
2741 */
2742int restore_highmem(void)
2743{
2744        struct highmem_pbe *pbe = highmem_pblist;
2745        void *buf;
2746
2747        if (!pbe)
2748                return 0;
2749
2750        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2751        if (!buf)
2752                return -ENOMEM;
2753
2754        while (pbe) {
2755                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2756                pbe = pbe->next;
2757        }
2758        free_image_page(buf, PG_UNSAFE_CLEAR);
2759        return 0;
2760}
2761#endif /* CONFIG_HIGHMEM */
2762