linux/kernel/power/snapshot.c
<<
>>
Prefs
   1/*
   2 * linux/kernel/power/snapshot.c
   3 *
   4 * This file provides system snapshot/restore functionality for swsusp.
   5 *
   6 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
   7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
   8 *
   9 * This file is released under the GPLv2.
  10 *
  11 */
  12
  13#include <linux/version.h>
  14#include <linux/module.h>
  15#include <linux/mm.h>
  16#include <linux/suspend.h>
  17#include <linux/delay.h>
  18#include <linux/bitops.h>
  19#include <linux/spinlock.h>
  20#include <linux/kernel.h>
  21#include <linux/pm.h>
  22#include <linux/device.h>
  23#include <linux/init.h>
  24#include <linux/bootmem.h>
  25#include <linux/syscalls.h>
  26#include <linux/console.h>
  27#include <linux/highmem.h>
  28#include <linux/list.h>
  29#include <linux/slab.h>
  30
  31#include <asm/uaccess.h>
  32#include <asm/mmu_context.h>
  33#include <asm/pgtable.h>
  34#include <asm/tlbflush.h>
  35#include <asm/io.h>
  36
  37#include "power.h"
  38
  39static int swsusp_page_is_free(struct page *);
  40static void swsusp_set_page_forbidden(struct page *);
  41static void swsusp_unset_page_forbidden(struct page *);
  42
  43/*
  44 * Number of bytes to reserve for memory allocations made by device drivers
  45 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
  46 * cause image creation to fail (tunable via /sys/power/reserved_size).
  47 */
  48unsigned long reserved_size;
  49
  50void __init hibernate_reserved_size_init(void)
  51{
  52        reserved_size = SPARE_PAGES * PAGE_SIZE;
  53}
  54
  55/*
  56 * Preferred image size in bytes (tunable via /sys/power/image_size).
  57 * When it is set to N, swsusp will do its best to ensure the image
  58 * size will not exceed N bytes, but if that is impossible, it will
  59 * try to create the smallest image possible.
  60 */
  61unsigned long image_size;
  62
  63void __init hibernate_image_size_init(void)
  64{
  65        image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
  66}
  67
  68/* List of PBEs needed for restoring the pages that were allocated before
  69 * the suspend and included in the suspend image, but have also been
  70 * allocated by the "resume" kernel, so their contents cannot be written
  71 * directly to their "original" page frames.
  72 */
  73struct pbe *restore_pblist;
  74
  75/* Pointer to an auxiliary buffer (1 page) */
  76static void *buffer;
  77
  78/**
  79 *      @safe_needed - on resume, for storing the PBE list and the image,
  80 *      we can only use memory pages that do not conflict with the pages
  81 *      used before suspend.  The unsafe pages have PageNosaveFree set
  82 *      and we count them using unsafe_pages.
  83 *
  84 *      Each allocated image page is marked as PageNosave and PageNosaveFree
  85 *      so that swsusp_free() can release it.
  86 */
  87
  88#define PG_ANY          0
  89#define PG_SAFE         1
  90#define PG_UNSAFE_CLEAR 1
  91#define PG_UNSAFE_KEEP  0
  92
  93static unsigned int allocated_unsafe_pages;
  94
  95static void *get_image_page(gfp_t gfp_mask, int safe_needed)
  96{
  97        void *res;
  98
  99        res = (void *)get_zeroed_page(gfp_mask);
 100        if (safe_needed)
 101                while (res && swsusp_page_is_free(virt_to_page(res))) {
 102                        /* The page is unsafe, mark it for swsusp_free() */
 103                        swsusp_set_page_forbidden(virt_to_page(res));
 104                        allocated_unsafe_pages++;
 105                        res = (void *)get_zeroed_page(gfp_mask);
 106                }
 107        if (res) {
 108                swsusp_set_page_forbidden(virt_to_page(res));
 109                swsusp_set_page_free(virt_to_page(res));
 110        }
 111        return res;
 112}
 113
 114unsigned long get_safe_page(gfp_t gfp_mask)
 115{
 116        return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
 117}
 118
 119static struct page *alloc_image_page(gfp_t gfp_mask)
 120{
 121        struct page *page;
 122
 123        page = alloc_page(gfp_mask);
 124        if (page) {
 125                swsusp_set_page_forbidden(page);
 126                swsusp_set_page_free(page);
 127        }
 128        return page;
 129}
 130
 131/**
 132 *      free_image_page - free page represented by @addr, allocated with
 133 *      get_image_page (page flags set by it must be cleared)
 134 */
 135
 136static inline void free_image_page(void *addr, int clear_nosave_free)
 137{
 138        struct page *page;
 139
 140        BUG_ON(!virt_addr_valid(addr));
 141
 142        page = virt_to_page(addr);
 143
 144        swsusp_unset_page_forbidden(page);
 145        if (clear_nosave_free)
 146                swsusp_unset_page_free(page);
 147
 148        __free_page(page);
 149}
 150
 151/* struct linked_page is used to build chains of pages */
 152
 153#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))
 154
 155struct linked_page {
 156        struct linked_page *next;
 157        char data[LINKED_PAGE_DATA_SIZE];
 158} __attribute__((packed));
 159
 160static inline void
 161free_list_of_pages(struct linked_page *list, int clear_page_nosave)
 162{
 163        while (list) {
 164                struct linked_page *lp = list->next;
 165
 166                free_image_page(list, clear_page_nosave);
 167                list = lp;
 168        }
 169}
 170
 171/**
 172  *     struct chain_allocator is used for allocating small objects out of
 173  *     a linked list of pages called 'the chain'.
 174  *
 175  *     The chain grows each time when there is no room for a new object in
 176  *     the current page.  The allocated objects cannot be freed individually.
 177  *     It is only possible to free them all at once, by freeing the entire
 178  *     chain.
 179  *
 180  *     NOTE: The chain allocator may be inefficient if the allocated objects
 181  *     are not much smaller than PAGE_SIZE.
 182  */
 183
 184struct chain_allocator {
 185        struct linked_page *chain;      /* the chain */
 186        unsigned int used_space;        /* total size of objects allocated out
 187                                         * of the current page
 188                                         */
 189        gfp_t gfp_mask;         /* mask for allocating pages */
 190        int safe_needed;        /* if set, only "safe" pages are allocated */
 191};
 192
 193static void
 194chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
 195{
 196        ca->chain = NULL;
 197        ca->used_space = LINKED_PAGE_DATA_SIZE;
 198        ca->gfp_mask = gfp_mask;
 199        ca->safe_needed = safe_needed;
 200}
 201
 202static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 203{
 204        void *ret;
 205
 206        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 207                struct linked_page *lp;
 208
 209                lp = get_image_page(ca->gfp_mask, ca->safe_needed);
 210                if (!lp)
 211                        return NULL;
 212
 213                lp->next = ca->chain;
 214                ca->chain = lp;
 215                ca->used_space = 0;
 216        }
 217        ret = ca->chain->data + ca->used_space;
 218        ca->used_space += size;
 219        return ret;
 220}
 221
/**
 *      Data types related to memory bitmaps.
 *
 *      Memory bitmap is a structure consisting of many linked lists of
 *      objects.  The main list's elements are of type struct zone_bitmap
 *      and each of them corresponds to one zone.  For each zone bitmap
 *      object there is a list of objects of type struct bm_block that
 *      represent the blocks of the bitmap in which information is stored.
 *
 *      struct memory_bitmap contains a pointer to the main list of zone
 *      bitmap objects, a struct bm_position used for browsing the bitmap,
 *      and a pointer to the list of pages used for allocating all of the
 *      zone bitmap objects and bitmap block objects.
 *
 *      NOTE: It has to be possible to lay out the bitmap in memory
 *      using only allocations of order 0.  Additionally, the bitmap is
 *      designed to work with arbitrary number of zones (this is over the
 *      top for now, but let's avoid making unnecessary assumptions ;-).
 *
 *      struct zone_bitmap contains a pointer to a list of bitmap block
 *      objects and a pointer to the bitmap block object that has been
 *      most recently used for setting bits.  Additionally, it contains the
 *      pfns that correspond to the start and end of the represented zone.
 *
 *      struct bm_block contains a pointer to the memory page in which
 *      information is stored (in the form of a block of bitmap).
 *      It also contains the pfns that correspond to the start and end of
 *      the represented memory area.
 */
 251
 252#define BM_END_OF_MAP   (~0UL)
 253
 254#define BM_BITS_PER_BLOCK       (PAGE_SIZE * BITS_PER_BYTE)
 255
 256struct bm_block {
 257        struct list_head hook;  /* hook into a list of bitmap blocks */
 258        unsigned long start_pfn;        /* pfn represented by the first bit */
 259        unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
 260        unsigned long *data;    /* bitmap representing pages */
 261};
 262
 263static inline unsigned long bm_block_bits(struct bm_block *bb)
 264{
 265        return bb->end_pfn - bb->start_pfn;
 266}
 267
 268/* strcut bm_position is used for browsing memory bitmaps */
 269
 270struct bm_position {
 271        struct bm_block *block;
 272        int bit;
 273};
 274
 275struct memory_bitmap {
 276        struct list_head blocks;        /* list of bitmap blocks */
 277        struct linked_page *p_list;     /* list of pages used to store zone
 278                                         * bitmap objects and bitmap block
 279                                         * objects
 280                                         */
 281        struct bm_position cur; /* most recently used bit position */
 282};
 283
 284/* Functions that operate on memory bitmaps */
 285
 286static void memory_bm_position_reset(struct memory_bitmap *bm)
 287{
 288        bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
 289        bm->cur.bit = 0;
 290}
 291
 292static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 293
 294/**
 295 *      create_bm_block_list - create a list of block bitmap objects
 296 *      @pages - number of pages to track
 297 *      @list - list to put the allocated blocks into
 298 *      @ca - chain allocator to be used for allocating memory
 299 */
 300static int create_bm_block_list(unsigned long pages,
 301                                struct list_head *list,
 302                                struct chain_allocator *ca)
 303{
 304        unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
 305
 306        while (nr_blocks-- > 0) {
 307                struct bm_block *bb;
 308
 309                bb = chain_alloc(ca, sizeof(struct bm_block));
 310                if (!bb)
 311                        return -ENOMEM;
 312                list_add(&bb->hook, list);
 313        }
 314
 315        return 0;
 316}
 317
 318struct mem_extent {
 319        struct list_head hook;
 320        unsigned long start;
 321        unsigned long end;
 322};
 323
 324/**
 325 *      free_mem_extents - free a list of memory extents
 326 *      @list - list of extents to empty
 327 */
 328static void free_mem_extents(struct list_head *list)
 329{
 330        struct mem_extent *ext, *aux;
 331
 332        list_for_each_entry_safe(ext, aux, list, hook) {
 333                list_del(&ext->hook);
 334                kfree(ext);
 335        }
 336}
 337
 338/**
 339 *      create_mem_extents - create a list of memory extents representing
 340 *                           contiguous ranges of PFNs
 341 *      @list - list to put the extents into
 342 *      @gfp_mask - mask to use for memory allocations
 343 */
 344static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 345{
 346        struct zone *zone;
 347
 348        INIT_LIST_HEAD(list);
 349
 350        for_each_populated_zone(zone) {
 351                unsigned long zone_start, zone_end;
 352                struct mem_extent *ext, *cur, *aux;
 353
 354                zone_start = zone->zone_start_pfn;
 355                zone_end = zone->zone_start_pfn + zone->spanned_pages;
 356
 357                list_for_each_entry(ext, list, hook)
 358                        if (zone_start <= ext->end)
 359                                break;
 360
 361                if (&ext->hook == list || zone_end < ext->start) {
 362                        /* New extent is necessary */
 363                        struct mem_extent *new_ext;
 364
 365                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
 366                        if (!new_ext) {
 367                                free_mem_extents(list);
 368                                return -ENOMEM;
 369                        }
 370                        new_ext->start = zone_start;
 371                        new_ext->end = zone_end;
 372                        list_add_tail(&new_ext->hook, &ext->hook);
 373                        continue;
 374                }
 375
 376                /* Merge this zone's range of PFNs with the existing one */
 377                if (zone_start < ext->start)
 378                        ext->start = zone_start;
 379                if (zone_end > ext->end)
 380                        ext->end = zone_end;
 381
 382                /* More merging may be possible */
 383                cur = ext;
 384                list_for_each_entry_safe_continue(cur, aux, list, hook) {
 385                        if (zone_end < cur->start)
 386                                break;
 387                        if (zone_end < cur->end)
 388                                ext->end = cur->end;
 389                        list_del(&cur->hook);
 390                        kfree(cur);
 391                }
 392        }
 393
 394        return 0;
 395}
 396
 397/**
 398  *     memory_bm_create - allocate memory for a memory bitmap
 399  */
 400static int
 401memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 402{
 403        struct chain_allocator ca;
 404        struct list_head mem_extents;
 405        struct mem_extent *ext;
 406        int error;
 407
 408        chain_init(&ca, gfp_mask, safe_needed);
 409        INIT_LIST_HEAD(&bm->blocks);
 410
 411        error = create_mem_extents(&mem_extents, gfp_mask);
 412        if (error)
 413                return error;
 414
 415        list_for_each_entry(ext, &mem_extents, hook) {
 416                struct bm_block *bb;
 417                unsigned long pfn = ext->start;
 418                unsigned long pages = ext->end - ext->start;
 419
 420                bb = list_entry(bm->blocks.prev, struct bm_block, hook);
 421
 422                error = create_bm_block_list(pages, bm->blocks.prev, &ca);
 423                if (error)
 424                        goto Error;
 425
 426                list_for_each_entry_continue(bb, &bm->blocks, hook) {
 427                        bb->data = get_image_page(gfp_mask, safe_needed);
 428                        if (!bb->data) {
 429                                error = -ENOMEM;
 430                                goto Error;
 431                        }
 432
 433                        bb->start_pfn = pfn;
 434                        if (pages >= BM_BITS_PER_BLOCK) {
 435                                pfn += BM_BITS_PER_BLOCK;
 436                                pages -= BM_BITS_PER_BLOCK;
 437                        } else {
 438                                /* This is executed only once in the loop */
 439                                pfn += pages;
 440                        }
 441                        bb->end_pfn = pfn;
 442                }
 443        }
 444
 445        bm->p_list = ca.chain;
 446        memory_bm_position_reset(bm);
 447 Exit:
 448        free_mem_extents(&mem_extents);
 449        return error;
 450
 451 Error:
 452        bm->p_list = ca.chain;
 453        memory_bm_free(bm, PG_UNSAFE_CLEAR);
 454        goto Exit;
 455}
 456
 457/**
 458  *     memory_bm_free - free memory occupied by the memory bitmap @bm
 459  */
 460static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 461{
 462        struct bm_block *bb;
 463
 464        list_for_each_entry(bb, &bm->blocks, hook)
 465                if (bb->data)
 466                        free_image_page(bb->data, clear_nosave_free);
 467
 468        free_list_of_pages(bm->p_list, clear_nosave_free);
 469
 470        INIT_LIST_HEAD(&bm->blocks);
 471}
 472
 473/**
 474 *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
 475 *      to given pfn.  The cur_zone_bm member of @bm and the cur_block member
 476 *      of @bm->cur_zone_bm are updated.
 477 */
 478static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 479                                void **addr, unsigned int *bit_nr)
 480{
 481        struct bm_block *bb;
 482
 483        /*
 484         * Check if the pfn corresponds to the current bitmap block and find
 485         * the block where it fits if this is not the case.
 486         */
 487        bb = bm->cur.block;
 488        if (pfn < bb->start_pfn)
 489                list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
 490                        if (pfn >= bb->start_pfn)
 491                                break;
 492
 493        if (pfn >= bb->end_pfn)
 494                list_for_each_entry_continue(bb, &bm->blocks, hook)
 495                        if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
 496                                break;
 497
 498        if (&bb->hook == &bm->blocks)
 499                return -EFAULT;
 500
 501        /* The block has been found */
 502        bm->cur.block = bb;
 503        pfn -= bb->start_pfn;
 504        bm->cur.bit = pfn + 1;
 505        *bit_nr = pfn;
 506        *addr = bb->data;
 507        return 0;
 508}
 509
 510static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 511{
 512        void *addr;
 513        unsigned int bit;
 514        int error;
 515
 516        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 517        BUG_ON(error);
 518        set_bit(bit, addr);
 519}
 520
 521static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
 522{
 523        void *addr;
 524        unsigned int bit;
 525        int error;
 526
 527        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 528        if (!error)
 529                set_bit(bit, addr);
 530        return error;
 531}
 532
 533static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 534{
 535        void *addr;
 536        unsigned int bit;
 537        int error;
 538
 539        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 540        BUG_ON(error);
 541        clear_bit(bit, addr);
 542}
 543
 544static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 545{
 546        void *addr;
 547        unsigned int bit;
 548        int error;
 549
 550        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 551        BUG_ON(error);
 552        return test_bit(bit, addr);
 553}
 554
 555static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
 556{
 557        void *addr;
 558        unsigned int bit;
 559
 560        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
 561}
 562
 563/**
 564 *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
 565 *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
 566 *      returned.
 567 *
 568 *      It is required to run memory_bm_position_reset() before the first call to
 569 *      this function.
 570 */
 571
 572static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 573{
 574        struct bm_block *bb;
 575        int bit;
 576
 577        bb = bm->cur.block;
 578        do {
 579                bit = bm->cur.bit;
 580                bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
 581                if (bit < bm_block_bits(bb))
 582                        goto Return_pfn;
 583
 584                bb = list_entry(bb->hook.next, struct bm_block, hook);
 585                bm->cur.block = bb;
 586                bm->cur.bit = 0;
 587        } while (&bb->hook != &bm->blocks);
 588
 589        memory_bm_position_reset(bm);
 590        return BM_END_OF_MAP;
 591
 592 Return_pfn:
 593        bm->cur.bit = bit + 1;
 594        return bb->start_pfn + bit;
 595}
 596
 597/**
 598 *      This structure represents a range of page frames the contents of which
 599 *      should not be saved during the suspend.
 600 */
 601
 602struct nosave_region {
 603        struct list_head list;
 604        unsigned long start_pfn;
 605        unsigned long end_pfn;
 606};
 607
 608static LIST_HEAD(nosave_regions);
 609
 610/**
 611 *      register_nosave_region - register a range of page frames the contents
 612 *      of which should not be saved during the suspend (to be used in the early
 613 *      initialization code)
 614 */
 615
 616void __init
 617__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
 618                         int use_kmalloc)
 619{
 620        struct nosave_region *region;
 621
 622        if (start_pfn >= end_pfn)
 623                return;
 624
 625        if (!list_empty(&nosave_regions)) {
 626                /* Try to extend the previous region (they should be sorted) */
 627                region = list_entry(nosave_regions.prev,
 628                                        struct nosave_region, list);
 629                if (region->end_pfn == start_pfn) {
 630                        region->end_pfn = end_pfn;
 631                        goto Report;
 632                }
 633        }
 634        if (use_kmalloc) {
 635                /* during init, this shouldn't fail */
 636                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
 637                BUG_ON(!region);
 638        } else
 639                /* This allocation cannot fail */
 640                region = alloc_bootmem(sizeof(struct nosave_region));
 641        region->start_pfn = start_pfn;
 642        region->end_pfn = end_pfn;
 643        list_add_tail(&region->list, &nosave_regions);
 644 Report:
 645        printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
 646                start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 647}
 648
 649/*
 650 * Set bits in this map correspond to the page frames the contents of which
 651 * should not be saved during the suspend.
 652 */
 653static struct memory_bitmap *forbidden_pages_map;
 654
 655/* Set bits in this map correspond to free page frames. */
 656static struct memory_bitmap *free_pages_map;
 657
 658/*
 659 * Each page frame allocated for creating the image is marked by setting the
 660 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
 661 */
 662
 663void swsusp_set_page_free(struct page *page)
 664{
 665        if (free_pages_map)
 666                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
 667}
 668
 669static int swsusp_page_is_free(struct page *page)
 670{
 671        return free_pages_map ?
 672                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
 673}
 674
 675void swsusp_unset_page_free(struct page *page)
 676{
 677        if (free_pages_map)
 678                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
 679}
 680
 681static void swsusp_set_page_forbidden(struct page *page)
 682{
 683        if (forbidden_pages_map)
 684                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
 685}
 686
 687int swsusp_page_is_forbidden(struct page *page)
 688{
 689        return forbidden_pages_map ?
 690                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
 691}
 692
 693static void swsusp_unset_page_forbidden(struct page *page)
 694{
 695        if (forbidden_pages_map)
 696                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
 697}
 698
 699/**
 700 *      mark_nosave_pages - set bits corresponding to the page frames the
 701 *      contents of which should not be saved in a given bitmap.
 702 */
 703
 704static void mark_nosave_pages(struct memory_bitmap *bm)
 705{
 706        struct nosave_region *region;
 707
 708        if (list_empty(&nosave_regions))
 709                return;
 710
 711        list_for_each_entry(region, &nosave_regions, list) {
 712                unsigned long pfn;
 713
 714                pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
 715                         (unsigned long long) region->start_pfn << PAGE_SHIFT,
 716                         ((unsigned long long) region->end_pfn << PAGE_SHIFT)
 717                                - 1);
 718
 719                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
 720                        if (pfn_valid(pfn)) {
 721                                /*
 722                                 * It is safe to ignore the result of
 723                                 * mem_bm_set_bit_check() here, since we won't
 724                                 * touch the PFNs for which the error is
 725                                 * returned anyway.
 726                                 */
 727                                mem_bm_set_bit_check(bm, pfn);
 728                        }
 729        }
 730}
 731
 732/**
 733 *      create_basic_memory_bitmaps - create bitmaps needed for marking page
 734 *      frames that should not be saved and free page frames.  The pointers
 735 *      forbidden_pages_map and free_pages_map are only modified if everything
 736 *      goes well, because we don't want the bits to be used before both bitmaps
 737 *      are set up.
 738 */
 739
 740int create_basic_memory_bitmaps(void)
 741{
 742        struct memory_bitmap *bm1, *bm2;
 743        int error = 0;
 744
 745        BUG_ON(forbidden_pages_map || free_pages_map);
 746
 747        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 748        if (!bm1)
 749                return -ENOMEM;
 750
 751        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
 752        if (error)
 753                goto Free_first_object;
 754
 755        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 756        if (!bm2)
 757                goto Free_first_bitmap;
 758
 759        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
 760        if (error)
 761                goto Free_second_object;
 762
 763        forbidden_pages_map = bm1;
 764        free_pages_map = bm2;
 765        mark_nosave_pages(forbidden_pages_map);
 766
 767        pr_debug("PM: Basic memory bitmaps created\n");
 768
 769        return 0;
 770
 771 Free_second_object:
 772        kfree(bm2);
 773 Free_first_bitmap:
 774        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 775 Free_first_object:
 776        kfree(bm1);
 777        return -ENOMEM;
 778}
 779
 780/**
 781 *      free_basic_memory_bitmaps - free memory bitmaps allocated by
 782 *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
 783 *      so that the bitmaps themselves are not referred to while they are being
 784 *      freed.
 785 */
 786
 787void free_basic_memory_bitmaps(void)
 788{
 789        struct memory_bitmap *bm1, *bm2;
 790
 791        BUG_ON(!(forbidden_pages_map && free_pages_map));
 792
 793        bm1 = forbidden_pages_map;
 794        bm2 = free_pages_map;
 795        forbidden_pages_map = NULL;
 796        free_pages_map = NULL;
 797        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 798        kfree(bm1);
 799        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
 800        kfree(bm2);
 801
 802        pr_debug("PM: Basic memory bitmaps freed\n");
 803}
 804
 805/**
 806 *      snapshot_additional_pages - estimate the number of additional pages
 807 *      be needed for setting up the suspend image data structures for given
 808 *      zone (usually the returned value is greater than the exact number)
 809 */
 810
 811unsigned int snapshot_additional_pages(struct zone *zone)
 812{
 813        unsigned int res;
 814
 815        res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
 816        res += DIV_ROUND_UP(res * sizeof(struct bm_block),
 817                            LINKED_PAGE_DATA_SIZE);
 818        return 2 * res;
 819}
 820
 821#ifdef CONFIG_HIGHMEM
 822/**
 823 *      count_free_highmem_pages - compute the total number of free highmem
 824 *      pages, system-wide.
 825 */
 826
 827static unsigned int count_free_highmem_pages(void)
 828{
 829        struct zone *zone;
 830        unsigned int cnt = 0;
 831
 832        for_each_populated_zone(zone)
 833                if (is_highmem(zone))
 834                        cnt += zone_page_state(zone, NR_FREE_PAGES);
 835
 836        return cnt;
 837}
 838
 839/**
 840 *      saveable_highmem_page - Determine whether a highmem page should be
 841 *      included in the suspend image.
 842 *
 843 *      We should save the page if it isn't Nosave or NosaveFree, or Reserved,
 844 *      and it isn't a part of a free chunk of pages.
 845 */
 846static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 847{
 848        struct page *page;
 849
 850        if (!pfn_valid(pfn))
 851                return NULL;
 852
 853        page = pfn_to_page(pfn);
 854        if (page_zone(page) != zone)
 855                return NULL;
 856
 857        BUG_ON(!PageHighMem(page));
 858
 859        if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
 860            PageReserved(page))
 861                return NULL;
 862
 863        if (page_is_guard(page))
 864                return NULL;
 865
 866        return page;
 867}
 868
 869/**
 870 *      count_highmem_pages - compute the total number of saveable highmem
 871 *      pages.
 872 */
 873
 874static unsigned int count_highmem_pages(void)
 875{
 876        struct zone *zone;
 877        unsigned int n = 0;
 878
 879        for_each_populated_zone(zone) {
 880                unsigned long pfn, max_zone_pfn;
 881
 882                if (!is_highmem(zone))
 883                        continue;
 884
 885                mark_free_pages(zone);
 886                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 887                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 888                        if (saveable_highmem_page(zone, pfn))
 889                                n++;
 890        }
 891        return n;
 892}
 893#else
 894static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
 895{
 896        return NULL;
 897}
 898#endif /* CONFIG_HIGHMEM */
 899
 900/**
 901 *      saveable_page - Determine whether a non-highmem page should be included
 902 *      in the suspend image.
 903 *
 904 *      We should save the page if it isn't Nosave, and is not in the range
 905 *      of pages statically defined as 'unsaveable', and it isn't a part of
 906 *      a free chunk of pages.
 907 */
 908static struct page *saveable_page(struct zone *zone, unsigned long pfn)
 909{
 910        struct page *page;
 911
 912        if (!pfn_valid(pfn))
 913                return NULL;
 914
 915        page = pfn_to_page(pfn);
 916        if (page_zone(page) != zone)
 917                return NULL;
 918
 919        BUG_ON(PageHighMem(page));
 920
 921        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
 922                return NULL;
 923
 924        if (PageReserved(page)
 925            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
 926                return NULL;
 927
 928        if (page_is_guard(page))
 929                return NULL;
 930
 931        return page;
 932}
 933
 934/**
 935 *      count_data_pages - compute the total number of saveable non-highmem
 936 *      pages.
 937 */
 938
 939static unsigned int count_data_pages(void)
 940{
 941        struct zone *zone;
 942        unsigned long pfn, max_zone_pfn;
 943        unsigned int n = 0;
 944
 945        for_each_populated_zone(zone) {
 946                if (is_highmem(zone))
 947                        continue;
 948
 949                mark_free_pages(zone);
 950                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 951                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 952                        if (saveable_page(zone, pfn))
 953                                n++;
 954        }
 955        return n;
 956}
 957
 958/* This is needed, because copy_page and memcpy are not usable for copying
 959 * task structs.
 960 */
 961static inline void do_copy_page(long *dst, long *src)
 962{
 963        int n;
 964
 965        for (n = PAGE_SIZE / sizeof(long); n; n--)
 966                *dst++ = *src++;
 967}
 968
 969
 970/**
 971 *      safe_copy_page - check if the page we are going to copy is marked as
 972 *              present in the kernel page tables (this always is the case if
 973 *              CONFIG_DEBUG_PAGEALLOC is not set and in that case
 974 *              kernel_page_present() always returns 'true').
 975 */
 976static void safe_copy_page(void *dst, struct page *s_page)
 977{
 978        if (kernel_page_present(s_page)) {
 979                do_copy_page(dst, page_address(s_page));
 980        } else {
 981                kernel_map_pages(s_page, 1, 1);
 982                do_copy_page(dst, page_address(s_page));
 983                kernel_map_pages(s_page, 1, 0);
 984        }
 985}
 986
 987
 988#ifdef CONFIG_HIGHMEM
 989static inline struct page *
 990page_is_saveable(struct zone *zone, unsigned long pfn)
 991{
 992        return is_highmem(zone) ?
 993                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 994}
 995
 996static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 997{
 998        struct page *s_page, *d_page;
 999        void *src, *dst;
1000
1001        s_page = pfn_to_page(src_pfn);
1002        d_page = pfn_to_page(dst_pfn);
1003        if (PageHighMem(s_page)) {
1004                src = kmap_atomic(s_page);
1005                dst = kmap_atomic(d_page);
1006                do_copy_page(dst, src);
1007                kunmap_atomic(dst);
1008                kunmap_atomic(src);
1009        } else {
1010                if (PageHighMem(d_page)) {
1011                        /* Page pointed to by src may contain some kernel
1012                         * data modified by kmap_atomic()
1013                         */
1014                        safe_copy_page(buffer, s_page);
1015                        dst = kmap_atomic(d_page);
1016                        copy_page(dst, buffer);
1017                        kunmap_atomic(dst);
1018                } else {
1019                        safe_copy_page(page_address(d_page), s_page);
1020                }
1021        }
1022}
1023#else
/* Without CONFIG_HIGHMEM every zone is handled by saveable_page(). */
#define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)

/* Copy the contents of page frame @src_pfn into page frame @dst_pfn. */
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	struct page *to = pfn_to_page(dst_pfn);
	struct page *from = pfn_to_page(src_pfn);

	safe_copy_page(page_address(to), from);
}
1031#endif /* CONFIG_HIGHMEM */
1032
1033static void
1034copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1035{
1036        struct zone *zone;
1037        unsigned long pfn;
1038
1039        for_each_populated_zone(zone) {
1040                unsigned long max_zone_pfn;
1041
1042                mark_free_pages(zone);
1043                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1044                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1045                        if (page_is_saveable(zone, pfn))
1046                                memory_bm_set_bit(orig_bm, pfn);
1047        }
1048        memory_bm_position_reset(orig_bm);
1049        memory_bm_position_reset(copy_bm);
1050        for(;;) {
1051                pfn = memory_bm_next_pfn(orig_bm);
1052                if (unlikely(pfn == BM_END_OF_MAP))
1053                        break;
1054                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1055        }
1056}
1057
1058/* Total number of image pages */
1059static unsigned int nr_copy_pages;
1060/* Number of pages needed for saving the original pfns of the image pages */
1061static unsigned int nr_meta_pages;
1062/*
1063 * Numbers of normal and highmem page frames allocated for hibernation image
1064 * before suspending devices.
1065 */
1066unsigned int alloc_normal, alloc_highmem;
1067/*
1068 * Memory bitmap used for marking saveable pages (during hibernation) or
1069 * hibernation image pages (during restore)
1070 */
1071static struct memory_bitmap orig_bm;
1072/*
1073 * Memory bitmap used during hibernation for marking allocated page frames that
1074 * will contain copies of saveable pages.  During restore it is initially used
1075 * for marking hibernation image pages, but then the set bits from it are
1076 * duplicated in @orig_bm and it is released.  On highmem systems it is next
1077 * used for marking "safe" highmem pages, but it has to be reinitialized for
1078 * this purpose.
1079 */
1080static struct memory_bitmap copy_bm;
1081
1082/**
1083 *      swsusp_free - free pages allocated for the suspend.
1084 *
1085 *      Suspend pages are alocated before the atomic copy is made, so we
1086 *      need to release them after the resume.
1087 */
1088
1089void swsusp_free(void)
1090{
1091        struct zone *zone;
1092        unsigned long pfn, max_zone_pfn;
1093
1094        for_each_populated_zone(zone) {
1095                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1096                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1097                        if (pfn_valid(pfn)) {
1098                                struct page *page = pfn_to_page(pfn);
1099
1100                                if (swsusp_page_is_forbidden(page) &&
1101                                    swsusp_page_is_free(page)) {
1102                                        swsusp_unset_page_forbidden(page);
1103                                        swsusp_unset_page_free(page);
1104                                        __free_page(page);
1105                                }
1106                        }
1107        }
1108        nr_copy_pages = 0;
1109        nr_meta_pages = 0;
1110        restore_pblist = NULL;
1111        buffer = NULL;
1112        alloc_normal = 0;
1113        alloc_highmem = 0;
1114}
1115
1116/* Helper functions used for the shrinking of memory. */
1117
1118#define GFP_IMAGE       (GFP_KERNEL | __GFP_NOWARN)
1119
1120/**
1121 * preallocate_image_pages - Allocate a number of pages for hibernation image
1122 * @nr_pages: Number of page frames to allocate.
1123 * @mask: GFP flags to use for the allocation.
1124 *
1125 * Return value: Number of page frames actually allocated
1126 */
1127static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1128{
1129        unsigned long nr_alloc = 0;
1130
1131        while (nr_pages > 0) {
1132                struct page *page;
1133
1134                page = alloc_image_page(mask);
1135                if (!page)
1136                        break;
1137                memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1138                if (PageHighMem(page))
1139                        alloc_highmem++;
1140                else
1141                        alloc_normal++;
1142                nr_pages--;
1143                nr_alloc++;
1144        }
1145
1146        return nr_alloc;
1147}
1148
1149static unsigned long preallocate_image_memory(unsigned long nr_pages,
1150                                              unsigned long avail_normal)
1151{
1152        unsigned long alloc;
1153
1154        if (avail_normal <= alloc_normal)
1155                return 0;
1156
1157        alloc = avail_normal - alloc_normal;
1158        if (nr_pages < alloc)
1159                alloc = nr_pages;
1160
1161        return preallocate_image_pages(alloc, GFP_IMAGE);
1162}
1163
1164#ifdef CONFIG_HIGHMEM
1165static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1166{
1167        return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1168}
1169
1170/**
1171 *  __fraction - Compute (an approximation of) x * (multiplier / base)
1172 */
1173static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1174{
1175        x *= multiplier;
1176        do_div(x, base);
1177        return (unsigned long)x;
1178}
1179
1180static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1181                                                unsigned long highmem,
1182                                                unsigned long total)
1183{
1184        unsigned long alloc = __fraction(nr_pages, highmem, total);
1185
1186        return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1187}
1188#else /* CONFIG_HIGHMEM */
/* Stub used when CONFIG_HIGHMEM is not set: nothing is ever allocated. */
static inline unsigned long
preallocate_image_highmem(unsigned long nr_pages)
{
	return 0;
}
1193
/* Stub used when CONFIG_HIGHMEM is not set: nothing is ever allocated. */
static inline unsigned long
preallocate_highmem_fraction(unsigned long nr_pages, unsigned long highmem,
			     unsigned long total)
{
	return 0;
}
1200#endif /* CONFIG_HIGHMEM */
1201
1202/**
1203 * free_unnecessary_pages - Release preallocated pages not needed for the image
1204 */
1205static void free_unnecessary_pages(void)
1206{
1207        unsigned long save, to_free_normal, to_free_highmem;
1208
1209        save = count_data_pages();
1210        if (alloc_normal >= save) {
1211                to_free_normal = alloc_normal - save;
1212                save = 0;
1213        } else {
1214                to_free_normal = 0;
1215                save -= alloc_normal;
1216        }
1217        save += count_highmem_pages();
1218        if (alloc_highmem >= save) {
1219                to_free_highmem = alloc_highmem - save;
1220        } else {
1221                to_free_highmem = 0;
1222                save -= alloc_highmem;
1223                if (to_free_normal > save)
1224                        to_free_normal -= save;
1225                else
1226                        to_free_normal = 0;
1227        }
1228
1229        memory_bm_position_reset(&copy_bm);
1230
1231        while (to_free_normal > 0 || to_free_highmem > 0) {
1232                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1233                struct page *page = pfn_to_page(pfn);
1234
1235                if (PageHighMem(page)) {
1236                        if (!to_free_highmem)
1237                                continue;
1238                        to_free_highmem--;
1239                        alloc_highmem--;
1240                } else {
1241                        if (!to_free_normal)
1242                                continue;
1243                        to_free_normal--;
1244                        alloc_normal--;
1245                }
1246                memory_bm_clear_bit(&copy_bm, pfn);
1247                swsusp_unset_page_forbidden(page);
1248                swsusp_unset_page_free(page);
1249                __free_page(page);
1250        }
1251}
1252
1253/**
1254 * minimum_image_size - Estimate the minimum acceptable size of an image
1255 * @saveable: Number of saveable pages in the system.
1256 *
1257 * We want to avoid attempting to free too much memory too hard, so estimate the
1258 * minimum acceptable size of a hibernation image to use as the lower limit for
1259 * preallocating memory.
1260 *
1261 * We assume that the minimum image size should be proportional to
1262 *
1263 * [number of saveable pages] - [number of pages that can be freed in theory]
1264 *
1265 * where the second term is the sum of (1) reclaimable slab pages, (2) active
1266 * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages,
1267 * minus mapped file pages.
1268 */
1269static unsigned long minimum_image_size(unsigned long saveable)
1270{
1271        unsigned long size;
1272
1273        size = global_page_state(NR_SLAB_RECLAIMABLE)
1274                + global_page_state(NR_ACTIVE_ANON)
1275                + global_page_state(NR_INACTIVE_ANON)
1276                + global_page_state(NR_ACTIVE_FILE)
1277                + global_page_state(NR_INACTIVE_FILE)
1278                - global_page_state(NR_FILE_MAPPED);
1279
1280        return saveable <= size ? 0 : saveable - size;
1281}
1282
1283/**
1284 * hibernate_preallocate_memory - Preallocate memory for hibernation image
1285 *
1286 * To create a hibernation image it is necessary to make a copy of every page
1287 * frame in use.  We also need a number of page frames to be free during
1288 * hibernation for allocations made while saving the image and for device
1289 * drivers, in case they need to allocate memory from their hibernation
1290 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
1291 * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through
1292 * /sys/power/reserved_size, respectively).  To make this happen, we compute the
1293 * total number of available page frames and allocate at least
1294 *
1295 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
1296 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
1297 *
1298 * of them, which corresponds to the maximum size of a hibernation image.
1299 *
1300 * If image_size is set below the number following from the above formula,
1301 * the preallocation of memory is continued until the total number of saveable
1302 * pages in the system is below the requested image size or the minimum
1303 * acceptable image size returned by minimum_image_size(), whichever is greater.
1304 */
1305int hibernate_preallocate_memory(void)
1306{
1307        struct zone *zone;
1308        unsigned long saveable, size, max_size, count, highmem, pages = 0;
1309        unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1310        struct timeval start, stop;
1311        int error;
1312
1313        printk(KERN_INFO "PM: Preallocating image memory... ");
1314        do_gettimeofday(&start);
1315
1316        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1317        if (error)
1318                goto err_out;
1319
1320        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1321        if (error)
1322                goto err_out;
1323
1324        alloc_normal = 0;
1325        alloc_highmem = 0;
1326
1327        /* Count the number of saveable data pages. */
1328        save_highmem = count_highmem_pages();
1329        saveable = count_data_pages();
1330
1331        /*
1332         * Compute the total number of page frames we can use (count) and the
1333         * number of pages needed for image metadata (size).
1334         */
1335        count = saveable;
1336        saveable += save_highmem;
1337        highmem = save_highmem;
1338        size = 0;
1339        for_each_populated_zone(zone) {
1340                size += snapshot_additional_pages(zone);
1341                if (is_highmem(zone))
1342                        highmem += zone_page_state(zone, NR_FREE_PAGES);
1343                else
1344                        count += zone_page_state(zone, NR_FREE_PAGES);
1345        }
1346        avail_normal = count;
1347        count += highmem;
1348        count -= totalreserve_pages;
1349
1350        /* Add number of pages required for page keys (s390 only). */
1351        size += page_key_additional_pages(saveable);
1352
1353        /* Compute the maximum number of saveable pages to leave in memory. */
1354        max_size = (count - (size + PAGES_FOR_IO)) / 2
1355                        - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
1356        /* Compute the desired number of image pages specified by image_size. */
1357        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1358        if (size > max_size)
1359                size = max_size;
1360        /*
1361         * If the desired number of image pages is at least as large as the
1362         * current number of saveable pages in memory, allocate page frames for
1363         * the image and we're done.
1364         */
1365        if (size >= saveable) {
1366                pages = preallocate_image_highmem(save_highmem);
1367                pages += preallocate_image_memory(saveable - pages, avail_normal);
1368                goto out;
1369        }
1370
1371        /* Estimate the minimum size of the image. */
1372        pages = minimum_image_size(saveable);
1373        /*
1374         * To avoid excessive pressure on the normal zone, leave room in it to
1375         * accommodate an image of the minimum size (unless it's already too
1376         * small, in which case don't preallocate pages from it at all).
1377         */
1378        if (avail_normal > pages)
1379                avail_normal -= pages;
1380        else
1381                avail_normal = 0;
1382        if (size < pages)
1383                size = min_t(unsigned long, pages, max_size);
1384
1385        /*
1386         * Let the memory management subsystem know that we're going to need a
1387         * large number of page frames to allocate and make it free some memory.
1388         * NOTE: If this is not done, performance will be hurt badly in some
1389         * test cases.
1390         */
1391        shrink_all_memory(saveable - size);
1392
1393        /*
1394         * The number of saveable pages in memory was too high, so apply some
1395         * pressure to decrease it.  First, make room for the largest possible
1396         * image and fail if that doesn't work.  Next, try to decrease the size
1397         * of the image as much as indicated by 'size' using allocations from
1398         * highmem and non-highmem zones separately.
1399         */
1400        pages_highmem = preallocate_image_highmem(highmem / 2);
1401        alloc = (count - max_size) - pages_highmem;
1402        pages = preallocate_image_memory(alloc, avail_normal);
1403        if (pages < alloc) {
1404                /* We have exhausted non-highmem pages, try highmem. */
1405                alloc -= pages;
1406                pages += pages_highmem;
1407                pages_highmem = preallocate_image_highmem(alloc);
1408                if (pages_highmem < alloc)
1409                        goto err_out;
1410                pages += pages_highmem;
1411                /*
1412                 * size is the desired number of saveable pages to leave in
1413                 * memory, so try to preallocate (all memory - size) pages.
1414                 */
1415                alloc = (count - pages) - size;
1416                pages += preallocate_image_highmem(alloc);
1417        } else {
1418                /*
1419                 * There are approximately max_size saveable pages at this point
1420                 * and we want to reduce this number down to size.
1421                 */
1422                alloc = max_size - size;
1423                size = preallocate_highmem_fraction(alloc, highmem, count);
1424                pages_highmem += size;
1425                alloc -= size;
1426                size = preallocate_image_memory(alloc, avail_normal);
1427                pages_highmem += preallocate_image_highmem(alloc - size);
1428                pages += pages_highmem + size;
1429        }
1430
1431        /*
1432         * We only need as many page frames for the image as there are saveable
1433         * pages in memory, but we have allocated more.  Release the excessive
1434         * ones now.
1435         */
1436        free_unnecessary_pages();
1437
1438 out:
1439        do_gettimeofday(&stop);
1440        printk(KERN_CONT "done (allocated %lu pages)\n", pages);
1441        swsusp_show_speed(&start, &stop, pages, "Allocated");
1442
1443        return 0;
1444
1445 err_out:
1446        printk(KERN_CONT "\n");
1447        swsusp_free();
1448        return -ENOMEM;
1449}
1450
1451#ifdef CONFIG_HIGHMEM
1452/**
1453  *     count_pages_for_highmem - compute the number of non-highmem pages
1454  *     that will be necessary for creating copies of highmem pages.
1455  */
1456
1457static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1458{
1459        unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1460
1461        if (free_highmem >= nr_highmem)
1462                nr_highmem = 0;
1463        else
1464                nr_highmem -= free_highmem;
1465
1466        return nr_highmem;
1467}
1468#else
/* Stub used when CONFIG_HIGHMEM is not set: no extra pages are needed. */
static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
	return 0;
}
1471#endif /* CONFIG_HIGHMEM */
1472
1473/**
1474 *      enough_free_mem - Make sure we have enough free memory for the
1475 *      snapshot image.
1476 */
1477
1478static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1479{
1480        struct zone *zone;
1481        unsigned int free = alloc_normal;
1482
1483        for_each_populated_zone(zone)
1484                if (!is_highmem(zone))
1485                        free += zone_page_state(zone, NR_FREE_PAGES);
1486
1487        nr_pages += count_pages_for_highmem(nr_highmem);
1488        pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
1489                nr_pages, PAGES_FOR_IO, free);
1490
1491        return free > nr_pages + PAGES_FOR_IO;
1492}
1493
1494#ifdef CONFIG_HIGHMEM
1495/**
1496 *      get_highmem_buffer - if there are some highmem pages in the suspend
1497 *      image, we may need the buffer to copy them and/or load their data.
1498 */
1499
1500static inline int get_highmem_buffer(int safe_needed)
1501{
1502        buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1503        return buffer ? 0 : -ENOMEM;
1504}
1505
1506/**
1507 *      alloc_highmem_image_pages - allocate some highmem pages for the image.
1508 *      Try to allocate as many pages as needed, but if the number of free
1509 *      highmem pages is lesser than that, allocate them all.
1510 */
1511
1512static inline unsigned int
1513alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1514{
1515        unsigned int to_alloc = count_free_highmem_pages();
1516
1517        if (to_alloc > nr_highmem)
1518                to_alloc = nr_highmem;
1519
1520        nr_highmem -= to_alloc;
1521        while (to_alloc-- > 0) {
1522                struct page *page;
1523
1524                page = alloc_image_page(__GFP_HIGHMEM);
1525                memory_bm_set_bit(bm, page_to_pfn(page));
1526        }
1527        return nr_highmem;
1528}
1529#else
/* Stub used when CONFIG_HIGHMEM is not set: no buffer is needed. */
static inline int get_highmem_buffer(int safe_needed)
{
	return 0;
}
1531
/* Stub used when CONFIG_HIGHMEM is not set: always reports 0. */
static inline unsigned int
alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n)
{
	return 0;
}
1534#endif /* CONFIG_HIGHMEM */
1535
1536/**
1537 *      swsusp_alloc - allocate memory for the suspend image
1538 *
1539 *      We first try to allocate as many highmem pages as there are
1540 *      saveable highmem pages in the system.  If that fails, we allocate
1541 *      non-highmem pages for the copies of the remaining highmem ones.
1542 *
1543 *      In this approach it is likely that the copies of highmem pages will
1544 *      also be located in the high memory, because of the way in which
1545 *      copy_data_pages() works.
1546 */
1547
1548static int
1549swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1550                unsigned int nr_pages, unsigned int nr_highmem)
1551{
1552        if (nr_highmem > 0) {
1553                if (get_highmem_buffer(PG_ANY))
1554                        goto err_out;
1555                if (nr_highmem > alloc_highmem) {
1556                        nr_highmem -= alloc_highmem;
1557                        nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1558                }
1559        }
1560        if (nr_pages > alloc_normal) {
1561                nr_pages -= alloc_normal;
1562                while (nr_pages-- > 0) {
1563                        struct page *page;
1564
1565                        page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1566                        if (!page)
1567                                goto err_out;
1568                        memory_bm_set_bit(copy_bm, page_to_pfn(page));
1569                }
1570        }
1571
1572        return 0;
1573
1574 err_out:
1575        swsusp_free();
1576        return -ENOMEM;
1577}
1578
1579asmlinkage int swsusp_save(void)
1580{
1581        unsigned int nr_pages, nr_highmem;
1582
1583        printk(KERN_INFO "PM: Creating hibernation image:\n");
1584
1585        drain_local_pages(NULL);
1586        nr_pages = count_data_pages();
1587        nr_highmem = count_highmem_pages();
1588        printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1589
1590        if (!enough_free_mem(nr_pages, nr_highmem)) {
1591                printk(KERN_ERR "PM: Not enough free memory\n");
1592                return -ENOMEM;
1593        }
1594
1595        if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1596                printk(KERN_ERR "PM: Memory allocation failed\n");
1597                return -ENOMEM;
1598        }
1599
1600        /* During allocating of suspend pagedir, new cold pages may appear.
1601         * Kill them.
1602         */
1603        drain_local_pages(NULL);
1604        copy_data_pages(&copy_bm, &orig_bm);
1605
1606        /*
1607         * End of critical section. From now on, we can write to memory,
1608         * but we should not touch disk. This specially means we must _not_
1609         * touch swap space! Except we must write out our image of course.
1610         */
1611
1612        nr_pages += nr_highmem;
1613        nr_copy_pages = nr_pages;
1614        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1615
1616        printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1617                nr_pages);
1618
1619        return 0;
1620}
1621
1622#ifndef CONFIG_ARCH_HIBERNATION_HEADER
1623static int init_header_complete(struct swsusp_info *info)
1624{
1625        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1626        info->version_code = LINUX_VERSION_CODE;
1627        return 0;
1628}
1629
1630static char *check_image_kernel(struct swsusp_info *info)
1631{
1632        if (info->version_code != LINUX_VERSION_CODE)
1633                return "kernel version";
1634        if (strcmp(info->uts.sysname,init_utsname()->sysname))
1635                return "system type";
1636        if (strcmp(info->uts.release,init_utsname()->release))
1637                return "kernel release";
1638        if (strcmp(info->uts.version,init_utsname()->version))
1639                return "version";
1640        if (strcmp(info->uts.machine,init_utsname()->machine))
1641                return "machine";
1642        return NULL;
1643}
1644#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1645
1646unsigned long snapshot_get_image_size(void)
1647{
1648        return nr_copy_pages + nr_meta_pages + 1;
1649}
1650
1651static int init_header(struct swsusp_info *info)
1652{
1653        memset(info, 0, sizeof(struct swsusp_info));
1654        info->num_physpages = num_physpages;
1655        info->image_pages = nr_copy_pages;
1656        info->pages = snapshot_get_image_size();
1657        info->size = info->pages;
1658        info->size <<= PAGE_SHIFT;
1659        return init_header_complete(info);
1660}
1661
1662/**
1663 *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1664 *      are stored in the array @buf[] (1 page at a time)
1665 */
1666
1667static inline void
1668pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1669{
1670        int j;
1671
1672        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1673                buf[j] = memory_bm_next_pfn(bm);
1674                if (unlikely(buf[j] == BM_END_OF_MAP))
1675                        break;
1676                /* Save page key for data page (s390 only). */
1677                page_key_read(buf + j);
1678        }
1679}
1680
1681/**
1682 *      snapshot_read_next - used for reading the system memory snapshot.
1683 *
1684 *      On the first call to it @handle should point to a zeroed
1685 *      snapshot_handle structure.  The structure gets updated and a pointer
1686 *      to it should be passed to this function every next time.
1687 *
1688 *      On success the function returns a positive number.  Then, the caller
1689 *      is allowed to read up to the returned number of bytes from the memory
1690 *      location computed by the data_of() macro.
1691 *
1692 *      The function returns 0 to indicate the end of data stream condition,
1693 *      and a negative number is returned on error.  In such cases the
1694 *      structure pointed to by @handle is not updated and should not be used
1695 *      any more.
1696 */
1697
1698int snapshot_read_next(struct snapshot_handle *handle)
1699{
1700        if (handle->cur > nr_meta_pages + nr_copy_pages)
1701                return 0;
1702
1703        if (!buffer) {
1704                /* This makes the buffer be freed by swsusp_free() */
1705                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1706                if (!buffer)
1707                        return -ENOMEM;
1708        }
1709        if (!handle->cur) {
1710                int error;
1711
1712                error = init_header((struct swsusp_info *)buffer);
1713                if (error)
1714                        return error;
1715                handle->buffer = buffer;
1716                memory_bm_position_reset(&orig_bm);
1717                memory_bm_position_reset(&copy_bm);
1718        } else if (handle->cur <= nr_meta_pages) {
1719                clear_page(buffer);
1720                pack_pfns(buffer, &orig_bm);
1721        } else {
1722                struct page *page;
1723
1724                page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1725                if (PageHighMem(page)) {
1726                        /* Highmem pages are copied to the buffer,
1727                         * because we can't return with a kmapped
1728                         * highmem page (we may not be called again).
1729                         */
1730                        void *kaddr;
1731
1732                        kaddr = kmap_atomic(page);
1733                        copy_page(buffer, kaddr);
1734                        kunmap_atomic(kaddr);
1735                        handle->buffer = buffer;
1736                } else {
1737                        handle->buffer = page_address(page);
1738                }
1739        }
1740        handle->cur++;
1741        return PAGE_SIZE;
1742}
1743
1744/**
1745 *      mark_unsafe_pages - mark the pages that cannot be used for storing
1746 *      the image during resume, because they conflict with the pages that
1747 *      had been used before suspend
1748 */
1749
1750static int mark_unsafe_pages(struct memory_bitmap *bm)
1751{
1752        struct zone *zone;
1753        unsigned long pfn, max_zone_pfn;
1754
1755        /* Clear page flags */
1756        for_each_populated_zone(zone) {
1757                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1758                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1759                        if (pfn_valid(pfn))
1760                                swsusp_unset_page_free(pfn_to_page(pfn));
1761        }
1762
1763        /* Mark pages that correspond to the "original" pfns as "unsafe" */
1764        memory_bm_position_reset(bm);
1765        do {
1766                pfn = memory_bm_next_pfn(bm);
1767                if (likely(pfn != BM_END_OF_MAP)) {
1768                        if (likely(pfn_valid(pfn)))
1769                                swsusp_set_page_free(pfn_to_page(pfn));
1770                        else
1771                                return -EFAULT;
1772                }
1773        } while (pfn != BM_END_OF_MAP);
1774
1775        allocated_unsafe_pages = 0;
1776
1777        return 0;
1778}
1779
1780static void
1781duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1782{
1783        unsigned long pfn;
1784
1785        memory_bm_position_reset(src);
1786        pfn = memory_bm_next_pfn(src);
1787        while (pfn != BM_END_OF_MAP) {
1788                memory_bm_set_bit(dst, pfn);
1789                pfn = memory_bm_next_pfn(src);
1790        }
1791}
1792
1793static int check_header(struct swsusp_info *info)
1794{
1795        char *reason;
1796
1797        reason = check_image_kernel(info);
1798        if (!reason && info->num_physpages != num_physpages)
1799                reason = "memory size";
1800        if (reason) {
1801                printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1802                return -EPERM;
1803        }
1804        return 0;
1805}
1806
1807/**
1808 *      load header - check the image header and copy data from it
1809 */
1810
1811static int
1812load_header(struct swsusp_info *info)
1813{
1814        int error;
1815
1816        restore_pblist = NULL;
1817        error = check_header(info);
1818        if (!error) {
1819                nr_copy_pages = info->image_pages;
1820                nr_meta_pages = info->pages - info->image_pages - 1;
1821        }
1822        return error;
1823}
1824
1825/**
1826 *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1827 *      the corresponding bit in the memory bitmap @bm
1828 */
1829static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1830{
1831        int j;
1832
1833        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1834                if (unlikely(buf[j] == BM_END_OF_MAP))
1835                        break;
1836
1837                /* Extract and buffer page key for data page (s390 only). */
1838                page_key_memorize(buf + j);
1839
1840                if (memory_bm_pfn_present(bm, buf[j]))
1841                        memory_bm_set_bit(bm, buf[j]);
1842                else
1843                        return -EFAULT;
1844        }
1845
1846        return 0;
1847}
1848
/*
 * Head of the list of "safe" pages that may be used to store data loaded
 * from the suspend image.
 */
static struct linked_page *safe_pages_list;
1853
1854#ifdef CONFIG_HIGHMEM
/*
 * struct highmem_pbe - entry of the list of highmem pages that should be
 * restored atomically during the resume from disk, because the page frames
 * they have occupied before the suspend are in use.
 */
struct highmem_pbe {
        struct page *copy_page;         /* page currently holding the data */
        struct page *orig_page;         /* page that held the data before the suspend */
        struct highmem_pbe *next;       /* next list entry */
};
1864
/*
 * Head of the list of highmem PBEs.  They describe the highmem pages that
 * were allocated before the suspend and are part of the suspend image, but
 * have also been allocated by the "resume" kernel, so their contents cannot
 * be written directly to their "original" page frames.
 */
static struct highmem_pbe *highmem_pblist;
1871
1872/**
1873 *      count_highmem_image_pages - compute the number of highmem pages in the
1874 *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1875 *      image pages are assumed to be set.
1876 */
1877
1878static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1879{
1880        unsigned long pfn;
1881        unsigned int cnt = 0;
1882
1883        memory_bm_position_reset(bm);
1884        pfn = memory_bm_next_pfn(bm);
1885        while (pfn != BM_END_OF_MAP) {
1886                if (PageHighMem(pfn_to_page(pfn)))
1887                        cnt++;
1888
1889                pfn = memory_bm_next_pfn(bm);
1890        }
1891        return cnt;
1892}
1893
1894/**
1895 *      prepare_highmem_image - try to allocate as many highmem pages as
1896 *      there are highmem image pages (@nr_highmem_p points to the variable
1897 *      containing the number of highmem image pages).  The pages that are
1898 *      "safe" (ie. will not be overwritten when the suspend image is
1899 *      restored) have the corresponding bits set in @bm (it must be
1900 *      unitialized).
1901 *
1902 *      NOTE: This function should not be called if there are no highmem
1903 *      image pages.
1904 */
1905
1906static unsigned int safe_highmem_pages;
1907
1908static struct memory_bitmap *safe_highmem_bm;
1909
1910static int
1911prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1912{
1913        unsigned int to_alloc;
1914
1915        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1916                return -ENOMEM;
1917
1918        if (get_highmem_buffer(PG_SAFE))
1919                return -ENOMEM;
1920
1921        to_alloc = count_free_highmem_pages();
1922        if (to_alloc > *nr_highmem_p)
1923                to_alloc = *nr_highmem_p;
1924        else
1925                *nr_highmem_p = to_alloc;
1926
1927        safe_highmem_pages = 0;
1928        while (to_alloc-- > 0) {
1929                struct page *page;
1930
1931                page = alloc_page(__GFP_HIGHMEM);
1932                if (!swsusp_page_is_free(page)) {
1933                        /* The page is "safe", set its bit the bitmap */
1934                        memory_bm_set_bit(bm, page_to_pfn(page));
1935                        safe_highmem_pages++;
1936                }
1937                /* Mark the page as allocated */
1938                swsusp_set_page_forbidden(page);
1939                swsusp_set_page_free(page);
1940        }
1941        memory_bm_position_reset(bm);
1942        safe_highmem_bm = bm;
1943        return 0;
1944}
1945
1946/**
1947 *      get_highmem_page_buffer - for given highmem image page find the buffer
1948 *      that suspend_write_next() should set for its caller to write to.
1949 *
1950 *      If the page is to be saved to its "original" page frame or a copy of
1951 *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1952 *      the copy of the page is to be made in normal memory, so the address of
1953 *      the copy is returned.
1954 *
1955 *      If @buffer is returned, the caller of suspend_write_next() will write
1956 *      the page's contents to @buffer, so they will have to be copied to the
1957 *      right location on the next call to suspend_write_next() and it is done
1958 *      with the help of copy_last_highmem_page().  For this purpose, if
1959 *      @buffer is returned, @last_highmem page is set to the page to which
1960 *      the data will have to be copied from @buffer.
1961 */
1962
1963static struct page *last_highmem_page;
1964
1965static void *
1966get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1967{
1968        struct highmem_pbe *pbe;
1969        void *kaddr;
1970
1971        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1972                /* We have allocated the "original" page frame and we can
1973                 * use it directly to store the loaded page.
1974                 */
1975                last_highmem_page = page;
1976                return buffer;
1977        }
1978        /* The "original" page frame has not been allocated and we have to
1979         * use a "safe" page frame to store the loaded page.
1980         */
1981        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1982        if (!pbe) {
1983                swsusp_free();
1984                return ERR_PTR(-ENOMEM);
1985        }
1986        pbe->orig_page = page;
1987        if (safe_highmem_pages > 0) {
1988                struct page *tmp;
1989
1990                /* Copy of the page will be stored in high memory */
1991                kaddr = buffer;
1992                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1993                safe_highmem_pages--;
1994                last_highmem_page = tmp;
1995                pbe->copy_page = tmp;
1996        } else {
1997                /* Copy of the page will be stored in normal memory */
1998                kaddr = safe_pages_list;
1999                safe_pages_list = safe_pages_list->next;
2000                pbe->copy_page = virt_to_page(kaddr);
2001        }
2002        pbe->next = highmem_pblist;
2003        highmem_pblist = pbe;
2004        return kaddr;
2005}
2006
2007/**
2008 *      copy_last_highmem_page - copy the contents of a highmem image from
2009 *      @buffer, where the caller of snapshot_write_next() has place them,
2010 *      to the right location represented by @last_highmem_page .
2011 */
2012
2013static void copy_last_highmem_page(void)
2014{
2015        if (last_highmem_page) {
2016                void *dst;
2017
2018                dst = kmap_atomic(last_highmem_page);
2019                copy_page(dst, buffer);
2020                kunmap_atomic(dst);
2021                last_highmem_page = NULL;
2022        }
2023}
2024
2025static inline int last_highmem_page_copied(void)
2026{
2027        return !last_highmem_page;
2028}
2029
2030static inline void free_highmem_data(void)
2031{
2032        if (safe_highmem_bm)
2033                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2034
2035        if (buffer)
2036                free_image_page(buffer, PG_UNSAFE_CLEAR);
2037}
2038#else
2039static inline int get_safe_write_buffer(void) { return 0; }
2040
2041static unsigned int
2042count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2043
2044static inline int
2045prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2046{
2047        return 0;
2048}
2049
2050static inline void *
2051get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2052{
2053        return ERR_PTR(-EINVAL);
2054}
2055
2056static inline void copy_last_highmem_page(void) {}
2057static inline int last_highmem_page_copied(void) { return 1; }
2058static inline void free_highmem_data(void) {}
2059#endif /* CONFIG_HIGHMEM */
2060
2061/**
2062 *      prepare_image - use the memory bitmap @bm to mark the pages that will
2063 *      be overwritten in the process of restoring the system memory state
2064 *      from the suspend image ("unsafe" pages) and allocate memory for the
2065 *      image.
2066 *
2067 *      The idea is to allocate a new memory bitmap first and then allocate
2068 *      as many pages as needed for the image data, but not to assign these
2069 *      pages to specific tasks initially.  Instead, we just mark them as
2070 *      allocated and create a lists of "safe" pages that will be used
2071 *      later.  On systems with high memory a list of "safe" highmem pages is
2072 *      also created.
2073 */
2074
2075#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2076
2077static int
2078prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2079{
2080        unsigned int nr_pages, nr_highmem;
2081        struct linked_page *sp_list, *lp;
2082        int error;
2083
2084        /* If there is no highmem, the buffer will not be necessary */
2085        free_image_page(buffer, PG_UNSAFE_CLEAR);
2086        buffer = NULL;
2087
2088        nr_highmem = count_highmem_image_pages(bm);
2089        error = mark_unsafe_pages(bm);
2090        if (error)
2091                goto Free;
2092
2093        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2094        if (error)
2095                goto Free;
2096
2097        duplicate_memory_bitmap(new_bm, bm);
2098        memory_bm_free(bm, PG_UNSAFE_KEEP);
2099        if (nr_highmem > 0) {
2100                error = prepare_highmem_image(bm, &nr_highmem);
2101                if (error)
2102                        goto Free;
2103        }
2104        /* Reserve some safe pages for potential later use.
2105         *
2106         * NOTE: This way we make sure there will be enough safe pages for the
2107         * chain_alloc() in get_buffer().  It is a bit wasteful, but
2108         * nr_copy_pages cannot be greater than 50% of the memory anyway.
2109         */
2110        sp_list = NULL;
2111        /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
2112        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2113        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2114        while (nr_pages > 0) {
2115                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2116                if (!lp) {
2117                        error = -ENOMEM;
2118                        goto Free;
2119                }
2120                lp->next = sp_list;
2121                sp_list = lp;
2122                nr_pages--;
2123        }
2124        /* Preallocate memory for the image */
2125        safe_pages_list = NULL;
2126        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2127        while (nr_pages > 0) {
2128                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2129                if (!lp) {
2130                        error = -ENOMEM;
2131                        goto Free;
2132                }
2133                if (!swsusp_page_is_free(virt_to_page(lp))) {
2134                        /* The page is "safe", add it to the list */
2135                        lp->next = safe_pages_list;
2136                        safe_pages_list = lp;
2137                }
2138                /* Mark the page as allocated */
2139                swsusp_set_page_forbidden(virt_to_page(lp));
2140                swsusp_set_page_free(virt_to_page(lp));
2141                nr_pages--;
2142        }
2143        /* Free the reserved safe pages so that chain_alloc() can use them */
2144        while (sp_list) {
2145                lp = sp_list->next;
2146                free_image_page(sp_list, PG_UNSAFE_CLEAR);
2147                sp_list = lp;
2148        }
2149        return 0;
2150
2151 Free:
2152        swsusp_free();
2153        return error;
2154}
2155
2156/**
2157 *      get_buffer - compute the address that snapshot_write_next() should
2158 *      set for its caller to write to.
2159 */
2160
2161static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2162{
2163        struct pbe *pbe;
2164        struct page *page;
2165        unsigned long pfn = memory_bm_next_pfn(bm);
2166
2167        if (pfn == BM_END_OF_MAP)
2168                return ERR_PTR(-EFAULT);
2169
2170        page = pfn_to_page(pfn);
2171        if (PageHighMem(page))
2172                return get_highmem_page_buffer(page, ca);
2173
2174        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2175                /* We have allocated the "original" page frame and we can
2176                 * use it directly to store the loaded page.
2177                 */
2178                return page_address(page);
2179
2180        /* The "original" page frame has not been allocated and we have to
2181         * use a "safe" page frame to store the loaded page.
2182         */
2183        pbe = chain_alloc(ca, sizeof(struct pbe));
2184        if (!pbe) {
2185                swsusp_free();
2186                return ERR_PTR(-ENOMEM);
2187        }
2188        pbe->orig_address = page_address(page);
2189        pbe->address = safe_pages_list;
2190        safe_pages_list = safe_pages_list->next;
2191        pbe->next = restore_pblist;
2192        restore_pblist = pbe;
2193        return pbe->address;
2194}
2195
2196/**
2197 *      snapshot_write_next - used for writing the system memory snapshot.
2198 *
2199 *      On the first call to it @handle should point to a zeroed
2200 *      snapshot_handle structure.  The structure gets updated and a pointer
2201 *      to it should be passed to this function every next time.
2202 *
2203 *      On success the function returns a positive number.  Then, the caller
2204 *      is allowed to write up to the returned number of bytes to the memory
2205 *      location computed by the data_of() macro.
2206 *
2207 *      The function returns 0 to indicate the "end of file" condition,
2208 *      and a negative number is returned on error.  In such cases the
2209 *      structure pointed to by @handle is not updated and should not be used
2210 *      any more.
2211 */
2212
2213int snapshot_write_next(struct snapshot_handle *handle)
2214{
2215        static struct chain_allocator ca;
2216        int error = 0;
2217
2218        /* Check if we have already loaded the entire image */
2219        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2220                return 0;
2221
2222        handle->sync_read = 1;
2223
2224        if (!handle->cur) {
2225                if (!buffer)
2226                        /* This makes the buffer be freed by swsusp_free() */
2227                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2228
2229                if (!buffer)
2230                        return -ENOMEM;
2231
2232                handle->buffer = buffer;
2233        } else if (handle->cur == 1) {
2234                error = load_header(buffer);
2235                if (error)
2236                        return error;
2237
2238                error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2239                if (error)
2240                        return error;
2241
2242                /* Allocate buffer for page keys. */
2243                error = page_key_alloc(nr_copy_pages);
2244                if (error)
2245                        return error;
2246
2247        } else if (handle->cur <= nr_meta_pages + 1) {
2248                error = unpack_orig_pfns(buffer, &copy_bm);
2249                if (error)
2250                        return error;
2251
2252                if (handle->cur == nr_meta_pages + 1) {
2253                        error = prepare_image(&orig_bm, &copy_bm);
2254                        if (error)
2255                                return error;
2256
2257                        chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2258                        memory_bm_position_reset(&orig_bm);
2259                        restore_pblist = NULL;
2260                        handle->buffer = get_buffer(&orig_bm, &ca);
2261                        handle->sync_read = 0;
2262                        if (IS_ERR(handle->buffer))
2263                                return PTR_ERR(handle->buffer);
2264                }
2265        } else {
2266                copy_last_highmem_page();
2267                /* Restore page key for data page (s390 only). */
2268                page_key_write(handle->buffer);
2269                handle->buffer = get_buffer(&orig_bm, &ca);
2270                if (IS_ERR(handle->buffer))
2271                        return PTR_ERR(handle->buffer);
2272                if (handle->buffer != buffer)
2273                        handle->sync_read = 0;
2274        }
2275        handle->cur++;
2276        return PAGE_SIZE;
2277}
2278
2279/**
2280 *      snapshot_write_finalize - must be called after the last call to
2281 *      snapshot_write_next() in case the last page in the image happens
2282 *      to be a highmem page and its contents should be stored in the
2283 *      highmem.  Additionally, it releases the memory that will not be
2284 *      used any more.
2285 */
2286
2287void snapshot_write_finalize(struct snapshot_handle *handle)
2288{
2289        copy_last_highmem_page();
2290        /* Restore page key for data page (s390 only). */
2291        page_key_write(handle->buffer);
2292        page_key_free();
2293        /* Free only if we have loaded the image entirely */
2294        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2295                memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2296                free_highmem_data();
2297        }
2298}
2299
2300int snapshot_image_loaded(struct snapshot_handle *handle)
2301{
2302        return !(!nr_copy_pages || !last_highmem_page_copied() ||
2303                        handle->cur <= nr_meta_pages + nr_copy_pages);
2304}
2305
2306#ifdef CONFIG_HIGHMEM
/*
 * Exchange the contents of pages @p1 and @p2, using @buf as scratch space.
 * Assumes that @buf is ready and points to a "safe" page.
 */
static inline void
swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
{
        void *first = kmap_atomic(p1);
        void *second = kmap_atomic(p2);

        /* Three-way rotation: p1 -> buf, p2 -> p1, buf -> p2 */
        copy_page(buf, first);
        copy_page(first, second);
        copy_page(second, buf);

        /* Unmap in the reverse order of mapping */
        kunmap_atomic(second);
        kunmap_atomic(first);
}
2321
2322/**
2323 *      restore_highmem - for each highmem page that was allocated before
2324 *      the suspend and included in the suspend image, and also has been
2325 *      allocated by the "resume" kernel swap its current (ie. "before
2326 *      resume") contents with the previous (ie. "before suspend") one.
2327 *
2328 *      If the resume eventually fails, we can call this function once
2329 *      again and restore the "before resume" highmem state.
2330 */
2331
2332int restore_highmem(void)
2333{
2334        struct highmem_pbe *pbe = highmem_pblist;
2335        void *buf;
2336
2337        if (!pbe)
2338                return 0;
2339
2340        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2341        if (!buf)
2342                return -ENOMEM;
2343
2344        while (pbe) {
2345                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2346                pbe = pbe->next;
2347        }
2348        free_image_page(buf, PG_UNSAFE_CLEAR);
2349        return 0;
2350}
2351#endif /* CONFIG_HIGHMEM */
2352