linux/kernel/power/snapshot.c
/*
 * linux/kernel/power/snapshot.c
 *
 * This file provides system snapshot/restore functionality for swsusp.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 *
 * This file is released under the GPLv2.
 *
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

static int swsusp_page_is_free(struct page *);
static void swsusp_set_page_forbidden(struct page *);
static void swsusp_unset_page_forbidden(struct page *);

/*
 * Number of bytes to reserve for memory allocations made by device drivers
 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
 * cause image creation to fail (tunable via /sys/power/reserved_size).
 */
unsigned long reserved_size;

void __init hibernate_reserved_size_init(void)
{
        reserved_size = SPARE_PAGES * PAGE_SIZE;
}

/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size;

void __init hibernate_image_size_init(void)
{
        image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
}

/* List of PBEs needed for restoring the pages that were allocated before
 * the suspend and included in the suspend image, but have also been
 * allocated by the "resume" kernel, so their contents cannot be written
 * directly to their "original" page frames.
 */
struct pbe *restore_pblist;

/* Pointer to an auxiliary buffer (1 page) */
static void *buffer;

/**
 *      @safe_needed - on resume, for storing the PBE list and the image,
 *      we can only use memory pages that do not conflict with the pages
 *      used before suspend.  The unsafe pages have PageNosaveFree set
 *      and we count them using unsafe_pages.
 *
 *      Each allocated image page is marked as PageNosave and PageNosaveFree
 *      so that swsusp_free() can release it.
 */

#define PG_ANY          0
#define PG_SAFE         1
#define PG_UNSAFE_CLEAR 1
#define PG_UNSAFE_KEEP  0

static unsigned int allocated_unsafe_pages;

static void *get_image_page(gfp_t gfp_mask, int safe_needed)
{
        void *res;

        res = (void *)get_zeroed_page(gfp_mask);
        if (safe_needed)
                while (res && swsusp_page_is_free(virt_to_page(res))) {
                        /* The page is unsafe, mark it for swsusp_free() */
                        swsusp_set_page_forbidden(virt_to_page(res));
                        allocated_unsafe_pages++;
                        res = (void *)get_zeroed_page(gfp_mask);
                }
        if (res) {
                swsusp_set_page_forbidden(virt_to_page(res));
                swsusp_set_page_free(virt_to_page(res));
        }
        return res;
}

unsigned long get_safe_page(gfp_t gfp_mask)
{
        return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
}

static struct page *alloc_image_page(gfp_t gfp_mask)
{
        struct page *page;

        page = alloc_page(gfp_mask);
        if (page) {
                swsusp_set_page_forbidden(page);
                swsusp_set_page_free(page);
        }
        return page;
}

/**
 *      free_image_page - free page represented by @addr, allocated with
 *      get_image_page (page flags set by it must be cleared)
 */

static inline void free_image_page(void *addr, int clear_nosave_free)
{
        struct page *page;

        BUG_ON(!virt_addr_valid(addr));

        page = virt_to_page(addr);

        swsusp_unset_page_forbidden(page);
        if (clear_nosave_free)
                swsusp_unset_page_free(page);

        __free_page(page);
}
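
/*
 * Illustrative sketch, not part of the original file: how the allocation
 * helpers above pair with free_image_page().  Every image page is marked
 * both "forbidden" and "free", which is exactly the combination that
 * swsusp_free() later looks for.  The function below is hypothetical.
 */
#if 0
static int example_image_page_usage(void)
{
        void *page = get_image_page(GFP_ATOMIC, PG_ANY);

        if (!page)
                return -ENOMEM;
        /* ... store image data in the page ... */
        free_image_page(page, PG_UNSAFE_CLEAR);        /* clears both marks */
        return 0;
}
#endif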

/* struct linked_page is used to build chains of pages */

#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))

struct linked_page {
        struct linked_page *next;
        char data[LINKED_PAGE_DATA_SIZE];
} __attribute__((packed));

static inline void
free_list_of_pages(struct linked_page *list, int clear_page_nosave)
{
        while (list) {
                struct linked_page *lp = list->next;

                free_image_page(list, clear_page_nosave);
                list = lp;
        }
}

/**
  *     struct chain_allocator is used for allocating small objects out of
  *     a linked list of pages called 'the chain'.
  *
  *     The chain grows each time there is no room for a new object in
  *     the current page.  The allocated objects cannot be freed individually.
  *     It is only possible to free them all at once, by freeing the entire
  *     chain.
  *
  *     NOTE: The chain allocator may be inefficient if the allocated objects
  *     are not much smaller than PAGE_SIZE.
  */

struct chain_allocator {
        struct linked_page *chain;      /* the chain */
        unsigned int used_space;        /* total size of objects allocated out
                                         * of the current page
                                         */
        gfp_t gfp_mask;         /* mask for allocating pages */
        int safe_needed;        /* if set, only "safe" pages are allocated */
};

static void
chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
{
        ca->chain = NULL;
        ca->used_space = LINKED_PAGE_DATA_SIZE;
        ca->gfp_mask = gfp_mask;
        ca->safe_needed = safe_needed;
}

static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
{
        void *ret;

        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
                struct linked_page *lp;

                lp = get_image_page(ca->gfp_mask, ca->safe_needed);
                if (!lp)
                        return NULL;

                lp->next = ca->chain;
                ca->chain = lp;
                ca->used_space = 0;
        }
        ret = ca->chain->data + ca->used_space;
        ca->used_space += size;
        return ret;
}
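
/*
 * Illustrative sketch, not part of the original file: the chain-allocator
 * life cycle.  chain_init() sets used_space to LINKED_PAGE_DATA_SIZE on
 * purpose, so the very first chain_alloc() is forced to grab a page.  The
 * struct and function below are hypothetical.
 */
#if 0
struct example_obj {
        unsigned long a, b;
};

static int example_chain_usage(void)
{
        struct chain_allocator ca;
        struct example_obj *obj;

        chain_init(&ca, GFP_KERNEL, PG_ANY);
        obj = chain_alloc(&ca, sizeof(struct example_obj));
        if (!obj)
                return -ENOMEM;
        /* ... use obj; objects cannot be freed individually ... */
        free_list_of_pages(ca.chain, PG_UNSAFE_CLEAR);  /* free the whole chain */
        return 0;
}
#endif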

/**
 *      Data types related to memory bitmaps.
 *
 *      Memory bitmap is a structure consisting of a linked list of objects
 *      of type struct bm_block, each of which represents one block of the
 *      bitmap in which information about page frames is stored.
 *
 *      struct memory_bitmap contains the list of bitmap block objects, a
 *      struct bm_position used for browsing the bitmap, and a pointer to
 *      the list of pages used for allocating all of the bitmap block
 *      objects.
 *
 *      NOTE: It has to be possible to lay out the bitmap in memory
 *      using only allocations of order 0.  Additionally, the bitmap is
 *      designed to work with an arbitrary number of zones (this is over the
 *      top for now, but let's avoid making unnecessary assumptions ;-).
 *
 *      struct bm_block contains a pointer to the memory page in which
 *      information is stored (in the form of a block of bitmap).
 *      It also contains the pfns that correspond to the start and end of
 *      the represented memory area.
 */

#define BM_END_OF_MAP   (~0UL)

#define BM_BITS_PER_BLOCK       (PAGE_SIZE * BITS_PER_BYTE)

struct bm_block {
        struct list_head hook;  /* hook into a list of bitmap blocks */
        unsigned long start_pfn;        /* pfn represented by the first bit */
        unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
        unsigned long *data;    /* bitmap representing pages */
};

static inline unsigned long bm_block_bits(struct bm_block *bb)
{
        return bb->end_pfn - bb->start_pfn;
}

/* struct bm_position is used for browsing memory bitmaps */

struct bm_position {
        struct bm_block *block;
        int bit;
};

struct memory_bitmap {
        struct list_head blocks;        /* list of bitmap blocks */
        struct linked_page *p_list;     /* list of pages used to store
                                         * bitmap block objects
                                         */
        struct bm_position cur; /* most recently used bit position */
};

/* Functions that operate on memory bitmaps */

static void memory_bm_position_reset(struct memory_bitmap *bm)
{
        bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
        bm->cur.bit = 0;
}

static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);

/**
 *      create_bm_block_list - create a list of block bitmap objects
 *      @pages - number of pages to track
 *      @list - list to put the allocated blocks into
 *      @ca - chain allocator to be used for allocating memory
 */
static int create_bm_block_list(unsigned long pages,
                                struct list_head *list,
                                struct chain_allocator *ca)
{
        unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);

        while (nr_blocks-- > 0) {
                struct bm_block *bb;

                bb = chain_alloc(ca, sizeof(struct bm_block));
                if (!bb)
                        return -ENOMEM;
                list_add(&bb->hook, list);
        }

        return 0;
}

struct mem_extent {
        struct list_head hook;
        unsigned long start;
        unsigned long end;
};

/**
 *      free_mem_extents - free a list of memory extents
 *      @list - list of extents to empty
 */
static void free_mem_extents(struct list_head *list)
{
        struct mem_extent *ext, *aux;

        list_for_each_entry_safe(ext, aux, list, hook) {
                list_del(&ext->hook);
                kfree(ext);
        }
}

/**
 *      create_mem_extents - create a list of memory extents representing
 *                           contiguous ranges of PFNs
 *      @list - list to put the extents into
 *      @gfp_mask - mask to use for memory allocations
 */
static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
        struct zone *zone;

        INIT_LIST_HEAD(list);

        for_each_populated_zone(zone) {
                unsigned long zone_start, zone_end;
                struct mem_extent *ext, *cur, *aux;

                zone_start = zone->zone_start_pfn;
                zone_end = zone->zone_start_pfn + zone->spanned_pages;

                list_for_each_entry(ext, list, hook)
                        if (zone_start <= ext->end)
                                break;

                if (&ext->hook == list || zone_end < ext->start) {
                        /* New extent is necessary */
                        struct mem_extent *new_ext;

                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
                        if (!new_ext) {
                                free_mem_extents(list);
                                return -ENOMEM;
                        }
                        new_ext->start = zone_start;
                        new_ext->end = zone_end;
                        list_add_tail(&new_ext->hook, &ext->hook);
                        continue;
                }

                /* Merge this zone's range of PFNs with the existing one */
                if (zone_start < ext->start)
                        ext->start = zone_start;
                if (zone_end > ext->end)
                        ext->end = zone_end;

                /* More merging may be possible */
                cur = ext;
                list_for_each_entry_safe_continue(cur, aux, list, hook) {
                        if (zone_end < cur->start)
                                break;
                        if (zone_end < cur->end)
                                ext->end = cur->end;
                        list_del(&cur->hook);
                        kfree(cur);
                }
        }

        return 0;
}
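
/*
 * Illustrative example, not part of the original file: with two populated
 * zones spanning pfns [0, 4096) and [3072, 8192), the loop above first
 * creates the extent [0, 4096); the second zone overlaps it, so it is
 * merged instead of getting its own extent, leaving a single extent
 * [0, 8192).  Disjoint zones would produce one extent each.
 */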

/**
  *     memory_bm_create - allocate memory for a memory bitmap
  */
static int
memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
{
        struct chain_allocator ca;
        struct list_head mem_extents;
        struct mem_extent *ext;
        int error;

        chain_init(&ca, gfp_mask, safe_needed);
        INIT_LIST_HEAD(&bm->blocks);

        error = create_mem_extents(&mem_extents, gfp_mask);
        if (error)
                return error;

        list_for_each_entry(ext, &mem_extents, hook) {
                struct bm_block *bb;
                unsigned long pfn = ext->start;
                unsigned long pages = ext->end - ext->start;

                bb = list_entry(bm->blocks.prev, struct bm_block, hook);

                error = create_bm_block_list(pages, bm->blocks.prev, &ca);
                if (error)
                        goto Error;

                list_for_each_entry_continue(bb, &bm->blocks, hook) {
                        bb->data = get_image_page(gfp_mask, safe_needed);
                        if (!bb->data) {
                                error = -ENOMEM;
                                goto Error;
                        }

                        bb->start_pfn = pfn;
                        if (pages >= BM_BITS_PER_BLOCK) {
                                pfn += BM_BITS_PER_BLOCK;
                                pages -= BM_BITS_PER_BLOCK;
                        } else {
                                /* This is executed only once in the loop */
                                pfn += pages;
                        }
                        bb->end_pfn = pfn;
                }
        }

        bm->p_list = ca.chain;
        memory_bm_position_reset(bm);
 Exit:
        free_mem_extents(&mem_extents);
        return error;

 Error:
        bm->p_list = ca.chain;
        memory_bm_free(bm, PG_UNSAFE_CLEAR);
        goto Exit;
}

/**
  *     memory_bm_free - free memory occupied by the memory bitmap @bm
  */
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
        struct bm_block *bb;

        list_for_each_entry(bb, &bm->blocks, hook)
                if (bb->data)
                        free_image_page(bb->data, clear_nosave_free);

        free_list_of_pages(bm->p_list, clear_nosave_free);

        INIT_LIST_HEAD(&bm->blocks);
}
/**
 *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
 *      to the given pfn.  The cur position of @bm is updated to point at
 *      the block containing the bit.
 */
static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
                                void **addr, unsigned int *bit_nr)
{
        struct bm_block *bb;

        /*
         * Check if the pfn corresponds to the current bitmap block and find
         * the block where it fits if this is not the case.
         */
        bb = bm->cur.block;
        if (pfn < bb->start_pfn)
                list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
                        if (pfn >= bb->start_pfn)
                                break;

        if (pfn >= bb->end_pfn)
                list_for_each_entry_continue(bb, &bm->blocks, hook)
                        if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
                                break;

        if (&bb->hook == &bm->blocks)
                return -EFAULT;

        /* The block has been found */
        bm->cur.block = bb;
        pfn -= bb->start_pfn;
        bm->cur.bit = pfn + 1;
        *bit_nr = pfn;
        *addr = bb->data;
        return 0;
}

static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        BUG_ON(error);
        set_bit(bit, addr);
}

static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        if (!error)
                set_bit(bit, addr);
        return error;
}

static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        BUG_ON(error);
        clear_bit(bit, addr);
}

static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        BUG_ON(error);
        return test_bit(bit, addr);
}

static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;

        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
}

/**
 *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
 *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
 *      returned.
 *
 *      It is required to run memory_bm_position_reset() before the first call to
 *      this function.
 */

static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
        struct bm_block *bb;
        int bit;

        bb = bm->cur.block;
        do {
                bit = bm->cur.bit;
                bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
                if (bit < bm_block_bits(bb))
                        goto Return_pfn;

                bb = list_entry(bb->hook.next, struct bm_block, hook);
                bm->cur.block = bb;
                bm->cur.bit = 0;
        } while (&bb->hook != &bm->blocks);

        memory_bm_position_reset(bm);
        return BM_END_OF_MAP;

 Return_pfn:
        bm->cur.bit = bit + 1;
        return bb->start_pfn + bit;
}
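
/*
 * Illustrative sketch, not part of the original file: the canonical way to
 * walk all set bits in a memory bitmap, mirroring how copy_data_pages() and
 * pack_pfns() below use the iterator.  The function name is hypothetical.
 */
#if 0
static void example_walk_bitmap(struct memory_bitmap *bm)
{
        unsigned long pfn;

        memory_bm_position_reset(bm);   /* required before the first call */
        for (;;) {
                pfn = memory_bm_next_pfn(bm);
                if (pfn == BM_END_OF_MAP)
                        break;
                /* ... process pfn ... */
        }
}
#endif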

/**
 *      This structure represents a range of page frames the contents of which
 *      should not be saved during the suspend.
 */

struct nosave_region {
        struct list_head list;
        unsigned long start_pfn;
        unsigned long end_pfn;
};

static LIST_HEAD(nosave_regions);

/**
 *      register_nosave_region - register a range of page frames the contents
 *      of which should not be saved during the suspend (to be used in the early
 *      initialization code)
 */

void __init
__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
                         int use_kmalloc)
{
        struct nosave_region *region;

        if (start_pfn >= end_pfn)
                return;

        if (!list_empty(&nosave_regions)) {
                /* Try to extend the previous region (they should be sorted) */
                region = list_entry(nosave_regions.prev,
                                        struct nosave_region, list);
                if (region->end_pfn == start_pfn) {
                        region->end_pfn = end_pfn;
                        goto Report;
                }
        }
        if (use_kmalloc) {
                /* during init, this shouldn't fail */
                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
                BUG_ON(!region);
        } else
                /* This allocation cannot fail */
                region = alloc_bootmem(sizeof(struct nosave_region));
        region->start_pfn = start_pfn;
        region->end_pfn = end_pfn;
        list_add_tail(&region->list, &nosave_regions);
 Report:
        printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n",
                (unsigned long long) start_pfn << PAGE_SHIFT,
                ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
}
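
/*
 * Illustrative sketch, not part of the original file: early architecture
 * setup code normally calls the register_nosave_region() wrapper declared
 * in <linux/suspend.h>; the function name and pfn range below are made up.
 */
#if 0
void __init example_arch_reserve_nosave(void)
{
        /* Exclude pfns 0x800-0x8ff (e.g. a firmware scratch area). */
        register_nosave_region(0x800, 0x900);
}
#endif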

/*
 * Set bits in this map correspond to the page frames the contents of which
 * should not be saved during the suspend.
 */
static struct memory_bitmap *forbidden_pages_map;

/* Set bits in this map correspond to free page frames. */
static struct memory_bitmap *free_pages_map;

/*
 * Each page frame allocated for creating the image is marked by setting the
 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
 */

void swsusp_set_page_free(struct page *page)
{
        if (free_pages_map)
                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
}

static int swsusp_page_is_free(struct page *page)
{
        return free_pages_map ?
                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
}

void swsusp_unset_page_free(struct page *page)
{
        if (free_pages_map)
                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
}

static void swsusp_set_page_forbidden(struct page *page)
{
        if (forbidden_pages_map)
                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
}

int swsusp_page_is_forbidden(struct page *page)
{
        return forbidden_pages_map ?
                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
}

static void swsusp_unset_page_forbidden(struct page *page)
{
        if (forbidden_pages_map)
                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
}

/**
 *      mark_nosave_pages - set bits corresponding to the page frames the
 *      contents of which should not be saved in a given bitmap.
 */

static void mark_nosave_pages(struct memory_bitmap *bm)
{
        struct nosave_region *region;

        if (list_empty(&nosave_regions))
                return;

        list_for_each_entry(region, &nosave_regions, list) {
                unsigned long pfn;

                pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
                         (unsigned long long) region->start_pfn << PAGE_SHIFT,
                         ((unsigned long long) region->end_pfn << PAGE_SHIFT)
                                - 1);

                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
                        if (pfn_valid(pfn)) {
                                /*
                                 * It is safe to ignore the result of
                                 * mem_bm_set_bit_check() here, since we won't
                                 * touch the PFNs for which the error is
                                 * returned anyway.
                                 */
                                mem_bm_set_bit_check(bm, pfn);
                        }
        }
}

/**
 *      create_basic_memory_bitmaps - create bitmaps needed for marking page
 *      frames that should not be saved and free page frames.  The pointers
 *      forbidden_pages_map and free_pages_map are only modified if everything
 *      goes well, because we don't want the bits to be used before both bitmaps
 *      are set up.
 */

int create_basic_memory_bitmaps(void)
{
        struct memory_bitmap *bm1, *bm2;
        int error = 0;

        BUG_ON(forbidden_pages_map || free_pages_map);

        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
        if (!bm1)
                return -ENOMEM;

        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
        if (error)
                goto Free_first_object;

        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
        if (!bm2)
                goto Free_first_bitmap;

        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
        if (error)
                goto Free_second_object;

        forbidden_pages_map = bm1;
        free_pages_map = bm2;
        mark_nosave_pages(forbidden_pages_map);

        pr_debug("PM: Basic memory bitmaps created\n");

        return 0;

 Free_second_object:
        kfree(bm2);
 Free_first_bitmap:
        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 Free_first_object:
        kfree(bm1);
        return -ENOMEM;
}

/**
 *      free_basic_memory_bitmaps - free memory bitmaps allocated by
 *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
 *      so that the bitmaps themselves are not referred to while they are being
 *      freed.
 */

void free_basic_memory_bitmaps(void)
{
        struct memory_bitmap *bm1, *bm2;

        BUG_ON(!(forbidden_pages_map && free_pages_map));

        bm1 = forbidden_pages_map;
        bm2 = free_pages_map;
        forbidden_pages_map = NULL;
        free_pages_map = NULL;
        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
        kfree(bm1);
        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
        kfree(bm2);

        pr_debug("PM: Basic memory bitmaps freed\n");
}
/**
 *      snapshot_additional_pages - estimate the number of additional pages
 *      that will be needed for setting up the suspend image data structures
 *      for a given zone (usually the returned value is greater than the
 *      exact number)
 */

unsigned int snapshot_additional_pages(struct zone *zone)
{
        unsigned int res;

        res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
        res += DIV_ROUND_UP(res * sizeof(struct bm_block),
                            LINKED_PAGE_DATA_SIZE);
        return 2 * res;
}
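
/*
 * Illustrative arithmetic, not part of the original file, assuming 4 KiB
 * pages (BM_BITS_PER_BLOCK = 32768) and a 64-bit struct bm_block: a zone
 * spanning 1048576 pfns needs DIV_ROUND_UP(1048576, 32768) = 32 bitmap
 * blocks plus one linked page for the 32 struct bm_block objects, i.e.
 * 33 pages per bitmap; doubled for the two bitmaps used during
 * hibernation, snapshot_additional_pages() returns 66.
 */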

#ifdef CONFIG_HIGHMEM
/**
 *      count_free_highmem_pages - compute the total number of free highmem
 *      pages, system-wide.
 */

static unsigned int count_free_highmem_pages(void)
{
        struct zone *zone;
        unsigned int cnt = 0;

        for_each_populated_zone(zone)
                if (is_highmem(zone))
                        cnt += zone_page_state(zone, NR_FREE_PAGES);

        return cnt;
}

/**
 *      saveable_highmem_page - Determine whether a highmem page should be
 *      included in the suspend image.
 *
 *      We should save the page if it isn't Nosave or NosaveFree, or Reserved,
 *      and it isn't a part of a free chunk of pages.
 */
static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
{
        struct page *page;

        if (!pfn_valid(pfn))
                return NULL;

        page = pfn_to_page(pfn);
        if (page_zone(page) != zone)
                return NULL;

        BUG_ON(!PageHighMem(page));

        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
            PageReserved(page))
                return NULL;

        if (page_is_guard(page))
                return NULL;

        return page;
}

/**
 *      count_highmem_pages - compute the total number of saveable highmem
 *      pages.
 */

static unsigned int count_highmem_pages(void)
{
        struct zone *zone;
        unsigned int n = 0;

        for_each_populated_zone(zone) {
                unsigned long pfn, max_zone_pfn;

                if (!is_highmem(zone))
                        continue;

                mark_free_pages(zone);
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (saveable_highmem_page(zone, pfn))
                                n++;
        }
        return n;
}
#else
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
        return NULL;
}
#endif /* CONFIG_HIGHMEM */

/**
 *      saveable_page - Determine whether a non-highmem page should be included
 *      in the suspend image.
 *
 *      We should save the page if it isn't Nosave, and is not in the range
 *      of pages statically defined as 'unsaveable', and it isn't a part of
 *      a free chunk of pages.
 */
static struct page *saveable_page(struct zone *zone, unsigned long pfn)
{
        struct page *page;

        if (!pfn_valid(pfn))
                return NULL;

        page = pfn_to_page(pfn);
        if (page_zone(page) != zone)
                return NULL;

        BUG_ON(PageHighMem(page));

        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
                return NULL;

        if (PageReserved(page)
            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
                return NULL;

        if (page_is_guard(page))
                return NULL;

        return page;
}

/**
 *      count_data_pages - compute the total number of saveable non-highmem
 *      pages.
 */

static unsigned int count_data_pages(void)
{
        struct zone *zone;
        unsigned long pfn, max_zone_pfn;
        unsigned int n = 0;

        for_each_populated_zone(zone) {
                if (is_highmem(zone))
                        continue;

                mark_free_pages(zone);
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (saveable_page(zone, pfn))
                                n++;
        }
        return n;
}
/* This is needed because copy_page and memcpy are not usable for copying
 * task structs.
 */
static inline void do_copy_page(long *dst, long *src)
{
        int n;

        for (n = PAGE_SIZE / sizeof(long); n; n--)
                *dst++ = *src++;
}


/**
 *      safe_copy_page - check if the page we are going to copy is marked as
 *              present in the kernel page tables (this always is the case if
 *              CONFIG_DEBUG_PAGEALLOC is not set and in that case
 *              kernel_page_present() always returns 'true').
 */
static void safe_copy_page(void *dst, struct page *s_page)
{
        if (kernel_page_present(s_page)) {
                do_copy_page(dst, page_address(s_page));
        } else {
                kernel_map_pages(s_page, 1, 1);
                do_copy_page(dst, page_address(s_page));
                kernel_map_pages(s_page, 1, 0);
        }
}


#ifdef CONFIG_HIGHMEM
static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
        return is_highmem(zone) ?
                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}

static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        struct page *s_page, *d_page;
        void *src, *dst;

        s_page = pfn_to_page(src_pfn);
        d_page = pfn_to_page(dst_pfn);
        if (PageHighMem(s_page)) {
                src = kmap_atomic(s_page);
                dst = kmap_atomic(d_page);
                do_copy_page(dst, src);
                kunmap_atomic(dst);
                kunmap_atomic(src);
        } else {
                if (PageHighMem(d_page)) {
                        /* Page pointed to by src may contain some kernel
                         * data modified by kmap_atomic()
                         */
                        safe_copy_page(buffer, s_page);
                        dst = kmap_atomic(d_page);
                        copy_page(dst, buffer);
                        kunmap_atomic(dst);
                } else {
                        safe_copy_page(page_address(d_page), s_page);
                }
        }
}
#else
#define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)

static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        safe_copy_page(page_address(pfn_to_page(dst_pfn)),
                                pfn_to_page(src_pfn));
}
#endif /* CONFIG_HIGHMEM */

static void
copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
{
        struct zone *zone;
        unsigned long pfn;

        for_each_populated_zone(zone) {
                unsigned long max_zone_pfn;

                mark_free_pages(zone);
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (page_is_saveable(zone, pfn))
                                memory_bm_set_bit(orig_bm, pfn);
        }
        memory_bm_position_reset(orig_bm);
        memory_bm_position_reset(copy_bm);
        for (;;) {
                pfn = memory_bm_next_pfn(orig_bm);
                if (unlikely(pfn == BM_END_OF_MAP))
                        break;
                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
        }
}

/* Total number of image pages */
static unsigned int nr_copy_pages;
/* Number of pages needed for saving the original pfns of the image pages */
static unsigned int nr_meta_pages;
/*
 * Numbers of normal and highmem page frames allocated for hibernation image
 * before suspending devices.
 */
unsigned int alloc_normal, alloc_highmem;
/*
 * Memory bitmap used for marking saveable pages (during hibernation) or
 * hibernation image pages (during restore)
 */
static struct memory_bitmap orig_bm;
/*
 * Memory bitmap used during hibernation for marking allocated page frames that
 * will contain copies of saveable pages.  During restore it is initially used
 * for marking hibernation image pages, but then the set bits from it are
 * duplicated in @orig_bm and it is released.  On highmem systems it is next
 * used for marking "safe" highmem pages, but it has to be reinitialized for
 * this purpose.
 */
static struct memory_bitmap copy_bm;
/**
 *      swsusp_free - free pages allocated for the suspend.
 *
 *      Suspend pages are allocated before the atomic copy is made, so we
 *      need to release them after the resume.
 */

void swsusp_free(void)
{
        struct zone *zone;
        unsigned long pfn, max_zone_pfn;

        for_each_populated_zone(zone) {
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);

                                if (swsusp_page_is_forbidden(page) &&
                                    swsusp_page_is_free(page)) {
                                        swsusp_unset_page_forbidden(page);
                                        swsusp_unset_page_free(page);
                                        __free_page(page);
                                }
                        }
        }
        nr_copy_pages = 0;
        nr_meta_pages = 0;
        restore_pblist = NULL;
        buffer = NULL;
        alloc_normal = 0;
        alloc_highmem = 0;
}

/* Helper functions used for the shrinking of memory. */

#define GFP_IMAGE       (GFP_KERNEL | __GFP_NOWARN)

/**
 * preallocate_image_pages - Allocate a number of pages for hibernation image
 * @nr_pages: Number of page frames to allocate.
 * @mask: GFP flags to use for the allocation.
 *
 * Return value: Number of page frames actually allocated
 */
static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
{
        unsigned long nr_alloc = 0;

        while (nr_pages > 0) {
                struct page *page;

                page = alloc_image_page(mask);
                if (!page)
                        break;
                memory_bm_set_bit(&copy_bm, page_to_pfn(page));
                if (PageHighMem(page))
                        alloc_highmem++;
                else
                        alloc_normal++;
                nr_pages--;
                nr_alloc++;
        }

        return nr_alloc;
}

static unsigned long preallocate_image_memory(unsigned long nr_pages,
                                              unsigned long avail_normal)
{
        unsigned long alloc;

        if (avail_normal <= alloc_normal)
                return 0;

        alloc = avail_normal - alloc_normal;
        if (nr_pages < alloc)
                alloc = nr_pages;

        return preallocate_image_pages(alloc, GFP_IMAGE);
}

#ifdef CONFIG_HIGHMEM
static unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
        return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
}

/**
 *  __fraction - Compute (an approximation of) x * (multiplier / base)
 */
static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
{
        x *= multiplier;
        do_div(x, base);
        return (unsigned long)x;
}
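
/*
 * Illustrative arithmetic, not part of the original file: with
 * x = 1000 pages requested, multiplier = 300 highmem pages and
 * base = 1200 total pages, __fraction() yields 1000 * 300 / 1200 = 250,
 * so a quarter of the request is directed at highmem, in proportion to
 * its share of memory.
 */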

static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
                                                unsigned long highmem,
                                                unsigned long total)
{
        unsigned long alloc = __fraction(nr_pages, highmem, total);

        return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
}
#else /* CONFIG_HIGHMEM */
static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
        return 0;
}

static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
                                                unsigned long highmem,
                                                unsigned long total)
{
        return 0;
}
#endif /* CONFIG_HIGHMEM */

/**
 * free_unnecessary_pages - Release preallocated pages not needed for the image
 */
static void free_unnecessary_pages(void)
{
        unsigned long save, to_free_normal, to_free_highmem;

        save = count_data_pages();
        if (alloc_normal >= save) {
                to_free_normal = alloc_normal - save;
                save = 0;
        } else {
                to_free_normal = 0;
                save -= alloc_normal;
        }
        save += count_highmem_pages();
        if (alloc_highmem >= save) {
                to_free_highmem = alloc_highmem - save;
        } else {
                to_free_highmem = 0;
                save -= alloc_highmem;
                if (to_free_normal > save)
                        to_free_normal -= save;
                else
                        to_free_normal = 0;
        }

        memory_bm_position_reset(&copy_bm);

        while (to_free_normal > 0 || to_free_highmem > 0) {
                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
                struct page *page = pfn_to_page(pfn);

                if (PageHighMem(page)) {
                        if (!to_free_highmem)
                                continue;
                        to_free_highmem--;
                        alloc_highmem--;
                } else {
                        if (!to_free_normal)
                                continue;
                        to_free_normal--;
                        alloc_normal--;
                }
                memory_bm_clear_bit(&copy_bm, pfn);
                swsusp_unset_page_forbidden(page);
                swsusp_unset_page_free(page);
                __free_page(page);
        }
}

/**
 * minimum_image_size - Estimate the minimum acceptable size of an image
 * @saveable: Number of saveable pages in the system.
 *
 * We want to avoid attempting to free too much memory too hard, so estimate the
 * minimum acceptable size of a hibernation image to use as the lower limit for
 * preallocating memory.
 *
 * We assume that the minimum image size should be proportional to
 *
 * [number of saveable pages] - [number of pages that can be freed in theory]
 *
 * where the second term is the sum of (1) reclaimable slab pages, (2) active
 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
 * minus mapped file pages.
 */
static unsigned long minimum_image_size(unsigned long saveable)
{
        unsigned long size;

        size = global_page_state(NR_SLAB_RECLAIMABLE)
                + global_page_state(NR_ACTIVE_ANON)
                + global_page_state(NR_INACTIVE_ANON)
                + global_page_state(NR_ACTIVE_FILE)
                + global_page_state(NR_INACTIVE_FILE)
                - global_page_state(NR_FILE_MAPPED);

        return saveable <= size ? 0 : saveable - size;
}
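
/*
 * Illustrative arithmetic, not part of the original file: if 200000 pages
 * are saveable and the counters above sum to 150000 theoretically freeable
 * pages, minimum_image_size() returns 50000 pages; if the freeable sum
 * meets or exceeds the saveable count, it returns 0 and the lower limit
 * plays no role in the preallocation below.
 */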

/**
 * hibernate_preallocate_memory - Preallocate memory for hibernation image
 *
 * To create a hibernation image it is necessary to make a copy of every page
 * frame in use.  We also need a number of page frames to be free during
 * hibernation for allocations made while saving the image and for device
 * drivers, in case they need to allocate memory from their hibernation
 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
 * /sys/power/reserved_size), respectively).  To make this happen, we compute
 * the total number of available page frames and allocate at least
 *
 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
 *
 * of them, which corresponds to the maximum size of a hibernation image.
 *
 * If image_size is set below the number following from the above formula,
 * the preallocation of memory is continued until the total number of saveable
 * pages in the system is below the requested image size or the minimum
 * acceptable image size returned by minimum_image_size(), whichever is greater.
 */
int hibernate_preallocate_memory(void)
{
        struct zone *zone;
        unsigned long saveable, size, max_size, count, highmem, pages = 0;
        unsigned long alloc, save_highmem, pages_highmem, avail_normal;
        struct timeval start, stop;
        int error;

        printk(KERN_INFO "PM: Preallocating image memory... ");
        do_gettimeofday(&start);

        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
        if (error)
                goto err_out;

        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
        if (error)
                goto err_out;

        alloc_normal = 0;
        alloc_highmem = 0;

        /* Count the number of saveable data pages. */
        save_highmem = count_highmem_pages();
        saveable = count_data_pages();

        /*
         * Compute the total number of page frames we can use (count) and the
         * number of pages needed for image metadata (size).
         */
        count = saveable;
        saveable += save_highmem;
        highmem = save_highmem;
        size = 0;
        for_each_populated_zone(zone) {
                size += snapshot_additional_pages(zone);
                if (is_highmem(zone))
                        highmem += zone_page_state(zone, NR_FREE_PAGES);
                else
                        count += zone_page_state(zone, NR_FREE_PAGES);
        }
        avail_normal = count;
        count += highmem;
        count -= totalreserve_pages;

        /* Add number of pages required for page keys (s390 only). */
        size += page_key_additional_pages(saveable);

        /* Compute the maximum number of saveable pages to leave in memory. */
        max_size = (count - (size + PAGES_FOR_IO)) / 2
                        - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
        /* Compute the desired number of image pages specified by image_size. */
        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
        if (size > max_size)
                size = max_size;
        /*
         * If the desired number of image pages is at least as large as the
         * current number of saveable pages in memory, allocate page frames for
         * the image and we're done.
         */
        if (size >= saveable) {
                pages = preallocate_image_highmem(save_highmem);
                pages += preallocate_image_memory(saveable - pages, avail_normal);
                goto out;
        }

        /* Estimate the minimum size of the image. */
        pages = minimum_image_size(saveable);
        /*
         * To avoid excessive pressure on the normal zone, leave room in it to
         * accommodate an image of the minimum size (unless it's already too
         * small, in which case don't preallocate pages from it at all).
         */
        if (avail_normal > pages)
                avail_normal -= pages;
        else
                avail_normal = 0;
        if (size < pages)
                size = min_t(unsigned long, pages, max_size);

        /*
         * Let the memory management subsystem know that we're going to need a
         * large number of page frames to allocate and make it free some memory.
         * NOTE: If this is not done, performance will be hurt badly in some
         * test cases.
         */
        shrink_all_memory(saveable - size);

        /*
         * The number of saveable pages in memory was too high, so apply some
         * pressure to decrease it.  First, make room for the largest possible
         * image and fail if that doesn't work.  Next, try to decrease the size
         * of the image as much as indicated by 'size' using allocations from
         * highmem and non-highmem zones separately.
         */
        pages_highmem = preallocate_image_highmem(highmem / 2);
        alloc = (count - max_size) - pages_highmem;
        pages = preallocate_image_memory(alloc, avail_normal);
        if (pages < alloc) {
                /* We have exhausted non-highmem pages, try highmem. */
                alloc -= pages;
                pages += pages_highmem;
                pages_highmem = preallocate_image_highmem(alloc);
                if (pages_highmem < alloc)
                        goto err_out;
                pages += pages_highmem;
                /*
                 * size is the desired number of saveable pages to leave in
                 * memory, so try to preallocate (all memory - size) pages.
                 */
                alloc = (count - pages) - size;
                pages += preallocate_image_highmem(alloc);
        } else {
                /*
                 * There are approximately max_size saveable pages at this point
                 * and we want to reduce this number down to size.
                 */
                alloc = max_size - size;
                size = preallocate_highmem_fraction(alloc, highmem, count);
                pages_highmem += size;
                alloc -= size;
                size = preallocate_image_memory(alloc, avail_normal);
                pages_highmem += preallocate_image_highmem(alloc - size);
                pages += pages_highmem + size;
        }

        /*
         * We only need as many page frames for the image as there are saveable
         * pages in memory, but we have allocated more.  Release the excessive
         * ones now.
         */
        free_unnecessary_pages();

 out:
        do_gettimeofday(&stop);
        printk(KERN_CONT "done (allocated %lu pages)\n", pages);
        swsusp_show_speed(&start, &stop, pages, "Allocated");

        return 0;

 err_out:
        printk(KERN_CONT "\n");
        swsusp_free();
        return -ENOMEM;
}
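
/*
 * Illustrative arithmetic for the formula above, not part of the original
 * file, using made-up round numbers: with count = 100000 usable page
 * frames, size = 1000 metadata pages, PAGES_FOR_IO = 1024 and a
 * reserved_size covering 256 pages, max_size = (100000 - 2024) / 2 - 512
 * = 48476 pages may be left in memory, so at least 100000 - 48476 = 51524
 * page frames end up preallocated for the image.
 */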

#ifdef CONFIG_HIGHMEM
/**
  *     count_pages_for_highmem - compute the number of non-highmem pages
  *     that will be necessary for creating copies of highmem pages.
  */

static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
        unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;

        if (free_highmem >= nr_highmem)
                nr_highmem = 0;
        else
                nr_highmem -= free_highmem;

        return nr_highmem;
}
#else
static unsigned int
count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
#endif /* CONFIG_HIGHMEM */

/**
 *      enough_free_mem - Make sure we have enough free memory for the
 *      snapshot image.
 */

static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
{
        struct zone *zone;
        unsigned int free = alloc_normal;

        for_each_populated_zone(zone)
                if (!is_highmem(zone))
                        free += zone_page_state(zone, NR_FREE_PAGES);

        nr_pages += count_pages_for_highmem(nr_highmem);
        pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
                nr_pages, PAGES_FOR_IO, free);

        return free > nr_pages + PAGES_FOR_IO;
}

#ifdef CONFIG_HIGHMEM
/**
 *      get_highmem_buffer - if there are some highmem pages in the suspend
 *      image, we may need the buffer to copy them and/or load their data.
 */

static inline int get_highmem_buffer(int safe_needed)
{
        buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
        return buffer ? 0 : -ENOMEM;
}

/**
 *      alloc_highmem_pages - allocate some highmem pages for the image.
 *      Try to allocate as many pages as needed, but if the number of free
 *      highmem pages is smaller than that, allocate them all.
 */

static inline unsigned int
alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
{
        unsigned int to_alloc = count_free_highmem_pages();

        if (to_alloc > nr_highmem)
                to_alloc = nr_highmem;

        nr_highmem -= to_alloc;
        while (to_alloc-- > 0) {
                struct page *page;

                page = alloc_image_page(__GFP_HIGHMEM);
                memory_bm_set_bit(bm, page_to_pfn(page));
        }
        return nr_highmem;
}
#else
static inline int get_highmem_buffer(int safe_needed) { return 0; }

static inline unsigned int
alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
#endif /* CONFIG_HIGHMEM */
1537/**
1538 *      swsusp_alloc - allocate memory for the suspend image
1539 *
1540 *      We first try to allocate as many highmem pages as there are
1541 *      saveable highmem pages in the system.  If that fails, we allocate
1542 *      non-highmem pages for the copies of the remaining highmem ones.
1543 *
1544 *      In this approach it is likely that the copies of highmem pages will
1545 *      also be located in the high memory, because of the way in which
1546 *      copy_data_pages() works.
1547 */
1548
1549static int
1550swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1551                unsigned int nr_pages, unsigned int nr_highmem)
1552{
1553        if (nr_highmem > 0) {
1554                if (get_highmem_buffer(PG_ANY))
1555                        goto err_out;
1556                if (nr_highmem > alloc_highmem) {
1557                        nr_highmem -= alloc_highmem;
1558                        nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1559                }
1560        }
1561        if (nr_pages > alloc_normal) {
1562                nr_pages -= alloc_normal;
1563                while (nr_pages-- > 0) {
1564                        struct page *page;
1565
1566                        page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1567                        if (!page)
1568                                goto err_out;
1569                        memory_bm_set_bit(copy_bm, page_to_pfn(page));
1570                }
1571        }
1572
1573        return 0;
1574
1575 err_out:
1576        swsusp_free();
1577        return -ENOMEM;
1578}
1579
1580asmlinkage int swsusp_save(void)
1581{
1582        unsigned int nr_pages, nr_highmem;
1583
1584        printk(KERN_INFO "PM: Creating hibernation image:\n");
1585
1586        drain_local_pages(NULL);
1587        nr_pages = count_data_pages();
1588        nr_highmem = count_highmem_pages();
1589        printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1590
1591        if (!enough_free_mem(nr_pages, nr_highmem)) {
1592                printk(KERN_ERR "PM: Not enough free memory\n");
1593                return -ENOMEM;
1594        }
1595
1596        if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1597                printk(KERN_ERR "PM: Memory allocation failed\n");
1598                return -ENOMEM;
1599        }
1600
1601        /* While the suspend pagedir was being allocated, new cold pages may
1602         * have appeared.  Drain them.
1603         */
1604        drain_local_pages(NULL);
1605        copy_data_pages(&copy_bm, &orig_bm);
1606
1607        /*
1608         * End of critical section. From now on, we can write to memory,
1609         * but we should not touch disk. This especially means we must _not_
1610         * touch swap space! Except we must write out our image of course.
1611         */
1612
1613        nr_pages += nr_highmem;
1614        nr_copy_pages = nr_pages;
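        /*
         * One pfn is stored per sizeof(long) bytes of metadata; with 4 KiB
         * pages and 8-byte longs, for example, that is 512 pfns per meta
         * page (the exact figures are architecture-dependent).
         */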
1615        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1616
1617        printk(KERN_INFO "PM: Hibernation image created (%u pages copied)\n",
1618                nr_pages);
1619
1620        return 0;
1621}
1622
1623#ifndef CONFIG_ARCH_HIBERNATION_HEADER
1624static int init_header_complete(struct swsusp_info *info)
1625{
1626        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1627        info->version_code = LINUX_VERSION_CODE;
1628        return 0;
1629}
1630
1631static char *check_image_kernel(struct swsusp_info *info)
1632{
1633        if (info->version_code != LINUX_VERSION_CODE)
1634                return "kernel version";
1635        if (strcmp(info->uts.sysname, init_utsname()->sysname))
1636                return "system type";
1637        if (strcmp(info->uts.release, init_utsname()->release))
1638                return "kernel release";
1639        if (strcmp(info->uts.version, init_utsname()->version))
1640                return "version";
1641        if (strcmp(info->uts.machine, init_utsname()->machine))
1642                return "machine";
1643        return NULL;
1644}
1645#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1646
1647unsigned long snapshot_get_image_size(void)
1648{
1649        return nr_copy_pages + nr_meta_pages + 1;
1650}
1651
1652static int init_header(struct swsusp_info *info)
1653{
1654        memset(info, 0, sizeof(struct swsusp_info));
1655        info->num_physpages = get_num_physpages();
1656        info->image_pages = nr_copy_pages;
1657        info->pages = snapshot_get_image_size();
1658        info->size = info->pages;
1659        info->size <<= PAGE_SHIFT;
1660        return init_header_complete(info);
1661}
1662
1663/**
1664 *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1665 *      are stored in the array @buf[] (1 page at a time)
1666 */
1667
1668static inline void
1669pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1670{
1671        int j;
1672
1673        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1674                buf[j] = memory_bm_next_pfn(bm);
1675                if (unlikely(buf[j] == BM_END_OF_MAP))
1676                        break;
1677                /* Save page key for data page (s390 only). */
1678                page_key_read(buf + j);
1679        }
1680}
1681
1682/**
1683 *      snapshot_read_next - used for reading the system memory snapshot.
1684 *
1685 *      On the first call to it @handle should point to a zeroed
1686 *      snapshot_handle structure.  The structure gets updated and a pointer
1687 *      to it should be passed to this function on each subsequent call.
1688 *
1689 *      On success the function returns a positive number.  Then, the caller
1690 *      is allowed to read up to the returned number of bytes from the memory
1691 *      location computed by the data_of() macro.
1692 *
1693 *      The function returns 0 to indicate the end of data stream condition,
1694 *      and a negative number is returned on error.  In such cases the
1695 *      structure pointed to by @handle is not updated and should not be used
1696 *      any more.
1697 */
1698
1699int snapshot_read_next(struct snapshot_handle *handle)
1700{
1701        if (handle->cur > nr_meta_pages + nr_copy_pages)
1702                return 0;
1703
1704        if (!buffer) {
1705                /* This ensures the buffer is freed by swsusp_free() */
1706                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1707                if (!buffer)
1708                        return -ENOMEM;
1709        }
1710        if (!handle->cur) {
1711                int error;
1712
1713                error = init_header((struct swsusp_info *)buffer);
1714                if (error)
1715                        return error;
1716                handle->buffer = buffer;
1717                memory_bm_position_reset(&orig_bm);
1718                memory_bm_position_reset(&copy_bm);
1719        } else if (handle->cur <= nr_meta_pages) {
1720                clear_page(buffer);
1721                pack_pfns(buffer, &orig_bm);
1722        } else {
1723                struct page *page;
1724
1725                page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1726                if (PageHighMem(page)) {
1727                        /* Highmem pages are copied to the buffer,
1728                         * because we can't return with a kmapped
1729                         * highmem page (we may not be called again).
1730                         */
1731                        void *kaddr;
1732
1733                        kaddr = kmap_atomic(page);
1734                        copy_page(buffer, kaddr);
1735                        kunmap_atomic(kaddr);
1736                        handle->buffer = buffer;
1737                } else {
1738                        handle->buffer = page_address(page);
1739                }
1740        }
1741        handle->cur++;
1742        return PAGE_SIZE;
1743}
1744
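/*
 * A minimal sketch of the read loop described above, assuming a
 * hypothetical consumer write_page() that stores each chunk (the real
 * consumers live elsewhere in the hibernation code):
 *
 *        struct snapshot_handle handle;
 *        int ret;
 *
 *        memset(&handle, 0, sizeof(handle));
 *        do {
 *                ret = snapshot_read_next(&handle);
 *                if (ret > 0)
 *                        write_page(data_of(handle), ret);
 *        } while (ret > 0);
 *
 * When the loop ends, ret is 0 at the end of the data stream and negative
 * on error, as described above.
 */
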
1745/**
1746 *      mark_unsafe_pages - mark the pages that cannot be used for storing
1747 *      the image during resume, because they conflict with the pages that
1748 *      had been used before suspend
1749 */
1750
1751static int mark_unsafe_pages(struct memory_bitmap *bm)
1752{
1753        struct zone *zone;
1754        unsigned long pfn, max_zone_pfn;
1755
1756        /* Clear page flags */
1757        for_each_populated_zone(zone) {
1758                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1759                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1760                        if (pfn_valid(pfn))
1761                                swsusp_unset_page_free(pfn_to_page(pfn));
1762        }
1763
1764        /* Mark pages that correspond to the "original" pfns as "unsafe" */
1765        memory_bm_position_reset(bm);
1766        do {
1767                pfn = memory_bm_next_pfn(bm);
1768                if (likely(pfn != BM_END_OF_MAP)) {
1769                        if (likely(pfn_valid(pfn)))
1770                                swsusp_set_page_free(pfn_to_page(pfn));
1771                        else
1772                                return -EFAULT;
1773                }
1774        } while (pfn != BM_END_OF_MAP);
1775
1776        allocated_unsafe_pages = 0;
1777
1778        return 0;
1779}
1780
1781static void
1782duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1783{
1784        unsigned long pfn;
1785
1786        memory_bm_position_reset(src);
1787        pfn = memory_bm_next_pfn(src);
1788        while (pfn != BM_END_OF_MAP) {
1789                memory_bm_set_bit(dst, pfn);
1790                pfn = memory_bm_next_pfn(src);
1791        }
1792}
1793
1794static int check_header(struct swsusp_info *info)
1795{
1796        char *reason;
1797
1798        reason = check_image_kernel(info);
1799        if (!reason && info->num_physpages != get_num_physpages())
1800                reason = "memory size";
1801        if (reason) {
1802                printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1803                return -EPERM;
1804        }
1805        return 0;
1806}
1807
1808/**
1809 *      load_header - check the image header and copy data from it
1810 */
1811
1812static int
1813load_header(struct swsusp_info *info)
1814{
1815        int error;
1816
1817        restore_pblist = NULL;
1818        error = check_header(info);
1819        if (!error) {
1820                nr_copy_pages = info->image_pages;
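                /*
                 * snapshot_get_image_size() counts the data pages, the
                 * meta pages and one header page, so the number of meta
                 * pages falls out as the remainder here.
                 */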
1821                nr_meta_pages = info->pages - info->image_pages - 1;
1822        }
1823        return error;
1824}
1825
1826/**
1827 *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1828 *      the corresponding bit in the memory bitmap @bm
1829 */
1830static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1831{
1832        int j;
1833
1834        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1835                if (unlikely(buf[j] == BM_END_OF_MAP))
1836                        break;
1837
1838                /* Extract and buffer page key for data page (s390 only). */
1839                page_key_memorize(buf + j);
1840
1841                if (memory_bm_pfn_present(bm, buf[j]))
1842                        memory_bm_set_bit(bm, buf[j]);
1843                else
1844                        return -EFAULT;
1845        }
1846
1847        return 0;
1848}
1849
1850/* List of "safe" pages that may be used to store data loaded from the suspend
1851 * image
1852 */
1853static struct linked_page *safe_pages_list;
1854
1855#ifdef CONFIG_HIGHMEM
1856/* struct highmem_pbe is used for creating the list of highmem pages that
1857 * should be restored atomically during the resume from disk, because the page
1858 * frames they have occupied before the suspend are in use.
1859 */
1860struct highmem_pbe {
1861        struct page *copy_page; /* data is here now */
1862        struct page *orig_page; /* data was here before the suspend */
1863        struct highmem_pbe *next;
1864};
1865
1866/* List of highmem PBEs needed for restoring the highmem pages that were
1867 * allocated before the suspend and included in the suspend image, but have
1868 * also been allocated by the "resume" kernel, so their contents cannot be
1869 * written directly to their "original" page frames.
1870 */
1871static struct highmem_pbe *highmem_pblist;
1872
1873/**
1874 *      count_highmem_image_pages - compute the number of highmem pages in the
1875 *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1876 *      image pages are assumed to be set.
1877 */
1878
1879static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1880{
1881        unsigned long pfn;
1882        unsigned int cnt = 0;
1883
1884        memory_bm_position_reset(bm);
1885        pfn = memory_bm_next_pfn(bm);
1886        while (pfn != BM_END_OF_MAP) {
1887                if (PageHighMem(pfn_to_page(pfn)))
1888                        cnt++;
1889
1890                pfn = memory_bm_next_pfn(bm);
1891        }
1892        return cnt;
1893}
1894
1895/**
1896 *      prepare_highmem_image - try to allocate as many highmem pages as
1897 *      there are highmem image pages (@nr_highmem_p points to the variable
1898 *      containing the number of highmem image pages).  The pages that are
1899 *      "safe" (ie. will not be overwritten when the suspend image is
1900 *      restored) have the corresponding bits set in @bm (it must be
1901 *      uninitialized).
1902 *
1903 *      NOTE: This function should not be called if there are no highmem
1904 *      image pages.
1905 */
1906
1907static unsigned int safe_highmem_pages;
1908
1909static struct memory_bitmap *safe_highmem_bm;
1910
1911static int
1912prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1913{
1914        unsigned int to_alloc;
1915
1916        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1917                return -ENOMEM;
1918
1919        if (get_highmem_buffer(PG_SAFE))
1920                return -ENOMEM;
1921
1922        to_alloc = count_free_highmem_pages();
1923        if (to_alloc > *nr_highmem_p)
1924                to_alloc = *nr_highmem_p;
1925        else
1926                *nr_highmem_p = to_alloc;
1927
1928        safe_highmem_pages = 0;
1929        while (to_alloc-- > 0) {
1930                struct page *page;
1931
1932                page = alloc_page(__GFP_HIGHMEM);
1933                if (!swsusp_page_is_free(page)) {
1934                        /* The page is "safe", set its bit the bitmap */
1935                        memory_bm_set_bit(bm, page_to_pfn(page));
1936                        safe_highmem_pages++;
1937                }
1938                /* Mark the page as allocated */
1939                swsusp_set_page_forbidden(page);
1940                swsusp_set_page_free(page);
1941        }
1942        memory_bm_position_reset(bm);
1943        safe_highmem_bm = bm;
1944        return 0;
1945}
1946
1947/**
1948 *      get_highmem_page_buffer - for a given highmem image page, find the
1949 *      buffer that snapshot_write_next() should set for its caller to write to.
1950 *
1951 *      If the page is to be saved to its "original" page frame or a copy of
1952 *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1953 *      the copy of the page is to be made in normal memory, so the address of
1954 *      the copy is returned.
1955 *
1956 *      If @buffer is returned, the caller of snapshot_write_next() will write
1957 *      the page's contents to @buffer, so they will have to be copied to the
1958 *      right location on the next call to snapshot_write_next(), which is done
1959 *      with the help of copy_last_highmem_page().  For this purpose, if
1960 *      @buffer is returned, @last_highmem_page is set to the page to which
1961 *      the data will have to be copied from @buffer.
1962 */
1963
1964static struct page *last_highmem_page;
1965
1966static void *
1967get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1968{
1969        struct highmem_pbe *pbe;
1970        void *kaddr;
1971
1972        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1973                /* We have allocated the "original" page frame and we can
1974                 * use it directly to store the loaded page.
1975                 */
1976                last_highmem_page = page;
1977                return buffer;
1978        }
1979        /* The "original" page frame has not been allocated and we have to
1980         * use a "safe" page frame to store the loaded page.
1981         */
1982        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1983        if (!pbe) {
1984                swsusp_free();
1985                return ERR_PTR(-ENOMEM);
1986        }
1987        pbe->orig_page = page;
1988        if (safe_highmem_pages > 0) {
1989                struct page *tmp;
1990
1991                /* Copy of the page will be stored in high memory */
1992                kaddr = buffer;
1993                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1994                safe_highmem_pages--;
1995                last_highmem_page = tmp;
1996                pbe->copy_page = tmp;
1997        } else {
1998                /* Copy of the page will be stored in normal memory */
1999                kaddr = safe_pages_list;
2000                safe_pages_list = safe_pages_list->next;
2001                pbe->copy_page = virt_to_page(kaddr);
2002        }
2003        pbe->next = highmem_pblist;
2004        highmem_pblist = pbe;
2005        return kaddr;
2006}
2007
2008/**
2009 *      copy_last_highmem_page - copy the contents of a highmem image page
2010 *      from @buffer, where the caller of snapshot_write_next() has placed
2011 *      them, to the right location represented by @last_highmem_page.
2012 */
2013
2014static void copy_last_highmem_page(void)
2015{
2016        if (last_highmem_page) {
2017                void *dst;
2018
2019                dst = kmap_atomic(last_highmem_page);
2020                copy_page(dst, buffer);
2021                kunmap_atomic(dst);
2022                last_highmem_page = NULL;
2023        }
2024}
2025
2026static inline int last_highmem_page_copied(void)
2027{
2028        return !last_highmem_page;
2029}
2030
2031static inline void free_highmem_data(void)
2032{
2033        if (safe_highmem_bm)
2034                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2035
2036        if (buffer)
2037                free_image_page(buffer, PG_UNSAFE_CLEAR);
2038}
2039#else
2040static inline int get_safe_write_buffer(void) { return 0; }
2041
2042static unsigned int
2043count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2044
2045static inline int
2046prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2047{
2048        return 0;
2049}
2050
2051static inline void *
2052get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2053{
2054        return ERR_PTR(-EINVAL);
2055}
2056
2057static inline void copy_last_highmem_page(void) {}
2058static inline int last_highmem_page_copied(void) { return 1; }
2059static inline void free_highmem_data(void) {}
2060#endif /* CONFIG_HIGHMEM */
2061
2062/**
2063 *      prepare_image - use the memory bitmap @bm to mark the pages that will
2064 *      be overwritten in the process of restoring the system memory state
2065 *      from the suspend image ("unsafe" pages) and allocate memory for the
2066 *      image.
2067 *
2068 *      The idea is to allocate a new memory bitmap first and then allocate
2069 *      as many pages as needed for the image data, but not to assign these
2070 *      pages to specific tasks initially.  Instead, we just mark them as
2071 *      allocated and create a list of "safe" pages that will be used
2072 *      later.  On systems with high memory a list of "safe" highmem pages is
2073 *      also created.
2074 */
2075
2076#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2077
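/*
 * For illustration only: assuming LINKED_PAGE_DATA_SIZE is one page minus
 * the size of the link pointer, then with 4 KiB pages and 8-byte pointers
 * a struct pbe (three pointers, 24 bytes) gives (4096 - 8) / 24 = 170
 * PBEs per linked page.  The actual figures are architecture-dependent.
 */
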
2078static int
2079prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2080{
2081        unsigned int nr_pages, nr_highmem;
2082        struct linked_page *sp_list, *lp;
2083        int error;
2084
2085        /* If there is no highmem, the buffer will not be necessary */
2086        free_image_page(buffer, PG_UNSAFE_CLEAR);
2087        buffer = NULL;
2088
2089        nr_highmem = count_highmem_image_pages(bm);
2090        error = mark_unsafe_pages(bm);
2091        if (error)
2092                goto Free;
2093
2094        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2095        if (error)
2096                goto Free;
2097
2098        duplicate_memory_bitmap(new_bm, bm);
2099        memory_bm_free(bm, PG_UNSAFE_KEEP);
2100        if (nr_highmem > 0) {
2101                error = prepare_highmem_image(bm, &nr_highmem);
2102                if (error)
2103                        goto Free;
2104        }
2105        /* Reserve some safe pages for potential later use.
2106         *
2107         * NOTE: This way we make sure there will be enough safe pages for the
2108         * chain_alloc() in get_buffer().  It is a bit wasteful, but
2109         * nr_copy_pages cannot be greater than 50% of the memory anyway.
2110         */
2111        sp_list = NULL;
2112        /* nr_copy_pages cannot be less than allocated_unsafe_pages */
2113        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2114        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2115        while (nr_pages > 0) {
2116                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2117                if (!lp) {
2118                        error = -ENOMEM;
2119                        goto Free;
2120                }
2121                lp->next = sp_list;
2122                sp_list = lp;
2123                nr_pages--;
2124        }
2125        /* Preallocate memory for the image */
2126        safe_pages_list = NULL;
2127        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2128        while (nr_pages > 0) {
2129                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2130                if (!lp) {
2131                        error = -ENOMEM;
2132                        goto Free;
2133                }
2134                if (!swsusp_page_is_free(virt_to_page(lp))) {
2135                        /* The page is "safe", add it to the list */
2136                        lp->next = safe_pages_list;
2137                        safe_pages_list = lp;
2138                }
2139                /* Mark the page as allocated */
2140                swsusp_set_page_forbidden(virt_to_page(lp));
2141                swsusp_set_page_free(virt_to_page(lp));
2142                nr_pages--;
2143        }
2144        /* Free the reserved safe pages so that chain_alloc() can use them */
2145        while (sp_list) {
2146                lp = sp_list->next;
2147                free_image_page(sp_list, PG_UNSAFE_CLEAR);
2148                sp_list = lp;
2149        }
2150        return 0;
2151
2152 Free:
2153        swsusp_free();
2154        return error;
2155}
2156
2157/**
2158 *      get_buffer - compute the address that snapshot_write_next() should
2159 *      set for its caller to write to.
2160 */
2161
2162static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2163{
2164        struct pbe *pbe;
2165        struct page *page;
2166        unsigned long pfn = memory_bm_next_pfn(bm);
2167
2168        if (pfn == BM_END_OF_MAP)
2169                return ERR_PTR(-EFAULT);
2170
2171        page = pfn_to_page(pfn);
2172        if (PageHighMem(page))
2173                return get_highmem_page_buffer(page, ca);
2174
2175        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2176                /* We have allocated the "original" page frame and we can
2177                 * use it directly to store the loaded page.
2178                 */
2179                return page_address(page);
2180
2181        /* The "original" page frame has not been allocated and we have to
2182         * use a "safe" page frame to store the loaded page.
2183         */
2184        pbe = chain_alloc(ca, sizeof(struct pbe));
2185        if (!pbe) {
2186                swsusp_free();
2187                return ERR_PTR(-ENOMEM);
2188        }
2189        pbe->orig_address = page_address(page);
2190        pbe->address = safe_pages_list;
2191        safe_pages_list = safe_pages_list->next;
2192        pbe->next = restore_pblist;
2193        restore_pblist = pbe;
2194        return pbe->address;
2195}
2196
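/*
 * A sketch of how the restore_pblist built above is meant to be consumed
 * once the entire image has been loaded (the actual walk is performed by
 * the architecture-specific restore code, so this is illustrative only):
 *
 *        struct pbe *p;
 *
 *        for (p = restore_pblist; p; p = p->next)
 *                copy_page(p->orig_address, p->address);
 */
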
2197/**
2198 *      snapshot_write_next - used for writing the system memory snapshot.
2199 *
2200 *      On the first call to it @handle should point to a zeroed
2201 *      snapshot_handle structure.  The structure gets updated and a pointer
2202 *      to it should be passed to this function on each subsequent call.
2203 *
2204 *      On success the function returns a positive number.  Then, the caller
2205 *      is allowed to write up to the returned number of bytes to the memory
2206 *      location computed by the data_of() macro.
2207 *
2208 *      The function returns 0 to indicate the "end of file" condition,
2209 *      and a negative number is returned on error.  In such cases the
2210 *      structure pointed to by @handle is not updated and should not be used
2211 *      any more.
2212 */
2213
2214int snapshot_write_next(struct snapshot_handle *handle)
2215{
2216        static struct chain_allocator ca;
2217        int error = 0;
2218
2219        /* Check if we have already loaded the entire image */
2220        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2221                return 0;
2222
2223        handle->sync_read = 1;
2224
2225        if (!handle->cur) {
2226                if (!buffer)
2227                        /* This ensures the buffer is freed by swsusp_free() */
2228                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2229
2230                if (!buffer)
2231                        return -ENOMEM;
2232
2233                handle->buffer = buffer;
2234        } else if (handle->cur == 1) {
2235                error = load_header(buffer);
2236                if (error)
2237                        return error;
2238
2239                error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2240                if (error)
2241                        return error;
2242
2243                /* Allocate buffer for page keys. */
2244                error = page_key_alloc(nr_copy_pages);
2245                if (error)
2246                        return error;
2247
2248        } else if (handle->cur <= nr_meta_pages + 1) {
2249                error = unpack_orig_pfns(buffer, &copy_bm);
2250                if (error)
2251                        return error;
2252
2253                if (handle->cur == nr_meta_pages + 1) {
2254                        error = prepare_image(&orig_bm, &copy_bm);
2255                        if (error)
2256                                return error;
2257
2258                        chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2259                        memory_bm_position_reset(&orig_bm);
2260                        restore_pblist = NULL;
2261                        handle->buffer = get_buffer(&orig_bm, &ca);
2262                        handle->sync_read = 0;
2263                        if (IS_ERR(handle->buffer))
2264                                return PTR_ERR(handle->buffer);
2265                }
2266        } else {
2267                copy_last_highmem_page();
2268                /* Restore page key for data page (s390 only). */
2269                page_key_write(handle->buffer);
2270                handle->buffer = get_buffer(&orig_bm, &ca);
2271                if (IS_ERR(handle->buffer))
2272                        return PTR_ERR(handle->buffer);
2273                if (handle->buffer != buffer)
2274                        handle->sync_read = 0;
2275        }
2276        handle->cur++;
2277        return PAGE_SIZE;
2278}
2279
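/*
 * A minimal sketch of the write loop, mirroring the read side and
 * assuming a hypothetical producer read_page() that fills each chunk
 * (the real producers live elsewhere in the hibernation code):
 *
 *        struct snapshot_handle handle;
 *        int ret;
 *
 *        memset(&handle, 0, sizeof(handle));
 *        do {
 *                ret = snapshot_write_next(&handle);
 *                if (ret > 0)
 *                        read_page(data_of(handle), ret);
 *        } while (ret > 0);
 *        snapshot_write_finalize(&handle);
 */
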
2280/**
2281 *      snapshot_write_finalize - must be called after the last call to
2282 *      snapshot_write_next() in case the last page in the image happens
2283 *      to be a highmem page and its contents should be stored in the
2284 *      highmem.  Additionally, it releases the memory that will not be
2285 *      used any more.
2286 */
2287
2288void snapshot_write_finalize(struct snapshot_handle *handle)
2289{
2290        copy_last_highmem_page();
2291        /* Restore page key for data page (s390 only). */
2292        page_key_write(handle->buffer);
2293        page_key_free();
2294        /* Free only if we have loaded the image entirely */
2295        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2296                memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2297                free_highmem_data();
2298        }
2299}
2300
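/*
 * snapshot_image_loaded - report whether the entire image has been fed
 * to snapshot_write_next(): there must be an image at all, the last
 * highmem page must have been copied into place, and @handle must have
 * advanced past all of the meta and data pages.
 */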
2301int snapshot_image_loaded(struct snapshot_handle *handle)
2302{
2303        return nr_copy_pages && last_highmem_page_copied() &&
2304                        handle->cur > nr_meta_pages + nr_copy_pages;
2305}
2306
2307#ifdef CONFIG_HIGHMEM
2308/* Assumes that @buf is ready and points to a "safe" page */
2309static inline void
2310swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2311{
2312        void *kaddr1, *kaddr2;
2313
2314        kaddr1 = kmap_atomic(p1);
2315        kaddr2 = kmap_atomic(p2);
2316        copy_page(buf, kaddr1);
2317        copy_page(kaddr1, kaddr2);
2318        copy_page(kaddr2, buf);
2319        kunmap_atomic(kaddr2);
2320        kunmap_atomic(kaddr1);
2321}
2322
2323/**
2324 *      restore_highmem - for each highmem page that was allocated before
2325 *      the suspend and included in the suspend image, and also has been
2326 *      allocated by the "resume" kernel swap its current (ie. "before
2327 *      resume") contents with the previous (ie. "before suspend") one.
2328 *
2329 *      If the resume eventually fails, we can call this function once
2330 *      again and restore the "before resume" highmem state.
2331 */
2332
2333int restore_highmem(void)
2334{
2335        struct highmem_pbe *pbe = highmem_pblist;
2336        void *buf;
2337
2338        if (!pbe)
2339                return 0;
2340
2341        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2342        if (!buf)
2343                return -ENOMEM;
2344
2345        while (pbe) {
2346                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2347                pbe = pbe->next;
2348        }
2349        free_image_page(buf, PG_UNSAFE_CLEAR);
2350        return 0;
2351}
2352#endif /* CONFIG_HIGHMEM */
2353