linux/kernel/power/snapshot.c
<<
>>
Prefs
   1/*
   2 * linux/kernel/power/snapshot.c
   3 *
   4 * This file provides system snapshot/restore functionality for swsusp.
   5 *
   6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
   7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
   8 *
   9 * This file is released under the GPLv2.
  10 *
  11 */
  12
  13#include <linux/version.h>
  14#include <linux/module.h>
  15#include <linux/mm.h>
  16#include <linux/suspend.h>
  17#include <linux/delay.h>
  18#include <linux/bitops.h>
  19#include <linux/spinlock.h>
  20#include <linux/kernel.h>
  21#include <linux/pm.h>
  22#include <linux/device.h>
  23#include <linux/init.h>
  24#include <linux/bootmem.h>
  25#include <linux/syscalls.h>
  26#include <linux/console.h>
  27#include <linux/highmem.h>
  28#include <linux/list.h>
  29
  30#include <asm/uaccess.h>
  31#include <asm/mmu_context.h>
  32#include <asm/pgtable.h>
  33#include <asm/tlbflush.h>
  34#include <asm/io.h>
  35
  36#include "power.h"
  37
  38static int swsusp_page_is_free(struct page *);
  39static void swsusp_set_page_forbidden(struct page *);
  40static void swsusp_unset_page_forbidden(struct page *);
  41
  42/*
  43 * Preferred image size in bytes (tunable via /sys/power/image_size).
  44 * When it is set to N, swsusp will do its best to ensure the image
  45 * size will not exceed N bytes, but if that is impossible, it will
  46 * try to create the smallest image possible.
  47 */
  48unsigned long image_size = 500 * 1024 * 1024;
  49
  50/* List of PBEs needed for restoring the pages that were allocated before
  51 * the suspend and included in the suspend image, but have also been
  52 * allocated by the "resume" kernel, so their contents cannot be written
  53 * directly to their "original" page frames.
  54 */
  55struct pbe *restore_pblist;
  56
  57/* Pointer to an auxiliary buffer (1 page) */
  58static void *buffer;
  59
  60/**
  61 *      @safe_needed - on resume, for storing the PBE list and the image,
  62 *      we can only use memory pages that do not conflict with the pages
  63 *      used before suspend.  The unsafe pages have PageNosaveFree set
  64 *      and we count them using unsafe_pages.
  65 *
  66 *      Each allocated image page is marked as PageNosave and PageNosaveFree
  67 *      so that swsusp_free() can release it.
  68 */
  69
/* Values passed as the safe_needed argument of get_image_page() and friends */
#define PG_ANY          0
#define PG_SAFE         1
/* Values passed as the clear_nosave_free argument of the freeing helpers */
#define PG_UNSAFE_CLEAR 1
#define PG_UNSAFE_KEEP  0

/* Number of "unsafe" pages that get_image_page() allocated and set aside */
static unsigned int allocated_unsafe_pages;
  76
  77static void *get_image_page(gfp_t gfp_mask, int safe_needed)
  78{
  79        void *res;
  80
  81        res = (void *)get_zeroed_page(gfp_mask);
  82        if (safe_needed)
  83                while (res && swsusp_page_is_free(virt_to_page(res))) {
  84                        /* The page is unsafe, mark it for swsusp_free() */
  85                        swsusp_set_page_forbidden(virt_to_page(res));
  86                        allocated_unsafe_pages++;
  87                        res = (void *)get_zeroed_page(gfp_mask);
  88                }
  89        if (res) {
  90                swsusp_set_page_forbidden(virt_to_page(res));
  91                swsusp_set_page_free(virt_to_page(res));
  92        }
  93        return res;
  94}
  95
  96unsigned long get_safe_page(gfp_t gfp_mask)
  97{
  98        return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
  99}
 100
 101static struct page *alloc_image_page(gfp_t gfp_mask)
 102{
 103        struct page *page;
 104
 105        page = alloc_page(gfp_mask);
 106        if (page) {
 107                swsusp_set_page_forbidden(page);
 108                swsusp_set_page_free(page);
 109        }
 110        return page;
 111}
 112
 113/**
 114 *      free_image_page - free page represented by @addr, allocated with
 115 *      get_image_page (page flags set by it must be cleared)
 116 */
 117
 118static inline void free_image_page(void *addr, int clear_nosave_free)
 119{
 120        struct page *page;
 121
 122        BUG_ON(!virt_addr_valid(addr));
 123
 124        page = virt_to_page(addr);
 125
 126        swsusp_unset_page_forbidden(page);
 127        if (clear_nosave_free)
 128                swsusp_unset_page_free(page);
 129
 130        __free_page(page);
 131}
 132
/*
 * struct linked_page is used to build chains of pages.
 *
 * Each page starts with a pointer to the next page in the chain; the rest of
 * the page (LINKED_PAGE_DATA_SIZE bytes) is payload, so that the pointer and
 * the payload together add up to PAGE_SIZE.
 */

#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))

struct linked_page {
        struct linked_page *next;       /* next page in the chain, or NULL */
        char data[LINKED_PAGE_DATA_SIZE];       /* payload area */
} __attribute__((packed));
 141
 142static inline void
 143free_list_of_pages(struct linked_page *list, int clear_page_nosave)
 144{
 145        while (list) {
 146                struct linked_page *lp = list->next;
 147
 148                free_image_page(list, clear_page_nosave);
 149                list = lp;
 150        }
 151}
 152
/**
  *     struct chain_allocator is used for allocating small objects out of
  *     a linked list of pages called 'the chain'.
  *
  *     The chain grows each time when there is no room for a new object in
  *     the current page.  The allocated objects cannot be freed individually.
  *     It is only possible to free them all at once, by freeing the entire
  *     chain.
  *
  *     New pages are pushed at the head of the chain, so @chain always points
  *     to the page objects are currently being carved from.
  *
  *     NOTE: The chain allocator may be inefficient if the allocated objects
  *     are not much smaller than PAGE_SIZE.
  */

struct chain_allocator {
        struct linked_page *chain;      /* the chain */
        unsigned int used_space;        /* total size of objects allocated out
                                         * of the current page
                                         */
        gfp_t gfp_mask;         /* mask for allocating pages */
        int safe_needed;        /* if set, only "safe" pages are allocated */
};
 174
 175static void
 176chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
 177{
 178        ca->chain = NULL;
 179        ca->used_space = LINKED_PAGE_DATA_SIZE;
 180        ca->gfp_mask = gfp_mask;
 181        ca->safe_needed = safe_needed;
 182}
 183
 184static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 185{
 186        void *ret;
 187
 188        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 189                struct linked_page *lp;
 190
 191                lp = get_image_page(ca->gfp_mask, ca->safe_needed);
 192                if (!lp)
 193                        return NULL;
 194
 195                lp->next = ca->chain;
 196                ca->chain = lp;
 197                ca->used_space = 0;
 198        }
 199        ret = ca->chain->data + ca->used_space;
 200        ca->used_space += size;
 201        return ret;
 202}
 203
 204/**
 205 *      Data types related to memory bitmaps.
 206 *
 *      Memory bitmap is a structure consisting of many linked lists of
 *      objects.  The main list's elements are of type struct zone_bitmap
 *      and each of them corresponds to one zone.  For each zone bitmap
 *      object there is a list of objects of type struct bm_block, each of
 *      which represents one block of the bitmap in which information is stored.
 212 *
 213 *      struct memory_bitmap contains a pointer to the main list of zone
 214 *      bitmap objects, a struct bm_position used for browsing the bitmap,
 215 *      and a pointer to the list of pages used for allocating all of the
 216 *      zone bitmap objects and bitmap block objects.
 217 *
 218 *      NOTE: It has to be possible to lay out the bitmap in memory
 219 *      using only allocations of order 0.  Additionally, the bitmap is
 220 *      designed to work with arbitrary number of zones (this is over the
 221 *      top for now, but let's avoid making unnecessary assumptions ;-).
 222 *
 223 *      struct zone_bitmap contains a pointer to a list of bitmap block
 224 *      objects and a pointer to the bitmap block object that has been
 225 *      most recently used for setting bits.  Additionally, it contains the
 226 *      pfns that correspond to the start and end of the represented zone.
 227 *
 228 *      struct bm_block contains a pointer to the memory page in which
 229 *      information is stored (in the form of a block of bitmap)
 230 *      It also contains the pfns that correspond to the start and end of
 231 *      the represented memory area.
 232 */
 233
/* Value returned by memory_bm_next_pfn() when no further set bit exists */
#define BM_END_OF_MAP   (~0UL)

/* Number of bits (i.e. page frames) that one page of bitmap data can hold */
#define BM_BITS_PER_BLOCK       (PAGE_SIZE * BITS_PER_BYTE)
 237
/*
 * One block of a memory bitmap: a page of bits covering the page frames
 * from start_pfn up to, but not including, end_pfn.
 */
struct bm_block {
        struct list_head hook;  /* hook into a list of bitmap blocks */
        unsigned long start_pfn;        /* pfn represented by the first bit */
        unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
        unsigned long *data;    /* bitmap representing pages */
};
 244
 245static inline unsigned long bm_block_bits(struct bm_block *bb)
 246{
 247        return bb->end_pfn - bb->start_pfn;
 248}
 249
/* struct bm_position is used for browsing memory bitmaps */

struct bm_position {
        struct bm_block *block; /* block the position refers to */
        int bit;                /* bit index within that block */
};
 256
/*
 * A memory bitmap: a list of bitmap blocks plus the chain of pages the
 * block descriptors were allocated from, and a cursor used for lookups and
 * iteration.
 */
struct memory_bitmap {
        struct list_head blocks;        /* list of bitmap blocks */
        struct linked_page *p_list;     /* list of pages used to store zone
                                         * bitmap objects and bitmap block
                                         * objects
                                         */
        struct bm_position cur; /* most recently used bit position */
};
 265
 266/* Functions that operate on memory bitmaps */
 267
 268static void memory_bm_position_reset(struct memory_bitmap *bm)
 269{
 270        bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
 271        bm->cur.bit = 0;
 272}
 273
 274static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
 275
 276/**
 277 *      create_bm_block_list - create a list of block bitmap objects
 278 *      @pages - number of pages to track
 279 *      @list - list to put the allocated blocks into
 280 *      @ca - chain allocator to be used for allocating memory
 281 */
 282static int create_bm_block_list(unsigned long pages,
 283                                struct list_head *list,
 284                                struct chain_allocator *ca)
 285{
 286        unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
 287
 288        while (nr_blocks-- > 0) {
 289                struct bm_block *bb;
 290
 291                bb = chain_alloc(ca, sizeof(struct bm_block));
 292                if (!bb)
 293                        return -ENOMEM;
 294                list_add(&bb->hook, list);
 295        }
 296
 297        return 0;
 298}
 299
/* A contiguous range of page frame numbers, linked into a list of extents */
struct mem_extent {
        struct list_head hook;  /* hook into the list of extents */
        unsigned long start;    /* first pfn of the range */
        unsigned long end;      /* pfn following the last one of the range */
};
 305
 306/**
 307 *      free_mem_extents - free a list of memory extents
 308 *      @list - list of extents to empty
 309 */
 310static void free_mem_extents(struct list_head *list)
 311{
 312        struct mem_extent *ext, *aux;
 313
 314        list_for_each_entry_safe(ext, aux, list, hook) {
 315                list_del(&ext->hook);
 316                kfree(ext);
 317        }
 318}
 319
/**
 *      create_mem_extents - create a list of memory extents representing
 *                           contiguous ranges of PFNs
 *      @list - list to put the extents into
 *      @gfp_mask - mask to use for memory allocations
 *
 *      Walks all populated zones and records the PFN range spanned by each
 *      of them.  Ranges that touch or overlap an extent already on the list
 *      are merged into it.  Returns 0 on success; on allocation failure the
 *      list is emptied again and -ENOMEM is returned.
 */
static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
        struct zone *zone;

        INIT_LIST_HEAD(list);

        for_each_populated_zone(zone) {
                unsigned long zone_start, zone_end;
                struct mem_extent *ext, *cur, *aux;

                zone_start = zone->zone_start_pfn;
                zone_end = zone->zone_start_pfn + zone->spanned_pages;

                /* Find the first extent that does not end before this zone */
                list_for_each_entry(ext, list, hook)
                        if (zone_start <= ext->end)
                                break;

                /*
                 * Either no such extent exists (the walk reached the list
                 * head) or the zone ends before that extent begins.
                 */
                if (&ext->hook == list || zone_end < ext->start) {
                        /* New extent is necessary */
                        struct mem_extent *new_ext;

                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
                        if (!new_ext) {
                                free_mem_extents(list);
                                return -ENOMEM;
                        }
                        new_ext->start = zone_start;
                        new_ext->end = zone_end;
                        /* Insert in front of @ext (at the tail if @ext is the head) */
                        list_add_tail(&new_ext->hook, &ext->hook);
                        continue;
                }

                /* Merge this zone's range of PFNs with the existing one */
                if (zone_start < ext->start)
                        ext->start = zone_start;
                if (zone_end > ext->end)
                        ext->end = zone_end;

                /* More merging may be possible */
                cur = ext;
                list_for_each_entry_safe_continue(cur, aux, list, hook) {
                        /* Stop at the first following extent the zone does not reach */
                        if (zone_end < cur->start)
                                break;
                        if (zone_end < cur->end)
                                ext->end = cur->end;
                        /* @cur is now covered by @ext, drop it */
                        list_del(&cur->hook);
                        kfree(cur);
                }
        }

        return 0;
}
 378
/**
  *     memory_bm_create - allocate memory for a memory bitmap
  *     @bm - bitmap object to set up
  *     @gfp_mask - mask used for all allocations
  *     @safe_needed - passed on to the page allocation helpers
  *
  *     Builds the list of memory extents, then for every extent allocates
  *     the block descriptors and one data page per block, and assigns each
  *     block the PFN range it covers.  Returns 0 on success.  On failure
  *     everything allocated for @bm so far is released and the error code
  *     is returned.
  */
static int
memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
{
        struct chain_allocator ca;
        struct list_head mem_extents;
        struct mem_extent *ext;
        int error;

        chain_init(&ca, gfp_mask, safe_needed);
        INIT_LIST_HEAD(&bm->blocks);

        error = create_mem_extents(&mem_extents, gfp_mask);
        if (error)
                return error;

        list_for_each_entry(ext, &mem_extents, hook) {
                struct bm_block *bb;
                unsigned long pfn = ext->start;
                unsigned long pages = ext->end - ext->start;

                /*
                 * Remember the current tail of the block list, so that the
                 * loop below visits only the blocks added for this extent.
                 */
                bb = list_entry(bm->blocks.prev, struct bm_block, hook);

                error = create_bm_block_list(pages, bm->blocks.prev, &ca);
                if (error)
                        goto Error;

                /* Give each new block a data page and its range of PFNs */
                list_for_each_entry_continue(bb, &bm->blocks, hook) {
                        bb->data = get_image_page(gfp_mask, safe_needed);
                        if (!bb->data) {
                                error = -ENOMEM;
                                goto Error;
                        }

                        bb->start_pfn = pfn;
                        if (pages >= BM_BITS_PER_BLOCK) {
                                pfn += BM_BITS_PER_BLOCK;
                                pages -= BM_BITS_PER_BLOCK;
                        } else {
                                /* This is executed only once in the loop */
                                pfn += pages;
                        }
                        bb->end_pfn = pfn;
                }
        }

        bm->p_list = ca.chain;
        memory_bm_position_reset(bm);
 Exit:
        /* The extents are needed only while the bitmap is being built */
        free_mem_extents(&mem_extents);
        return error;

 Error:
        /* Hand the chain over to @bm so that memory_bm_free() releases it */
        bm->p_list = ca.chain;
        memory_bm_free(bm, PG_UNSAFE_CLEAR);
        goto Exit;
}
 438
 439/**
 440  *     memory_bm_free - free memory occupied by the memory bitmap @bm
 441  */
 442static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 443{
 444        struct bm_block *bb;
 445
 446        list_for_each_entry(bb, &bm->blocks, hook)
 447                if (bb->data)
 448                        free_image_page(bb->data, clear_nosave_free);
 449
 450        free_list_of_pages(bm->p_list, clear_nosave_free);
 451
 452        INIT_LIST_HEAD(&bm->blocks);
 453}
 454
 455/**
 456 *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
 457 *      to given pfn.  The cur_zone_bm member of @bm and the cur_block member
 458 *      of @bm->cur_zone_bm are updated.
 459 */
 460static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 461                                void **addr, unsigned int *bit_nr)
 462{
 463        struct bm_block *bb;
 464
 465        /*
 466         * Check if the pfn corresponds to the current bitmap block and find
 467         * the block where it fits if this is not the case.
 468         */
 469        bb = bm->cur.block;
 470        if (pfn < bb->start_pfn)
 471                list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
 472                        if (pfn >= bb->start_pfn)
 473                                break;
 474
 475        if (pfn >= bb->end_pfn)
 476                list_for_each_entry_continue(bb, &bm->blocks, hook)
 477                        if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
 478                                break;
 479
 480        if (&bb->hook == &bm->blocks)
 481                return -EFAULT;
 482
 483        /* The block has been found */
 484        bm->cur.block = bb;
 485        pfn -= bb->start_pfn;
 486        bm->cur.bit = pfn + 1;
 487        *bit_nr = pfn;
 488        *addr = bb->data;
 489        return 0;
 490}
 491
 492static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 493{
 494        void *addr;
 495        unsigned int bit;
 496        int error;
 497
 498        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 499        BUG_ON(error);
 500        set_bit(bit, addr);
 501}
 502
 503static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
 504{
 505        void *addr;
 506        unsigned int bit;
 507        int error;
 508
 509        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 510        if (!error)
 511                set_bit(bit, addr);
 512        return error;
 513}
 514
 515static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 516{
 517        void *addr;
 518        unsigned int bit;
 519        int error;
 520
 521        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 522        BUG_ON(error);
 523        clear_bit(bit, addr);
 524}
 525
 526static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 527{
 528        void *addr;
 529        unsigned int bit;
 530        int error;
 531
 532        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
 533        BUG_ON(error);
 534        return test_bit(bit, addr);
 535}
 536
 537static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
 538{
 539        void *addr;
 540        unsigned int bit;
 541
 542        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
 543}
 544
 545/**
 546 *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
 547 *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
 548 *      returned.
 549 *
 550 *      It is required to run memory_bm_position_reset() before the first call to
 551 *      this function.
 552 */
 553
 554static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 555{
 556        struct bm_block *bb;
 557        int bit;
 558
 559        bb = bm->cur.block;
 560        do {
 561                bit = bm->cur.bit;
 562                bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
 563                if (bit < bm_block_bits(bb))
 564                        goto Return_pfn;
 565
 566                bb = list_entry(bb->hook.next, struct bm_block, hook);
 567                bm->cur.block = bb;
 568                bm->cur.bit = 0;
 569        } while (&bb->hook != &bm->blocks);
 570
 571        memory_bm_position_reset(bm);
 572        return BM_END_OF_MAP;
 573
 574 Return_pfn:
 575        bm->cur.bit = bit + 1;
 576        return bb->start_pfn + bit;
 577}
 578
/**
 *      This structure represents a range of page frames the contents of which
 *      should not be saved during the suspend.
 */

struct nosave_region {
        struct list_head list;          /* hook into nosave_regions */
        unsigned long start_pfn;        /* first pfn of the range */
        unsigned long end_pfn;          /* pfn following the last one of the range */
};
 589
 590static LIST_HEAD(nosave_regions);
 591
 592/**
 593 *      register_nosave_region - register a range of page frames the contents
 594 *      of which should not be saved during the suspend (to be used in the early
 595 *      initialization code)
 596 */
 597
 598void __init
 599__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
 600                         int use_kmalloc)
 601{
 602        struct nosave_region *region;
 603
 604        if (start_pfn >= end_pfn)
 605                return;
 606
 607        if (!list_empty(&nosave_regions)) {
 608                /* Try to extend the previous region (they should be sorted) */
 609                region = list_entry(nosave_regions.prev,
 610                                        struct nosave_region, list);
 611                if (region->end_pfn == start_pfn) {
 612                        region->end_pfn = end_pfn;
 613                        goto Report;
 614                }
 615        }
 616        if (use_kmalloc) {
 617                /* during init, this shouldn't fail */
 618                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
 619                BUG_ON(!region);
 620        } else
 621                /* This allocation cannot fail */
 622                region = alloc_bootmem(sizeof(struct nosave_region));
 623        region->start_pfn = start_pfn;
 624        region->end_pfn = end_pfn;
 625        list_add_tail(&region->list, &nosave_regions);
 626 Report:
 627        printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
 628                start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 629}
 630
 631/*
 632 * Set bits in this map correspond to the page frames the contents of which
 633 * should not be saved during the suspend.
 634 */
 635static struct memory_bitmap *forbidden_pages_map;
 636
 637/* Set bits in this map correspond to free page frames. */
 638static struct memory_bitmap *free_pages_map;
 639
 640/*
 641 * Each page frame allocated for creating the image is marked by setting the
 642 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
 643 */
 644
 645void swsusp_set_page_free(struct page *page)
 646{
 647        if (free_pages_map)
 648                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
 649}
 650
 651static int swsusp_page_is_free(struct page *page)
 652{
 653        return free_pages_map ?
 654                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
 655}
 656
 657void swsusp_unset_page_free(struct page *page)
 658{
 659        if (free_pages_map)
 660                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
 661}
 662
 663static void swsusp_set_page_forbidden(struct page *page)
 664{
 665        if (forbidden_pages_map)
 666                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
 667}
 668
 669int swsusp_page_is_forbidden(struct page *page)
 670{
 671        return forbidden_pages_map ?
 672                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
 673}
 674
 675static void swsusp_unset_page_forbidden(struct page *page)
 676{
 677        if (forbidden_pages_map)
 678                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
 679}
 680
 681/**
 682 *      mark_nosave_pages - set bits corresponding to the page frames the
 683 *      contents of which should not be saved in a given bitmap.
 684 */
 685
 686static void mark_nosave_pages(struct memory_bitmap *bm)
 687{
 688        struct nosave_region *region;
 689
 690        if (list_empty(&nosave_regions))
 691                return;
 692
 693        list_for_each_entry(region, &nosave_regions, list) {
 694                unsigned long pfn;
 695
 696                pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
 697                                region->start_pfn << PAGE_SHIFT,
 698                                region->end_pfn << PAGE_SHIFT);
 699
 700                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
 701                        if (pfn_valid(pfn)) {
 702                                /*
 703                                 * It is safe to ignore the result of
 704                                 * mem_bm_set_bit_check() here, since we won't
 705                                 * touch the PFNs for which the error is
 706                                 * returned anyway.
 707                                 */
 708                                mem_bm_set_bit_check(bm, pfn);
 709                        }
 710        }
 711}
 712
 713/**
 714 *      create_basic_memory_bitmaps - create bitmaps needed for marking page
 715 *      frames that should not be saved and free page frames.  The pointers
 716 *      forbidden_pages_map and free_pages_map are only modified if everything
 717 *      goes well, because we don't want the bits to be used before both bitmaps
 718 *      are set up.
 719 */
 720
 721int create_basic_memory_bitmaps(void)
 722{
 723        struct memory_bitmap *bm1, *bm2;
 724        int error = 0;
 725
 726        BUG_ON(forbidden_pages_map || free_pages_map);
 727
 728        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 729        if (!bm1)
 730                return -ENOMEM;
 731
 732        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
 733        if (error)
 734                goto Free_first_object;
 735
 736        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
 737        if (!bm2)
 738                goto Free_first_bitmap;
 739
 740        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
 741        if (error)
 742                goto Free_second_object;
 743
 744        forbidden_pages_map = bm1;
 745        free_pages_map = bm2;
 746        mark_nosave_pages(forbidden_pages_map);
 747
 748        pr_debug("PM: Basic memory bitmaps created\n");
 749
 750        return 0;
 751
 752 Free_second_object:
 753        kfree(bm2);
 754 Free_first_bitmap:
 755        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 756 Free_first_object:
 757        kfree(bm1);
 758        return -ENOMEM;
 759}
 760
 761/**
 762 *      free_basic_memory_bitmaps - free memory bitmaps allocated by
 763 *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
 764 *      so that the bitmaps themselves are not referred to while they are being
 765 *      freed.
 766 */
 767
 768void free_basic_memory_bitmaps(void)
 769{
 770        struct memory_bitmap *bm1, *bm2;
 771
 772        BUG_ON(!(forbidden_pages_map && free_pages_map));
 773
 774        bm1 = forbidden_pages_map;
 775        bm2 = free_pages_map;
 776        forbidden_pages_map = NULL;
 777        free_pages_map = NULL;
 778        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 779        kfree(bm1);
 780        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
 781        kfree(bm2);
 782
 783        pr_debug("PM: Basic memory bitmaps freed\n");
 784}
 785
 786/**
 787 *      snapshot_additional_pages - estimate the number of additional pages
 788 *      be needed for setting up the suspend image data structures for given
 789 *      zone (usually the returned value is greater than the exact number)
 790 */
 791
 792unsigned int snapshot_additional_pages(struct zone *zone)
 793{
 794        unsigned int res;
 795
 796        res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
 797        res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
 798        return 2 * res;
 799}
 800
 801#ifdef CONFIG_HIGHMEM
 802/**
 803 *      count_free_highmem_pages - compute the total number of free highmem
 804 *      pages, system-wide.
 805 */
 806
 807static unsigned int count_free_highmem_pages(void)
 808{
 809        struct zone *zone;
 810        unsigned int cnt = 0;
 811
 812        for_each_populated_zone(zone)
 813                if (is_highmem(zone))
 814                        cnt += zone_page_state(zone, NR_FREE_PAGES);
 815
 816        return cnt;
 817}
 818
 819/**
 820 *      saveable_highmem_page - Determine whether a highmem page should be
 821 *      included in the suspend image.
 822 *
 823 *      We should save the page if it isn't Nosave or NosaveFree, or Reserved,
 824 *      and it isn't a part of a free chunk of pages.
 825 */
 826static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 827{
 828        struct page *page;
 829
 830        if (!pfn_valid(pfn))
 831                return NULL;
 832
 833        page = pfn_to_page(pfn);
 834        if (page_zone(page) != zone)
 835                return NULL;
 836
 837        BUG_ON(!PageHighMem(page));
 838
 839        if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
 840            PageReserved(page))
 841                return NULL;
 842
 843        return page;
 844}
 845
 846/**
 847 *      count_highmem_pages - compute the total number of saveable highmem
 848 *      pages.
 849 */
 850
 851static unsigned int count_highmem_pages(void)
 852{
 853        struct zone *zone;
 854        unsigned int n = 0;
 855
 856        for_each_populated_zone(zone) {
 857                unsigned long pfn, max_zone_pfn;
 858
 859                if (!is_highmem(zone))
 860                        continue;
 861
 862                mark_free_pages(zone);
 863                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 864                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 865                        if (saveable_highmem_page(zone, pfn))
 866                                n++;
 867        }
 868        return n;
 869}
 870#else
/* Without CONFIG_HIGHMEM there are no highmem pages, so none is ever saveable */
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
        return NULL;
}
 875#endif /* CONFIG_HIGHMEM */
 876
 877/**
 878 *      saveable_page - Determine whether a non-highmem page should be included
 879 *      in the suspend image.
 880 *
 881 *      We should save the page if it isn't Nosave, and is not in the range
 882 *      of pages statically defined as 'unsaveable', and it isn't a part of
 883 *      a free chunk of pages.
 884 */
 885static struct page *saveable_page(struct zone *zone, unsigned long pfn)
 886{
 887        struct page *page;
 888
 889        if (!pfn_valid(pfn))
 890                return NULL;
 891
 892        page = pfn_to_page(pfn);
 893        if (page_zone(page) != zone)
 894                return NULL;
 895
 896        BUG_ON(PageHighMem(page));
 897
 898        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
 899                return NULL;
 900
 901        if (PageReserved(page)
 902            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
 903                return NULL;
 904
 905        return page;
 906}
 907
 908/**
 909 *      count_data_pages - compute the total number of saveable non-highmem
 910 *      pages.
 911 */
 912
 913static unsigned int count_data_pages(void)
 914{
 915        struct zone *zone;
 916        unsigned long pfn, max_zone_pfn;
 917        unsigned int n = 0;
 918
 919        for_each_populated_zone(zone) {
 920                if (is_highmem(zone))
 921                        continue;
 922
 923                mark_free_pages(zone);
 924                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
 925                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 926                        if (saveable_page(zone, pfn))
 927                                n++;
 928        }
 929        return n;
 930}
 931
 932/* This is needed, because copy_page and memcpy are not usable for copying
 933 * task structs.
 934 */
 935static inline void do_copy_page(long *dst, long *src)
 936{
 937        int n;
 938
 939        for (n = PAGE_SIZE / sizeof(long); n; n--)
 940                *dst++ = *src++;
 941}
 942
 943
 944/**
 945 *      safe_copy_page - check if the page we are going to copy is marked as
 946 *              present in the kernel page tables (this always is the case if
 947 *              CONFIG_DEBUG_PAGEALLOC is not set and in that case
 948 *              kernel_page_present() always returns 'true').
 949 */
 950static void safe_copy_page(void *dst, struct page *s_page)
 951{
 952        if (kernel_page_present(s_page)) {
 953                do_copy_page(dst, page_address(s_page));
 954        } else {
 955                kernel_map_pages(s_page, 1, 1);
 956                do_copy_page(dst, page_address(s_page));
 957                kernel_map_pages(s_page, 1, 0);
 958        }
 959}
 960
 961
 962#ifdef CONFIG_HIGHMEM
 963static inline struct page *
 964page_is_saveable(struct zone *zone, unsigned long pfn)
 965{
 966        return is_highmem(zone) ?
 967                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 968}
 969
 970static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 971{
 972        struct page *s_page, *d_page;
 973        void *src, *dst;
 974
 975        s_page = pfn_to_page(src_pfn);
 976        d_page = pfn_to_page(dst_pfn);
 977        if (PageHighMem(s_page)) {
 978                src = kmap_atomic(s_page, KM_USER0);
 979                dst = kmap_atomic(d_page, KM_USER1);
 980                do_copy_page(dst, src);
 981                kunmap_atomic(src, KM_USER0);
 982                kunmap_atomic(dst, KM_USER1);
 983        } else {
 984                if (PageHighMem(d_page)) {
 985                        /* Page pointed to by src may contain some kernel
 986                         * data modified by kmap_atomic()
 987                         */
 988                        safe_copy_page(buffer, s_page);
 989                        dst = kmap_atomic(d_page, KM_USER0);
 990                        memcpy(dst, buffer, PAGE_SIZE);
 991                        kunmap_atomic(dst, KM_USER0);
 992                } else {
 993                        safe_copy_page(page_address(d_page), s_page);
 994                }
 995        }
 996}
 997#else
 998#define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)
 999
/* Non-highmem build: both page frames are directly addressable. */
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	struct page *src_page = pfn_to_page(src_pfn);
	void *dst = page_address(pfn_to_page(dst_pfn));

	safe_copy_page(dst, src_page);
}
1005#endif /* CONFIG_HIGHMEM */
1006
1007static void
1008copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1009{
1010        struct zone *zone;
1011        unsigned long pfn;
1012
1013        for_each_populated_zone(zone) {
1014                unsigned long max_zone_pfn;
1015
1016                mark_free_pages(zone);
1017                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1018                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1019                        if (page_is_saveable(zone, pfn))
1020                                memory_bm_set_bit(orig_bm, pfn);
1021        }
1022        memory_bm_position_reset(orig_bm);
1023        memory_bm_position_reset(copy_bm);
1024        for(;;) {
1025                pfn = memory_bm_next_pfn(orig_bm);
1026                if (unlikely(pfn == BM_END_OF_MAP))
1027                        break;
1028                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1029        }
1030}
1031
1032/* Total number of image pages */
1033static unsigned int nr_copy_pages;
1034/* Number of pages needed for saving the original pfns of the image pages */
1035static unsigned int nr_meta_pages;
1036/*
1037 * Numbers of normal and highmem page frames allocated for hibernation image
1038 * before suspending devices.
1039 */
1040unsigned int alloc_normal, alloc_highmem;
1041/*
1042 * Memory bitmap used for marking saveable pages (during hibernation) or
1043 * hibernation image pages (during restore)
1044 */
1045static struct memory_bitmap orig_bm;
1046/*
1047 * Memory bitmap used during hibernation for marking allocated page frames that
1048 * will contain copies of saveable pages.  During restore it is initially used
1049 * for marking hibernation image pages, but then the set bits from it are
1050 * duplicated in @orig_bm and it is released.  On highmem systems it is next
1051 * used for marking "safe" highmem pages, but it has to be reinitialized for
1052 * this purpose.
1053 */
1054static struct memory_bitmap copy_bm;
1055
1056/**
1057 *      swsusp_free - free pages allocated for the suspend.
1058 *
1059 *      Suspend pages are alocated before the atomic copy is made, so we
1060 *      need to release them after the resume.
1061 */
1062
1063void swsusp_free(void)
1064{
1065        struct zone *zone;
1066        unsigned long pfn, max_zone_pfn;
1067
1068        for_each_populated_zone(zone) {
1069                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1070                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1071                        if (pfn_valid(pfn)) {
1072                                struct page *page = pfn_to_page(pfn);
1073
1074                                if (swsusp_page_is_forbidden(page) &&
1075                                    swsusp_page_is_free(page)) {
1076                                        swsusp_unset_page_forbidden(page);
1077                                        swsusp_unset_page_free(page);
1078                                        __free_page(page);
1079                                }
1080                        }
1081        }
1082        nr_copy_pages = 0;
1083        nr_meta_pages = 0;
1084        restore_pblist = NULL;
1085        buffer = NULL;
1086        alloc_normal = 0;
1087        alloc_highmem = 0;
1088}
1089
1090/* Helper functions used for the shrinking of memory. */
1091
1092#define GFP_IMAGE       (GFP_KERNEL | __GFP_NOWARN)
1093
1094/**
1095 * preallocate_image_pages - Allocate a number of pages for hibernation image
1096 * @nr_pages: Number of page frames to allocate.
1097 * @mask: GFP flags to use for the allocation.
1098 *
1099 * Return value: Number of page frames actually allocated
1100 */
1101static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1102{
1103        unsigned long nr_alloc = 0;
1104
1105        while (nr_pages > 0) {
1106                struct page *page;
1107
1108                page = alloc_image_page(mask);
1109                if (!page)
1110                        break;
1111                memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1112                if (PageHighMem(page))
1113                        alloc_highmem++;
1114                else
1115                        alloc_normal++;
1116                nr_pages--;
1117                nr_alloc++;
1118        }
1119
1120        return nr_alloc;
1121}
1122
1123static unsigned long preallocate_image_memory(unsigned long nr_pages)
1124{
1125        return preallocate_image_pages(nr_pages, GFP_IMAGE);
1126}
1127
1128#ifdef CONFIG_HIGHMEM
1129static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1130{
1131        return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1132}
1133
1134/**
1135 *  __fraction - Compute (an approximation of) x * (multiplier / base)
1136 */
1137static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1138{
1139        x *= multiplier;
1140        do_div(x, base);
1141        return (unsigned long)x;
1142}
1143
1144static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1145                                                unsigned long highmem,
1146                                                unsigned long total)
1147{
1148        unsigned long alloc = __fraction(nr_pages, highmem, total);
1149
1150        return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1151}
1152#else /* CONFIG_HIGHMEM */
/* Non-highmem build: there is no highmem to preallocate from. */
static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
	(void)nr_pages;

	return 0;
}
1157
/* Non-highmem build: the highmem share of any allocation is always zero. */
static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
						unsigned long highmem,
						unsigned long total)
{
	(void)nr_pages;
	(void)highmem;
	(void)total;

	return 0;
}
1164#endif /* CONFIG_HIGHMEM */
1165
1166/**
1167 * free_unnecessary_pages - Release preallocated pages not needed for the image
1168 */
1169static void free_unnecessary_pages(void)
1170{
1171        unsigned long save_highmem, to_free_normal, to_free_highmem;
1172
1173        to_free_normal = alloc_normal - count_data_pages();
1174        save_highmem = count_highmem_pages();
1175        if (alloc_highmem > save_highmem) {
1176                to_free_highmem = alloc_highmem - save_highmem;
1177        } else {
1178                to_free_highmem = 0;
1179                to_free_normal -= save_highmem - alloc_highmem;
1180        }
1181
1182        memory_bm_position_reset(&copy_bm);
1183
1184        while (to_free_normal > 0 && to_free_highmem > 0) {
1185                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1186                struct page *page = pfn_to_page(pfn);
1187
1188                if (PageHighMem(page)) {
1189                        if (!to_free_highmem)
1190                                continue;
1191                        to_free_highmem--;
1192                        alloc_highmem--;
1193                } else {
1194                        if (!to_free_normal)
1195                                continue;
1196                        to_free_normal--;
1197                        alloc_normal--;
1198                }
1199                memory_bm_clear_bit(&copy_bm, pfn);
1200                swsusp_unset_page_forbidden(page);
1201                swsusp_unset_page_free(page);
1202                __free_page(page);
1203        }
1204}
1205
1206/**
1207 * minimum_image_size - Estimate the minimum acceptable size of an image
1208 * @saveable: Number of saveable pages in the system.
1209 *
1210 * We want to avoid attempting to free too much memory too hard, so estimate the
1211 * minimum acceptable size of a hibernation image to use as the lower limit for
1212 * preallocating memory.
1213 *
1214 * We assume that the minimum image size should be proportional to
1215 *
1216 * [number of saveable pages] - [number of pages that can be freed in theory]
1217 *
1218 * where the second term is the sum of (1) reclaimable slab pages, (2) active
1219 * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages,
1220 * minus mapped file pages.
1221 */
1222static unsigned long minimum_image_size(unsigned long saveable)
1223{
1224        unsigned long size;
1225
1226        size = global_page_state(NR_SLAB_RECLAIMABLE)
1227                + global_page_state(NR_ACTIVE_ANON)
1228                + global_page_state(NR_INACTIVE_ANON)
1229                + global_page_state(NR_ACTIVE_FILE)
1230                + global_page_state(NR_INACTIVE_FILE)
1231                - global_page_state(NR_FILE_MAPPED);
1232
1233        return saveable <= size ? 0 : saveable - size;
1234}
1235
1236/**
1237 * hibernate_preallocate_memory - Preallocate memory for hibernation image
1238 *
1239 * To create a hibernation image it is necessary to make a copy of every page
1240 * frame in use.  We also need a number of page frames to be free during
1241 * hibernation for allocations made while saving the image and for device
1242 * drivers, in case they need to allocate memory from their hibernation
1243 * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES,
1244 * respectively, both of which are rough estimates).  To make this happen, we
1245 * compute the total number of available page frames and allocate at least
1246 *
1247 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES
1248 *
1249 * of them, which corresponds to the maximum size of a hibernation image.
1250 *
1251 * If image_size is set below the number following from the above formula,
1252 * the preallocation of memory is continued until the total number of saveable
1253 * pages in the system is below the requested image size or the minimum
1254 * acceptable image size returned by minimum_image_size(), whichever is greater.
1255 */
1256int hibernate_preallocate_memory(void)
1257{
1258        struct zone *zone;
1259        unsigned long saveable, size, max_size, count, highmem, pages = 0;
1260        unsigned long alloc, save_highmem, pages_highmem;
1261        struct timeval start, stop;
1262        int error;
1263
1264        printk(KERN_INFO "PM: Preallocating image memory... ");
1265        do_gettimeofday(&start);
1266
1267        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1268        if (error)
1269                goto err_out;
1270
1271        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1272        if (error)
1273                goto err_out;
1274
1275        alloc_normal = 0;
1276        alloc_highmem = 0;
1277
1278        /* Count the number of saveable data pages. */
1279        save_highmem = count_highmem_pages();
1280        saveable = count_data_pages();
1281
1282        /*
1283         * Compute the total number of page frames we can use (count) and the
1284         * number of pages needed for image metadata (size).
1285         */
1286        count = saveable;
1287        saveable += save_highmem;
1288        highmem = save_highmem;
1289        size = 0;
1290        for_each_populated_zone(zone) {
1291                size += snapshot_additional_pages(zone);
1292                if (is_highmem(zone))
1293                        highmem += zone_page_state(zone, NR_FREE_PAGES);
1294                else
1295                        count += zone_page_state(zone, NR_FREE_PAGES);
1296        }
1297        count += highmem;
1298        count -= totalreserve_pages;
1299
1300        /* Compute the maximum number of saveable pages to leave in memory. */
1301        max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
1302        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1303        if (size > max_size)
1304                size = max_size;
1305        /*
1306         * If the maximum is not less than the current number of saveable pages
1307         * in memory, allocate page frames for the image and we're done.
1308         */
1309        if (size >= saveable) {
1310                pages = preallocate_image_highmem(save_highmem);
1311                pages += preallocate_image_memory(saveable - pages);
1312                goto out;
1313        }
1314
1315        /* Estimate the minimum size of the image. */
1316        pages = minimum_image_size(saveable);
1317        if (size < pages)
1318                size = min_t(unsigned long, pages, max_size);
1319
1320        /*
1321         * Let the memory management subsystem know that we're going to need a
1322         * large number of page frames to allocate and make it free some memory.
1323         * NOTE: If this is not done, performance will be hurt badly in some
1324         * test cases.
1325         */
1326        shrink_all_memory(saveable - size);
1327
1328        /*
1329         * The number of saveable pages in memory was too high, so apply some
1330         * pressure to decrease it.  First, make room for the largest possible
1331         * image and fail if that doesn't work.  Next, try to decrease the size
1332         * of the image as much as indicated by 'size' using allocations from
1333         * highmem and non-highmem zones separately.
1334         */
1335        pages_highmem = preallocate_image_highmem(highmem / 2);
1336        alloc = (count - max_size) - pages_highmem;
1337        pages = preallocate_image_memory(alloc);
1338        if (pages < alloc)
1339                goto err_out;
1340        size = max_size - size;
1341        alloc = size;
1342        size = preallocate_highmem_fraction(size, highmem, count);
1343        pages_highmem += size;
1344        alloc -= size;
1345        pages += preallocate_image_memory(alloc);
1346        pages += pages_highmem;
1347
1348        /*
1349         * We only need as many page frames for the image as there are saveable
1350         * pages in memory, but we have allocated more.  Release the excessive
1351         * ones now.
1352         */
1353        free_unnecessary_pages();
1354
1355 out:
1356        do_gettimeofday(&stop);
1357        printk(KERN_CONT "done (allocated %lu pages)\n", pages);
1358        swsusp_show_speed(&start, &stop, pages, "Allocated");
1359
1360        return 0;
1361
1362 err_out:
1363        printk(KERN_CONT "\n");
1364        swsusp_free();
1365        return -ENOMEM;
1366}
1367
1368#ifdef CONFIG_HIGHMEM
1369/**
1370  *     count_pages_for_highmem - compute the number of non-highmem pages
1371  *     that will be necessary for creating copies of highmem pages.
1372  */
1373
1374static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1375{
1376        unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1377
1378        if (free_highmem >= nr_highmem)
1379                nr_highmem = 0;
1380        else
1381                nr_highmem -= free_highmem;
1382
1383        return nr_highmem;
1384}
1385#else
/* Non-highmem build: no normal pages are ever needed for highmem copies. */
static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
	(void)nr_highmem;

	return 0;
}
1388#endif /* CONFIG_HIGHMEM */
1389
1390/**
1391 *      enough_free_mem - Make sure we have enough free memory for the
1392 *      snapshot image.
1393 */
1394
1395static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1396{
1397        struct zone *zone;
1398        unsigned int free = alloc_normal;
1399
1400        for_each_populated_zone(zone)
1401                if (!is_highmem(zone))
1402                        free += zone_page_state(zone, NR_FREE_PAGES);
1403
1404        nr_pages += count_pages_for_highmem(nr_highmem);
1405        pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
1406                nr_pages, PAGES_FOR_IO, free);
1407
1408        return free > nr_pages + PAGES_FOR_IO;
1409}
1410
1411#ifdef CONFIG_HIGHMEM
1412/**
1413 *      get_highmem_buffer - if there are some highmem pages in the suspend
1414 *      image, we may need the buffer to copy them and/or load their data.
1415 */
1416
1417static inline int get_highmem_buffer(int safe_needed)
1418{
1419        buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1420        return buffer ? 0 : -ENOMEM;
1421}
1422
1423/**
1424 *      alloc_highmem_image_pages - allocate some highmem pages for the image.
1425 *      Try to allocate as many pages as needed, but if the number of free
1426 *      highmem pages is lesser than that, allocate them all.
1427 */
1428
1429static inline unsigned int
1430alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1431{
1432        unsigned int to_alloc = count_free_highmem_pages();
1433
1434        if (to_alloc > nr_highmem)
1435                to_alloc = nr_highmem;
1436
1437        nr_highmem -= to_alloc;
1438        while (to_alloc-- > 0) {
1439                struct page *page;
1440
1441                page = alloc_image_page(__GFP_HIGHMEM);
1442                memory_bm_set_bit(bm, page_to_pfn(page));
1443        }
1444        return nr_highmem;
1445}
1446#else
/* Non-highmem build: no highmem buffer is needed, so always succeed. */
static inline int get_highmem_buffer(int safe_needed)
{
	(void)safe_needed;

	return 0;
}
1448
/* Non-highmem build: nothing is allocated and nothing is left over. */
static inline unsigned int
alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n)
{
	(void)bm;
	(void)n;

	return 0;
}
1451#endif /* CONFIG_HIGHMEM */
1452
1453/**
1454 *      swsusp_alloc - allocate memory for the suspend image
1455 *
1456 *      We first try to allocate as many highmem pages as there are
1457 *      saveable highmem pages in the system.  If that fails, we allocate
1458 *      non-highmem pages for the copies of the remaining highmem ones.
1459 *
1460 *      In this approach it is likely that the copies of highmem pages will
1461 *      also be located in the high memory, because of the way in which
1462 *      copy_data_pages() works.
1463 */
1464
1465static int
1466swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1467                unsigned int nr_pages, unsigned int nr_highmem)
1468{
1469        int error = 0;
1470
1471        if (nr_highmem > 0) {
1472                error = get_highmem_buffer(PG_ANY);
1473                if (error)
1474                        goto err_out;
1475                if (nr_highmem > alloc_highmem) {
1476                        nr_highmem -= alloc_highmem;
1477                        nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1478                }
1479        }
1480        if (nr_pages > alloc_normal) {
1481                nr_pages -= alloc_normal;
1482                while (nr_pages-- > 0) {
1483                        struct page *page;
1484
1485                        page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1486                        if (!page)
1487                                goto err_out;
1488                        memory_bm_set_bit(copy_bm, page_to_pfn(page));
1489                }
1490        }
1491
1492        return 0;
1493
1494 err_out:
1495        swsusp_free();
1496        return error;
1497}
1498
1499asmlinkage int swsusp_save(void)
1500{
1501        unsigned int nr_pages, nr_highmem;
1502
1503        printk(KERN_INFO "PM: Creating hibernation image: \n");
1504
1505        drain_local_pages(NULL);
1506        nr_pages = count_data_pages();
1507        nr_highmem = count_highmem_pages();
1508        printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1509
1510        if (!enough_free_mem(nr_pages, nr_highmem)) {
1511                printk(KERN_ERR "PM: Not enough free memory\n");
1512                return -ENOMEM;
1513        }
1514
1515        if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1516                printk(KERN_ERR "PM: Memory allocation failed\n");
1517                return -ENOMEM;
1518        }
1519
1520        /* During allocating of suspend pagedir, new cold pages may appear.
1521         * Kill them.
1522         */
1523        drain_local_pages(NULL);
1524        copy_data_pages(&copy_bm, &orig_bm);
1525
1526        /*
1527         * End of critical section. From now on, we can write to memory,
1528         * but we should not touch disk. This specially means we must _not_
1529         * touch swap space! Except we must write out our image of course.
1530         */
1531
1532        nr_pages += nr_highmem;
1533        nr_copy_pages = nr_pages;
1534        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1535
1536        printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1537                nr_pages);
1538
1539        return 0;
1540}
1541
1542#ifndef CONFIG_ARCH_HIBERNATION_HEADER
1543static int init_header_complete(struct swsusp_info *info)
1544{
1545        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1546        info->version_code = LINUX_VERSION_CODE;
1547        return 0;
1548}
1549
1550static char *check_image_kernel(struct swsusp_info *info)
1551{
1552        if (info->version_code != LINUX_VERSION_CODE)
1553                return "kernel version";
1554        if (strcmp(info->uts.sysname,init_utsname()->sysname))
1555                return "system type";
1556        if (strcmp(info->uts.release,init_utsname()->release))
1557                return "kernel release";
1558        if (strcmp(info->uts.version,init_utsname()->version))
1559                return "version";
1560        if (strcmp(info->uts.machine,init_utsname()->machine))
1561                return "machine";
1562        return NULL;
1563}
1564#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1565
1566unsigned long snapshot_get_image_size(void)
1567{
1568        return nr_copy_pages + nr_meta_pages + 1;
1569}
1570
1571static int init_header(struct swsusp_info *info)
1572{
1573        memset(info, 0, sizeof(struct swsusp_info));
1574        info->num_physpages = num_physpages;
1575        info->image_pages = nr_copy_pages;
1576        info->pages = snapshot_get_image_size();
1577        info->size = info->pages;
1578        info->size <<= PAGE_SHIFT;
1579        return init_header_complete(info);
1580}
1581
1582/**
1583 *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1584 *      are stored in the array @buf[] (1 page at a time)
1585 */
1586
1587static inline void
1588pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1589{
1590        int j;
1591
1592        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1593                buf[j] = memory_bm_next_pfn(bm);
1594                if (unlikely(buf[j] == BM_END_OF_MAP))
1595                        break;
1596        }
1597}
1598
1599/**
1600 *      snapshot_read_next - used for reading the system memory snapshot.
1601 *
1602 *      On the first call to it @handle should point to a zeroed
1603 *      snapshot_handle structure.  The structure gets updated and a pointer
1604 *      to it should be passed to this function every next time.
1605 *
1606 *      The @count parameter should contain the number of bytes the caller
1607 *      wants to read from the snapshot.  It must not be zero.
1608 *
1609 *      On success the function returns a positive number.  Then, the caller
1610 *      is allowed to read up to the returned number of bytes from the memory
1611 *      location computed by the data_of() macro.  The number returned
1612 *      may be smaller than @count, but this only happens if the read would
1613 *      cross a page boundary otherwise.
1614 *
1615 *      The function returns 0 to indicate the end of data stream condition,
1616 *      and a negative number is returned on error.  In such cases the
1617 *      structure pointed to by @handle is not updated and should not be used
1618 *      any more.
1619 */
1620
1621int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1622{
1623        if (handle->cur > nr_meta_pages + nr_copy_pages)
1624                return 0;
1625
1626        if (!buffer) {
1627                /* This makes the buffer be freed by swsusp_free() */
1628                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1629                if (!buffer)
1630                        return -ENOMEM;
1631        }
1632        if (!handle->offset) {
1633                int error;
1634
1635                error = init_header((struct swsusp_info *)buffer);
1636                if (error)
1637                        return error;
1638                handle->buffer = buffer;
1639                memory_bm_position_reset(&orig_bm);
1640                memory_bm_position_reset(&copy_bm);
1641        }
1642        if (handle->prev < handle->cur) {
1643                if (handle->cur <= nr_meta_pages) {
1644                        memset(buffer, 0, PAGE_SIZE);
1645                        pack_pfns(buffer, &orig_bm);
1646                } else {
1647                        struct page *page;
1648
1649                        page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1650                        if (PageHighMem(page)) {
1651                                /* Highmem pages are copied to the buffer,
1652                                 * because we can't return with a kmapped
1653                                 * highmem page (we may not be called again).
1654                                 */
1655                                void *kaddr;
1656
1657                                kaddr = kmap_atomic(page, KM_USER0);
1658                                memcpy(buffer, kaddr, PAGE_SIZE);
1659                                kunmap_atomic(kaddr, KM_USER0);
1660                                handle->buffer = buffer;
1661                        } else {
1662                                handle->buffer = page_address(page);
1663                        }
1664                }
1665                handle->prev = handle->cur;
1666        }
1667        handle->buf_offset = handle->cur_offset;
1668        if (handle->cur_offset + count >= PAGE_SIZE) {
1669                count = PAGE_SIZE - handle->cur_offset;
1670                handle->cur_offset = 0;
1671                handle->cur++;
1672        } else {
1673                handle->cur_offset += count;
1674        }
1675        handle->offset += count;
1676        return count;
1677}
1678
1679/**
1680 *      mark_unsafe_pages - mark the pages that cannot be used for storing
1681 *      the image during resume, because they conflict with the pages that
1682 *      had been used before suspend
1683 */
1684
1685static int mark_unsafe_pages(struct memory_bitmap *bm)
1686{
1687        struct zone *zone;
1688        unsigned long pfn, max_zone_pfn;
1689
1690        /* Clear page flags */
1691        for_each_populated_zone(zone) {
1692                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1693                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1694                        if (pfn_valid(pfn))
1695                                swsusp_unset_page_free(pfn_to_page(pfn));
1696        }
1697
1698        /* Mark pages that correspond to the "original" pfns as "unsafe" */
1699        memory_bm_position_reset(bm);
1700        do {
1701                pfn = memory_bm_next_pfn(bm);
1702                if (likely(pfn != BM_END_OF_MAP)) {
1703                        if (likely(pfn_valid(pfn)))
1704                                swsusp_set_page_free(pfn_to_page(pfn));
1705                        else
1706                                return -EFAULT;
1707                }
1708        } while (pfn != BM_END_OF_MAP);
1709
1710        allocated_unsafe_pages = 0;
1711
1712        return 0;
1713}
1714
1715static void
1716duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1717{
1718        unsigned long pfn;
1719
1720        memory_bm_position_reset(src);
1721        pfn = memory_bm_next_pfn(src);
1722        while (pfn != BM_END_OF_MAP) {
1723                memory_bm_set_bit(dst, pfn);
1724                pfn = memory_bm_next_pfn(src);
1725        }
1726}
1727
1728static int check_header(struct swsusp_info *info)
1729{
1730        char *reason;
1731
1732        reason = check_image_kernel(info);
1733        if (!reason && info->num_physpages != num_physpages)
1734                reason = "memory size";
1735        if (reason) {
1736                printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1737                return -EPERM;
1738        }
1739        return 0;
1740}
1741
1742/**
1743 *      load header - check the image header and copy data from it
1744 */
1745
1746static int
1747load_header(struct swsusp_info *info)
1748{
1749        int error;
1750
1751        restore_pblist = NULL;
1752        error = check_header(info);
1753        if (!error) {
1754                nr_copy_pages = info->image_pages;
1755                nr_meta_pages = info->pages - info->image_pages - 1;
1756        }
1757        return error;
1758}
1759
1760/**
1761 *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1762 *      the corresponding bit in the memory bitmap @bm
1763 */
1764static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1765{
1766        int j;
1767
1768        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1769                if (unlikely(buf[j] == BM_END_OF_MAP))
1770                        break;
1771
1772                if (memory_bm_pfn_present(bm, buf[j]))
1773                        memory_bm_set_bit(bm, buf[j]);
1774                else
1775                        return -EFAULT;
1776        }
1777
1778        return 0;
1779}
1780
/* List of "safe" pages that may be used to store data loaded from the suspend
 * image.
 *
 * The list is singly linked through linked_page::next.  prepare_image()
 * builds it and get_buffer() / get_highmem_page_buffer() take pages off its
 * head.
 */
static struct linked_page *safe_pages_list;
1785
1786#ifdef CONFIG_HIGHMEM
/* struct highmem_pbe is used for creating the list of highmem pages that
 * should be restored atomically during the resume from disk, because the page
 * frames they have occupied before the suspend are in use.
 *
 * restore_highmem() swaps the contents of @copy_page and @orig_page for
 * every entry on the list.
 */
struct highmem_pbe {
	struct page *copy_page;	/* data is here now */
	struct page *orig_page;	/* data was here before the suspend */
	struct highmem_pbe *next;	/* next entry, NULL terminates the list */
};
1796
/* List of highmem PBEs needed for restoring the highmem pages that were
 * allocated before the suspend and included in the suspend image, but have
 * also been allocated by the "resume" kernel, so their contents cannot be
 * written directly to their "original" page frames.
 *
 * get_highmem_page_buffer() pushes new entries at the head of the list and
 * restore_highmem() walks it.
 */
static struct highmem_pbe *highmem_pblist;
1803
1804/**
1805 *      count_highmem_image_pages - compute the number of highmem pages in the
1806 *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1807 *      image pages are assumed to be set.
1808 */
1809
1810static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1811{
1812        unsigned long pfn;
1813        unsigned int cnt = 0;
1814
1815        memory_bm_position_reset(bm);
1816        pfn = memory_bm_next_pfn(bm);
1817        while (pfn != BM_END_OF_MAP) {
1818                if (PageHighMem(pfn_to_page(pfn)))
1819                        cnt++;
1820
1821                pfn = memory_bm_next_pfn(bm);
1822        }
1823        return cnt;
1824}
1825
1826/**
1827 *      prepare_highmem_image - try to allocate as many highmem pages as
1828 *      there are highmem image pages (@nr_highmem_p points to the variable
1829 *      containing the number of highmem image pages).  The pages that are
1830 *      "safe" (ie. will not be overwritten when the suspend image is
1831 *      restored) have the corresponding bits set in @bm (it must be
1832 *      unitialized).
1833 *
1834 *      NOTE: This function should not be called if there are no highmem
1835 *      image pages.
1836 */
1837
1838static unsigned int safe_highmem_pages;
1839
1840static struct memory_bitmap *safe_highmem_bm;
1841
1842static int
1843prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1844{
1845        unsigned int to_alloc;
1846
1847        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1848                return -ENOMEM;
1849
1850        if (get_highmem_buffer(PG_SAFE))
1851                return -ENOMEM;
1852
1853        to_alloc = count_free_highmem_pages();
1854        if (to_alloc > *nr_highmem_p)
1855                to_alloc = *nr_highmem_p;
1856        else
1857                *nr_highmem_p = to_alloc;
1858
1859        safe_highmem_pages = 0;
1860        while (to_alloc-- > 0) {
1861                struct page *page;
1862
1863                page = alloc_page(__GFP_HIGHMEM);
1864                if (!swsusp_page_is_free(page)) {
1865                        /* The page is "safe", set its bit the bitmap */
1866                        memory_bm_set_bit(bm, page_to_pfn(page));
1867                        safe_highmem_pages++;
1868                }
1869                /* Mark the page as allocated */
1870                swsusp_set_page_forbidden(page);
1871                swsusp_set_page_free(page);
1872        }
1873        memory_bm_position_reset(bm);
1874        safe_highmem_bm = bm;
1875        return 0;
1876}
1877
1878/**
1879 *      get_highmem_page_buffer - for given highmem image page find the buffer
1880 *      that suspend_write_next() should set for its caller to write to.
1881 *
1882 *      If the page is to be saved to its "original" page frame or a copy of
1883 *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1884 *      the copy of the page is to be made in normal memory, so the address of
1885 *      the copy is returned.
1886 *
1887 *      If @buffer is returned, the caller of suspend_write_next() will write
1888 *      the page's contents to @buffer, so they will have to be copied to the
1889 *      right location on the next call to suspend_write_next() and it is done
1890 *      with the help of copy_last_highmem_page().  For this purpose, if
1891 *      @buffer is returned, @last_highmem page is set to the page to which
1892 *      the data will have to be copied from @buffer.
1893 */
1894
1895static struct page *last_highmem_page;
1896
1897static void *
1898get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1899{
1900        struct highmem_pbe *pbe;
1901        void *kaddr;
1902
1903        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1904                /* We have allocated the "original" page frame and we can
1905                 * use it directly to store the loaded page.
1906                 */
1907                last_highmem_page = page;
1908                return buffer;
1909        }
1910        /* The "original" page frame has not been allocated and we have to
1911         * use a "safe" page frame to store the loaded page.
1912         */
1913        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1914        if (!pbe) {
1915                swsusp_free();
1916                return ERR_PTR(-ENOMEM);
1917        }
1918        pbe->orig_page = page;
1919        if (safe_highmem_pages > 0) {
1920                struct page *tmp;
1921
1922                /* Copy of the page will be stored in high memory */
1923                kaddr = buffer;
1924                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1925                safe_highmem_pages--;
1926                last_highmem_page = tmp;
1927                pbe->copy_page = tmp;
1928        } else {
1929                /* Copy of the page will be stored in normal memory */
1930                kaddr = safe_pages_list;
1931                safe_pages_list = safe_pages_list->next;
1932                pbe->copy_page = virt_to_page(kaddr);
1933        }
1934        pbe->next = highmem_pblist;
1935        highmem_pblist = pbe;
1936        return kaddr;
1937}
1938
1939/**
1940 *      copy_last_highmem_page - copy the contents of a highmem image from
1941 *      @buffer, where the caller of snapshot_write_next() has place them,
1942 *      to the right location represented by @last_highmem_page .
1943 */
1944
1945static void copy_last_highmem_page(void)
1946{
1947        if (last_highmem_page) {
1948                void *dst;
1949
1950                dst = kmap_atomic(last_highmem_page, KM_USER0);
1951                memcpy(dst, buffer, PAGE_SIZE);
1952                kunmap_atomic(dst, KM_USER0);
1953                last_highmem_page = NULL;
1954        }
1955}
1956
1957static inline int last_highmem_page_copied(void)
1958{
1959        return !last_highmem_page;
1960}
1961
1962static inline void free_highmem_data(void)
1963{
1964        if (safe_highmem_bm)
1965                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1966
1967        if (buffer)
1968                free_image_page(buffer, PG_UNSAFE_CLEAR);
1969}
1970#else
1971static inline int get_safe_write_buffer(void) { return 0; }
1972
1973static unsigned int
1974count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
1975
1976static inline int
1977prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1978{
1979        return 0;
1980}
1981
1982static inline void *
1983get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1984{
1985        return ERR_PTR(-EINVAL);
1986}
1987
1988static inline void copy_last_highmem_page(void) {}
1989static inline int last_highmem_page_copied(void) { return 1; }
1990static inline void free_highmem_data(void) {}
1991#endif /* CONFIG_HIGHMEM */
1992
1993/**
1994 *      prepare_image - use the memory bitmap @bm to mark the pages that will
1995 *      be overwritten in the process of restoring the system memory state
1996 *      from the suspend image ("unsafe" pages) and allocate memory for the
1997 *      image.
1998 *
1999 *      The idea is to allocate a new memory bitmap first and then allocate
2000 *      as many pages as needed for the image data, but not to assign these
2001 *      pages to specific tasks initially.  Instead, we just mark them as
2002 *      allocated and create a lists of "safe" pages that will be used
2003 *      later.  On systems with high memory a list of "safe" highmem pages is
2004 *      also created.
2005 */
2006
2007#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2008
2009static int
2010prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2011{
2012        unsigned int nr_pages, nr_highmem;
2013        struct linked_page *sp_list, *lp;
2014        int error;
2015
2016        /* If there is no highmem, the buffer will not be necessary */
2017        free_image_page(buffer, PG_UNSAFE_CLEAR);
2018        buffer = NULL;
2019
2020        nr_highmem = count_highmem_image_pages(bm);
2021        error = mark_unsafe_pages(bm);
2022        if (error)
2023                goto Free;
2024
2025        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2026        if (error)
2027                goto Free;
2028
2029        duplicate_memory_bitmap(new_bm, bm);
2030        memory_bm_free(bm, PG_UNSAFE_KEEP);
2031        if (nr_highmem > 0) {
2032                error = prepare_highmem_image(bm, &nr_highmem);
2033                if (error)
2034                        goto Free;
2035        }
2036        /* Reserve some safe pages for potential later use.
2037         *
2038         * NOTE: This way we make sure there will be enough safe pages for the
2039         * chain_alloc() in get_buffer().  It is a bit wasteful, but
2040         * nr_copy_pages cannot be greater than 50% of the memory anyway.
2041         */
2042        sp_list = NULL;
2043        /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
2044        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2045        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2046        while (nr_pages > 0) {
2047                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2048                if (!lp) {
2049                        error = -ENOMEM;
2050                        goto Free;
2051                }
2052                lp->next = sp_list;
2053                sp_list = lp;
2054                nr_pages--;
2055        }
2056        /* Preallocate memory for the image */
2057        safe_pages_list = NULL;
2058        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2059        while (nr_pages > 0) {
2060                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2061                if (!lp) {
2062                        error = -ENOMEM;
2063                        goto Free;
2064                }
2065                if (!swsusp_page_is_free(virt_to_page(lp))) {
2066                        /* The page is "safe", add it to the list */
2067                        lp->next = safe_pages_list;
2068                        safe_pages_list = lp;
2069                }
2070                /* Mark the page as allocated */
2071                swsusp_set_page_forbidden(virt_to_page(lp));
2072                swsusp_set_page_free(virt_to_page(lp));
2073                nr_pages--;
2074        }
2075        /* Free the reserved safe pages so that chain_alloc() can use them */
2076        while (sp_list) {
2077                lp = sp_list->next;
2078                free_image_page(sp_list, PG_UNSAFE_CLEAR);
2079                sp_list = lp;
2080        }
2081        return 0;
2082
2083 Free:
2084        swsusp_free();
2085        return error;
2086}
2087
2088/**
2089 *      get_buffer - compute the address that snapshot_write_next() should
2090 *      set for its caller to write to.
2091 */
2092
2093static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2094{
2095        struct pbe *pbe;
2096        struct page *page;
2097        unsigned long pfn = memory_bm_next_pfn(bm);
2098
2099        if (pfn == BM_END_OF_MAP)
2100                return ERR_PTR(-EFAULT);
2101
2102        page = pfn_to_page(pfn);
2103        if (PageHighMem(page))
2104                return get_highmem_page_buffer(page, ca);
2105
2106        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2107                /* We have allocated the "original" page frame and we can
2108                 * use it directly to store the loaded page.
2109                 */
2110                return page_address(page);
2111
2112        /* The "original" page frame has not been allocated and we have to
2113         * use a "safe" page frame to store the loaded page.
2114         */
2115        pbe = chain_alloc(ca, sizeof(struct pbe));
2116        if (!pbe) {
2117                swsusp_free();
2118                return ERR_PTR(-ENOMEM);
2119        }
2120        pbe->orig_address = page_address(page);
2121        pbe->address = safe_pages_list;
2122        safe_pages_list = safe_pages_list->next;
2123        pbe->next = restore_pblist;
2124        restore_pblist = pbe;
2125        return pbe->address;
2126}
2127
2128/**
2129 *      snapshot_write_next - used for writing the system memory snapshot.
2130 *
2131 *      On the first call to it @handle should point to a zeroed
2132 *      snapshot_handle structure.  The structure gets updated and a pointer
2133 *      to it should be passed to this function every next time.
2134 *
2135 *      The @count parameter should contain the number of bytes the caller
2136 *      wants to write to the image.  It must not be zero.
2137 *
2138 *      On success the function returns a positive number.  Then, the caller
2139 *      is allowed to write up to the returned number of bytes to the memory
2140 *      location computed by the data_of() macro.  The number returned
2141 *      may be smaller than @count, but this only happens if the write would
2142 *      cross a page boundary otherwise.
2143 *
2144 *      The function returns 0 to indicate the "end of file" condition,
2145 *      and a negative number is returned on error.  In such cases the
2146 *      structure pointed to by @handle is not updated and should not be used
2147 *      any more.
2148 */
2149
2150int snapshot_write_next(struct snapshot_handle *handle, size_t count)
2151{
2152        static struct chain_allocator ca;
2153        int error = 0;
2154
2155        /* Check if we have already loaded the entire image */
2156        if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
2157                return 0;
2158
2159        if (handle->offset == 0) {
2160                if (!buffer)
2161                        /* This makes the buffer be freed by swsusp_free() */
2162                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2163
2164                if (!buffer)
2165                        return -ENOMEM;
2166
2167                handle->buffer = buffer;
2168        }
2169        handle->sync_read = 1;
2170        if (handle->prev < handle->cur) {
2171                if (handle->prev == 0) {
2172                        error = load_header(buffer);
2173                        if (error)
2174                                return error;
2175
2176                        error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2177                        if (error)
2178                                return error;
2179
2180                } else if (handle->prev <= nr_meta_pages) {
2181                        error = unpack_orig_pfns(buffer, &copy_bm);
2182                        if (error)
2183                                return error;
2184
2185                        if (handle->prev == nr_meta_pages) {
2186                                error = prepare_image(&orig_bm, &copy_bm);
2187                                if (error)
2188                                        return error;
2189
2190                                chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2191                                memory_bm_position_reset(&orig_bm);
2192                                restore_pblist = NULL;
2193                                handle->buffer = get_buffer(&orig_bm, &ca);
2194                                handle->sync_read = 0;
2195                                if (IS_ERR(handle->buffer))
2196                                        return PTR_ERR(handle->buffer);
2197                        }
2198                } else {
2199                        copy_last_highmem_page();
2200                        handle->buffer = get_buffer(&orig_bm, &ca);
2201                        if (IS_ERR(handle->buffer))
2202                                return PTR_ERR(handle->buffer);
2203                        if (handle->buffer != buffer)
2204                                handle->sync_read = 0;
2205                }
2206                handle->prev = handle->cur;
2207        }
2208        handle->buf_offset = handle->cur_offset;
2209        if (handle->cur_offset + count >= PAGE_SIZE) {
2210                count = PAGE_SIZE - handle->cur_offset;
2211                handle->cur_offset = 0;
2212                handle->cur++;
2213        } else {
2214                handle->cur_offset += count;
2215        }
2216        handle->offset += count;
2217        return count;
2218}
2219
2220/**
2221 *      snapshot_write_finalize - must be called after the last call to
2222 *      snapshot_write_next() in case the last page in the image happens
2223 *      to be a highmem page and its contents should be stored in the
2224 *      highmem.  Additionally, it releases the memory that will not be
2225 *      used any more.
2226 */
2227
2228void snapshot_write_finalize(struct snapshot_handle *handle)
2229{
2230        copy_last_highmem_page();
2231        /* Free only if we have loaded the image entirely */
2232        if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
2233                memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2234                free_highmem_data();
2235        }
2236}
2237
2238int snapshot_image_loaded(struct snapshot_handle *handle)
2239{
2240        return !(!nr_copy_pages || !last_highmem_page_copied() ||
2241                        handle->cur <= nr_meta_pages + nr_copy_pages);
2242}
2243
2244#ifdef CONFIG_HIGHMEM
2245/* Assumes that @buf is ready and points to a "safe" page */
2246static inline void
2247swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2248{
2249        void *kaddr1, *kaddr2;
2250
2251        kaddr1 = kmap_atomic(p1, KM_USER0);
2252        kaddr2 = kmap_atomic(p2, KM_USER1);
2253        memcpy(buf, kaddr1, PAGE_SIZE);
2254        memcpy(kaddr1, kaddr2, PAGE_SIZE);
2255        memcpy(kaddr2, buf, PAGE_SIZE);
2256        kunmap_atomic(kaddr1, KM_USER0);
2257        kunmap_atomic(kaddr2, KM_USER1);
2258}
2259
2260/**
2261 *      restore_highmem - for each highmem page that was allocated before
2262 *      the suspend and included in the suspend image, and also has been
2263 *      allocated by the "resume" kernel swap its current (ie. "before
2264 *      resume") contents with the previous (ie. "before suspend") one.
2265 *
2266 *      If the resume eventually fails, we can call this function once
2267 *      again and restore the "before resume" highmem state.
2268 */
2269
2270int restore_highmem(void)
2271{
2272        struct highmem_pbe *pbe = highmem_pblist;
2273        void *buf;
2274
2275        if (!pbe)
2276                return 0;
2277
2278        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2279        if (!buf)
2280                return -ENOMEM;
2281
2282        while (pbe) {
2283                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2284                pbe = pbe->next;
2285        }
2286        free_image_page(buf, PG_UNSAFE_CLEAR);
2287        return 0;
2288}
2289#endif /* CONFIG_HIGHMEM */
2290