/*
 * linux/kernel/power/snapshot.c
 *
 * This file provides system snapshot/restore functionality for swsusp.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 *
 * This file is released under the GPLv2.
 *
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/compiler.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

static int swsusp_page_is_free(struct page *);
static void swsusp_set_page_forbidden(struct page *);
static void swsusp_unset_page_forbidden(struct page *);

/*
 * Number of bytes to reserve for memory allocations made by device drivers
 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
 * cause image creation to fail (tunable via /sys/power/reserved_size).
 */
unsigned long reserved_size;

void __init hibernate_reserved_size_init(void)
{
        reserved_size = SPARE_PAGES * PAGE_SIZE;
}

/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size;

void __init hibernate_image_size_init(void)
{
        image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
}

/* List of PBEs needed for restoring the pages that were allocated before
 * the suspend and included in the suspend image, but have also been
 * allocated by the "resume" kernel, so their contents cannot be written
 * directly to their "original" page frames.
 */
struct pbe *restore_pblist;

/* Pointer to an auxiliary buffer (1 page) */
static void *buffer;

/**
 *      @safe_needed - on resume, for storing the PBE list and the image,
 *      we can only use memory pages that do not conflict with the pages
 *      used before suspend.  The unsafe pages have PageNosaveFree set
 *      and we count them using unsafe_pages.
 *
 *      Each allocated image page is marked as PageNosave and PageNosaveFree
 *      so that swsusp_free() can release it.
 */

#define PG_ANY          0
#define PG_SAFE         1
#define PG_UNSAFE_CLEAR 1
#define PG_UNSAFE_KEEP  0

static unsigned int allocated_unsafe_pages;

static void *get_image_page(gfp_t gfp_mask, int safe_needed)
{
        void *res;

        res = (void *)get_zeroed_page(gfp_mask);
        if (safe_needed)
                while (res && swsusp_page_is_free(virt_to_page(res))) {
                        /* The page is unsafe, mark it for swsusp_free() */
                        swsusp_set_page_forbidden(virt_to_page(res));
                        allocated_unsafe_pages++;
                        res = (void *)get_zeroed_page(gfp_mask);
                }
        if (res) {
                swsusp_set_page_forbidden(virt_to_page(res));
                swsusp_set_page_free(virt_to_page(res));
        }
        return res;
}
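
/*
 * Note that the unsafe pages skipped above are deliberately not freed here:
 * keeping them allocated (and forbidden) prevents the page allocator from
 * handing them out again, and swsusp_free() releases them later.
 */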

unsigned long get_safe_page(gfp_t gfp_mask)
{
        return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
}

static struct page *alloc_image_page(gfp_t gfp_mask)
{
        struct page *page;

        page = alloc_page(gfp_mask);
        if (page) {
                swsusp_set_page_forbidden(page);
                swsusp_set_page_free(page);
        }
        return page;
}

/**
 *      free_image_page - free page represented by @addr, allocated with
 *      get_image_page (page flags set by it must be cleared)
 */

static inline void free_image_page(void *addr, int clear_nosave_free)
{
        struct page *page;

        BUG_ON(!virt_addr_valid(addr));

        page = virt_to_page(addr);

        swsusp_unset_page_forbidden(page);
        if (clear_nosave_free)
                swsusp_unset_page_free(page);

        __free_page(page);
}

/* struct linked_page is used to build chains of pages */

#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))

struct linked_page {
        struct linked_page *next;
        char data[LINKED_PAGE_DATA_SIZE];
} __packed;

static inline void
free_list_of_pages(struct linked_page *list, int clear_page_nosave)
{
        while (list) {
                struct linked_page *lp = list->next;

                free_image_page(list, clear_page_nosave);
                list = lp;
        }
}

/**
 *      struct chain_allocator is used for allocating small objects out of
 *      a linked list of pages called 'the chain'.
 *
 *      The chain grows each time there is no room for a new object in
 *      the current page.  The allocated objects cannot be freed individually.
 *      It is only possible to free them all at once, by freeing the entire
 *      chain.
 *
 *      NOTE: The chain allocator may be inefficient if the allocated objects
 *      are not much smaller than PAGE_SIZE.
 */

struct chain_allocator {
        struct linked_page *chain;      /* the chain */
        unsigned int used_space;        /* total size of objects allocated out
                                         * of the current page
                                         */
        gfp_t gfp_mask;         /* mask for allocating pages */
        int safe_needed;        /* if set, only "safe" pages are allocated */
};

static void
chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
{
        ca->chain = NULL;
        ca->used_space = LINKED_PAGE_DATA_SIZE;
        ca->gfp_mask = gfp_mask;
        ca->safe_needed = safe_needed;
}

static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
{
        void *ret;

        if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
                struct linked_page *lp;

                lp = get_image_page(ca->gfp_mask, ca->safe_needed);
                if (!lp)
                        return NULL;

                lp->next = ca->chain;
                ca->chain = lp;
                ca->used_space = 0;
        }
        ret = ca->chain->data + ca->used_space;
        ca->used_space += size;
        return ret;
}
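
/*
 * Typical usage (sketch, mirroring memory_bm_create() below): initialize the
 * allocator, carve objects out of it and eventually release the entire chain
 * in one go.
 *
 *      struct chain_allocator ca;
 *
 *      chain_init(&ca, GFP_KERNEL, PG_ANY);
 *      obj = chain_alloc(&ca, sizeof(*obj));   // repeat as needed
 *      free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);
 */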

/**
 *      Data types related to memory bitmaps.
 *
 *      A memory bitmap is a structure consisting of a list of objects of
 *      type struct bm_block.  Each of these objects represents one block
 *      of the bitmap, i.e. one contiguous range of page frames.
 *
 *      struct memory_bitmap contains the list of bitmap block objects, a
 *      struct bm_position used for browsing the bitmap, and a pointer to
 *      the list of pages used for allocating the bitmap block objects.
 *
 *      NOTE: It has to be possible to lay out the bitmap in memory
 *      using only allocations of order 0.  Additionally, the bitmap is
 *      designed to work with an arbitrary number of zones (this is over the
 *      top for now, but let's avoid making unnecessary assumptions ;-).
 *
 *      struct bm_block contains a pointer to the memory page in which
 *      information is stored (in the form of a block of the bitmap).
 *      It also contains the pfns that correspond to the start and end of
 *      the represented memory area.
 */

#define BM_END_OF_MAP   (~0UL)

#define BM_BITS_PER_BLOCK       (PAGE_SIZE * BITS_PER_BYTE)

struct bm_block {
        struct list_head hook;  /* hook into a list of bitmap blocks */
        unsigned long start_pfn;        /* pfn represented by the first bit */
        unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
        unsigned long *data;    /* bitmap representing pages */
};
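
/*
 * With 4 KiB pages, BM_BITS_PER_BLOCK is 32768, so a single bm_block
 * covers 32768 page frames, i.e. 128 MiB of physical memory.
 */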

static inline unsigned long bm_block_bits(struct bm_block *bb)
{
        return bb->end_pfn - bb->start_pfn;
}

/* struct bm_position is used for browsing memory bitmaps */

struct bm_position {
        struct bm_block *block;
        int bit;
};

struct memory_bitmap {
        struct list_head blocks;        /* list of bitmap blocks */
        struct linked_page *p_list;     /* list of pages used to store the
                                         * bitmap block objects
                                         */
        struct bm_position cur; /* most recently used bit position */
};

/* Functions that operate on memory bitmaps */

static void memory_bm_position_reset(struct memory_bitmap *bm)
{
        bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
        bm->cur.bit = 0;
}

static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);

/**
 *      create_bm_block_list - create a list of block bitmap objects
 *      @pages - number of pages to track
 *      @list - list to put the allocated blocks into
 *      @ca - chain allocator to be used for allocating memory
 */
static int create_bm_block_list(unsigned long pages,
                                struct list_head *list,
                                struct chain_allocator *ca)
{
        unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);

        while (nr_blocks-- > 0) {
                struct bm_block *bb;

                bb = chain_alloc(ca, sizeof(struct bm_block));
                if (!bb)
                        return -ENOMEM;
                list_add(&bb->hook, list);
        }

        return 0;
}

struct mem_extent {
        struct list_head hook;
        unsigned long start;
        unsigned long end;
};

/**
 *      free_mem_extents - free a list of memory extents
 *      @list - list of extents to empty
 */
static void free_mem_extents(struct list_head *list)
{
        struct mem_extent *ext, *aux;

        list_for_each_entry_safe(ext, aux, list, hook) {
                list_del(&ext->hook);
                kfree(ext);
        }
}

/**
 *      create_mem_extents - create a list of memory extents representing
 *                           contiguous ranges of PFNs
 *      @list - list to put the extents into
 *      @gfp_mask - mask to use for memory allocations
 */
static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
        struct zone *zone;

        INIT_LIST_HEAD(list);

        for_each_populated_zone(zone) {
                unsigned long zone_start, zone_end;
                struct mem_extent *ext, *cur, *aux;

                zone_start = zone->zone_start_pfn;
                zone_end = zone->zone_start_pfn + zone->spanned_pages;

                list_for_each_entry(ext, list, hook)
                        if (zone_start <= ext->end)
                                break;

                if (&ext->hook == list || zone_end < ext->start) {
                        /* New extent is necessary */
                        struct mem_extent *new_ext;

                        new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
                        if (!new_ext) {
                                free_mem_extents(list);
                                return -ENOMEM;
                        }
                        new_ext->start = zone_start;
                        new_ext->end = zone_end;
                        list_add_tail(&new_ext->hook, &ext->hook);
                        continue;
                }

                /* Merge this zone's range of PFNs with the existing one */
                if (zone_start < ext->start)
                        ext->start = zone_start;
                if (zone_end > ext->end)
                        ext->end = zone_end;

                /* More merging may be possible */
                cur = ext;
                list_for_each_entry_safe_continue(cur, aux, list, hook) {
                        if (zone_end < cur->start)
                                break;
                        if (zone_end < cur->end)
                                ext->end = cur->end;
                        list_del(&cur->hook);
                        kfree(cur);
                }
        }

        return 0;
}

/**
 *      memory_bm_create - allocate memory for a memory bitmap
 */
static int
memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
{
        struct chain_allocator ca;
        struct list_head mem_extents;
        struct mem_extent *ext;
        int error;

        chain_init(&ca, gfp_mask, safe_needed);
        INIT_LIST_HEAD(&bm->blocks);

        error = create_mem_extents(&mem_extents, gfp_mask);
        if (error)
                return error;

        list_for_each_entry(ext, &mem_extents, hook) {
                struct bm_block *bb;
                unsigned long pfn = ext->start;
                unsigned long pages = ext->end - ext->start;

                bb = list_entry(bm->blocks.prev, struct bm_block, hook);

                error = create_bm_block_list(pages, bm->blocks.prev, &ca);
                if (error)
                        goto Error;

                list_for_each_entry_continue(bb, &bm->blocks, hook) {
                        bb->data = get_image_page(gfp_mask, safe_needed);
                        if (!bb->data) {
                                error = -ENOMEM;
                                goto Error;
                        }

                        bb->start_pfn = pfn;
                        if (pages >= BM_BITS_PER_BLOCK) {
                                pfn += BM_BITS_PER_BLOCK;
                                pages -= BM_BITS_PER_BLOCK;
                        } else {
                                /* This is executed only once in the loop */
                                pfn += pages;
                        }
                        bb->end_pfn = pfn;
                }
        }

        bm->p_list = ca.chain;
        memory_bm_position_reset(bm);
 Exit:
        free_mem_extents(&mem_extents);
        return error;

 Error:
        bm->p_list = ca.chain;
        memory_bm_free(bm, PG_UNSAFE_CLEAR);
        goto Exit;
}

/**
 *      memory_bm_free - free memory occupied by the memory bitmap @bm
 */
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
        struct bm_block *bb;

        list_for_each_entry(bb, &bm->blocks, hook)
                if (bb->data)
                        free_image_page(bb->data, clear_nosave_free);

        free_list_of_pages(bm->p_list, clear_nosave_free);

        INIT_LIST_HEAD(&bm->blocks);
}

/**
 *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
 *      to the given pfn.  The cur.block and cur.bit members of @bm are
 *      updated on success.
 */
static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
                                void **addr, unsigned int *bit_nr)
{
        struct bm_block *bb;

        /*
         * Check if the pfn corresponds to the current bitmap block and find
         * the block where it fits if this is not the case.
         */
        bb = bm->cur.block;
        if (pfn < bb->start_pfn)
                list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
                        if (pfn >= bb->start_pfn)
                                break;

        if (pfn >= bb->end_pfn)
                list_for_each_entry_continue(bb, &bm->blocks, hook)
                        if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
                                break;

        if (&bb->hook == &bm->blocks)
                return -EFAULT;

        /* The block has been found */
        bm->cur.block = bb;
        pfn -= bb->start_pfn;
        bm->cur.bit = pfn + 1;
        *bit_nr = pfn;
        *addr = bb->data;
        return 0;
}

static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        BUG_ON(error);
        set_bit(bit, addr);
}

static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        if (!error)
                set_bit(bit, addr);
        return error;
}

static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        BUG_ON(error);
        clear_bit(bit, addr);
}

static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;
        int error;

        error = memory_bm_find_bit(bm, pfn, &addr, &bit);
        BUG_ON(error);
        return test_bit(bit, addr);
}

static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
{
        void *addr;
        unsigned int bit;

        return !memory_bm_find_bit(bm, pfn, &addr, &bit);
}

/**
 *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
 *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
 *      returned.
 *
 *      It is required to run memory_bm_position_reset() before the first call
 *      to this function.
 */

static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
        struct bm_block *bb;
        int bit;

        bb = bm->cur.block;
        do {
                bit = bm->cur.bit;
                bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
                if (bit < bm_block_bits(bb))
                        goto Return_pfn;

                bb = list_entry(bb->hook.next, struct bm_block, hook);
                bm->cur.block = bb;
                bm->cur.bit = 0;
        } while (&bb->hook != &bm->blocks);

        memory_bm_position_reset(bm);
        return BM_END_OF_MAP;

 Return_pfn:
        bm->cur.bit = bit + 1;
        return bb->start_pfn + bit;
}
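
/*
 * The canonical iteration idiom (used by copy_data_pages() below):
 *
 *      memory_bm_position_reset(bm);
 *      for (;;) {
 *              pfn = memory_bm_next_pfn(bm);
 *              if (pfn == BM_END_OF_MAP)
 *                      break;
 *              // operate on pfn
 *      }
 */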

/**
 *      This structure represents a range of page frames the contents of which
 *      should not be saved during the suspend.
 */

struct nosave_region {
        struct list_head list;
        unsigned long start_pfn;
        unsigned long end_pfn;
};

static LIST_HEAD(nosave_regions);

/**
 *      register_nosave_region - register a range of page frames the contents
 *      of which should not be saved during the suspend (to be used in the
 *      early initialization code)
 */

void __init
__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
                         int use_kmalloc)
{
        struct nosave_region *region;

        if (start_pfn >= end_pfn)
                return;

        if (!list_empty(&nosave_regions)) {
                /* Try to extend the previous region (they should be sorted) */
                region = list_entry(nosave_regions.prev,
                                        struct nosave_region, list);
                if (region->end_pfn == start_pfn) {
                        region->end_pfn = end_pfn;
                        goto Report;
                }
        }
        if (use_kmalloc) {
                /* During init, this shouldn't fail */
                region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
                BUG_ON(!region);
        } else
                /* This allocation cannot fail */
                region = alloc_bootmem(sizeof(struct nosave_region));
        region->start_pfn = start_pfn;
        region->end_pfn = end_pfn;
        list_add_tail(&region->list, &nosave_regions);
 Report:
        printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n",
                (unsigned long long) start_pfn << PAGE_SHIFT,
                ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
}
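
/*
 * Example (illustrative): architecture setup code normally goes through the
 * register_nosave_region() wrapper from <linux/suspend.h>, e.g.
 *
 *      register_nosave_region(PFN_DOWN(start_addr), PFN_UP(end_addr));
 */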

/*
 * Set bits in this map correspond to the page frames the contents of which
 * should not be saved during the suspend.
 */
static struct memory_bitmap *forbidden_pages_map;

/* Set bits in this map correspond to free page frames. */
static struct memory_bitmap *free_pages_map;

/*
 * Each page frame allocated for creating the image is marked by setting the
 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously.
 */

void swsusp_set_page_free(struct page *page)
{
        if (free_pages_map)
                memory_bm_set_bit(free_pages_map, page_to_pfn(page));
}

static int swsusp_page_is_free(struct page *page)
{
        return free_pages_map ?
                memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
}

void swsusp_unset_page_free(struct page *page)
{
        if (free_pages_map)
                memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
}

static void swsusp_set_page_forbidden(struct page *page)
{
        if (forbidden_pages_map)
                memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
}

int swsusp_page_is_forbidden(struct page *page)
{
        return forbidden_pages_map ?
                memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
}

static void swsusp_unset_page_forbidden(struct page *page)
{
        if (forbidden_pages_map)
                memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
}

/**
 *      mark_nosave_pages - set bits in the given bitmap that correspond to
 *      the page frames the contents of which should not be saved.
 */

static void mark_nosave_pages(struct memory_bitmap *bm)
{
        struct nosave_region *region;

        if (list_empty(&nosave_regions))
                return;

        list_for_each_entry(region, &nosave_regions, list) {
                unsigned long pfn;

                pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
                         (unsigned long long) region->start_pfn << PAGE_SHIFT,
                         ((unsigned long long) region->end_pfn << PAGE_SHIFT)
                                - 1);

                for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
                        if (pfn_valid(pfn)) {
                                /*
                                 * It is safe to ignore the result of
                                 * mem_bm_set_bit_check() here, since we won't
                                 * touch the PFNs for which the error is
                                 * returned anyway.
                                 */
                                mem_bm_set_bit_check(bm, pfn);
                        }
        }
}

/**
 *      create_basic_memory_bitmaps - create bitmaps needed for marking page
 *      frames that should not be saved and free page frames.  The pointers
 *      forbidden_pages_map and free_pages_map are only modified if everything
 *      goes well, because we don't want the bits to be used before both bitmaps
 *      are set up.
 */

int create_basic_memory_bitmaps(void)
{
        struct memory_bitmap *bm1, *bm2;
        int error = 0;

        BUG_ON(forbidden_pages_map || free_pages_map);

        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
        if (!bm1)
                return -ENOMEM;

        error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
        if (error)
                goto Free_first_object;

        bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
        if (!bm2)
                goto Free_first_bitmap;

        error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
        if (error)
                goto Free_second_object;

        forbidden_pages_map = bm1;
        free_pages_map = bm2;
        mark_nosave_pages(forbidden_pages_map);

        pr_debug("PM: Basic memory bitmaps created\n");

        return 0;

 Free_second_object:
        kfree(bm2);
 Free_first_bitmap:
        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 Free_first_object:
        kfree(bm1);
        return -ENOMEM;
}

/**
 *      free_basic_memory_bitmaps - free memory bitmaps allocated by
 *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
 *      so that the bitmaps themselves are not referred to while they are being
 *      freed.
 */

void free_basic_memory_bitmaps(void)
{
        struct memory_bitmap *bm1, *bm2;

        BUG_ON(!(forbidden_pages_map && free_pages_map));

        bm1 = forbidden_pages_map;
        bm2 = free_pages_map;
        forbidden_pages_map = NULL;
        free_pages_map = NULL;
        memory_bm_free(bm1, PG_UNSAFE_CLEAR);
        kfree(bm1);
        memory_bm_free(bm2, PG_UNSAFE_CLEAR);
        kfree(bm2);

        pr_debug("PM: Basic memory bitmaps freed\n");
}

/**
 *      snapshot_additional_pages - estimate the number of additional pages
 *      that will be needed for setting up the suspend image data structures
 *      for the given zone (usually the returned value is greater than the
 *      exact number)
 */

unsigned int snapshot_additional_pages(struct zone *zone)
{
        unsigned int res;

        res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
        res += DIV_ROUND_UP(res * sizeof(struct bm_block),
                            LINKED_PAGE_DATA_SIZE);
        return 2 * res;
}
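
/*
 * Worked example (illustrative, 64-bit kernel with 4 KiB pages, assuming
 * sizeof(struct bm_block) == 40): a zone spanning 262144 page frames (1 GiB)
 * needs 262144 / 32768 = 8 bitmap pages, and the 8 bm_block objects fit in
 * one linked page, so the estimate is 2 * (8 + 1) = 18 additional pages for
 * the two bitmaps (orig_bm and copy_bm).
 */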

#ifdef CONFIG_HIGHMEM
/**
 *      count_free_highmem_pages - compute the total number of free highmem
 *      pages, system-wide.
 */

static unsigned int count_free_highmem_pages(void)
{
        struct zone *zone;
        unsigned int cnt = 0;

        for_each_populated_zone(zone)
                if (is_highmem(zone))
                        cnt += zone_page_state(zone, NR_FREE_PAGES);

        return cnt;
}

/**
 *      saveable_highmem_page - Determine whether a highmem page should be
 *      included in the suspend image.
 *
 *      We should save the page if it isn't Nosave, NosaveFree or Reserved,
 *      and it isn't a part of a free chunk of pages.
 */
static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
{
        struct page *page;

        if (!pfn_valid(pfn))
                return NULL;

        page = pfn_to_page(pfn);
        if (page_zone(page) != zone)
                return NULL;

        BUG_ON(!PageHighMem(page));

        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
                return NULL;

        if (PageReserved(page) || PageOffline(page))
                return NULL;

        if (page_is_guard(page))
                return NULL;

        return page;
}

/**
 *      count_highmem_pages - compute the total number of saveable highmem
 *      pages.
 */

static unsigned int count_highmem_pages(void)
{
        struct zone *zone;
        unsigned int n = 0;

        for_each_populated_zone(zone) {
                unsigned long pfn, max_zone_pfn;

                if (!is_highmem(zone))
                        continue;

                mark_free_pages(zone);
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (saveable_highmem_page(zone, pfn))
                                n++;
        }
        return n;
}
#else
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
        return NULL;
}
#endif /* CONFIG_HIGHMEM */

/**
 *      saveable_page - Determine whether a non-highmem page should be included
 *      in the suspend image.
 *
 *      We should save the page if it isn't Nosave, and is not in the range
 *      of pages statically defined as 'unsaveable', and it isn't a part of
 *      a free chunk of pages.
 */
static struct page *saveable_page(struct zone *zone, unsigned long pfn)
{
        struct page *page;

        if (!pfn_valid(pfn))
                return NULL;

        page = pfn_to_page(pfn);
        if (page_zone(page) != zone)
                return NULL;

        BUG_ON(PageHighMem(page));

        if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
                return NULL;

        if (PageOffline(page))
                return NULL;

        if (PageReserved(page)
            && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
                return NULL;

        if (page_is_guard(page))
                return NULL;

        return page;
}

/**
 *      count_data_pages - compute the total number of saveable non-highmem
 *      pages.
 */

static unsigned int count_data_pages(void)
{
        struct zone *zone;
        unsigned long pfn, max_zone_pfn;
        unsigned int n = 0;

        for_each_populated_zone(zone) {
                if (is_highmem(zone))
                        continue;

                mark_free_pages(zone);
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (saveable_page(zone, pfn))
                                n++;
        }
        return n;
}

/* This is needed because copy_page and memcpy are not usable for copying
 * task structs.
 */
static inline void do_copy_page(long *dst, long *src)
{
        int n;

        for (n = PAGE_SIZE / sizeof(long); n; n--)
                *dst++ = *src++;
}


/**
 *      safe_copy_page - check if the page we are going to copy is marked as
 *              present in the kernel page tables (this always is the case if
 *              CONFIG_DEBUG_PAGEALLOC is not set and in that case
 *              kernel_page_present() always returns 'true').
 */
static void safe_copy_page(void *dst, struct page *s_page)
{
        if (kernel_page_present(s_page)) {
                do_copy_page(dst, page_address(s_page));
        } else {
                kernel_map_pages(s_page, 1, 1);
                do_copy_page(dst, page_address(s_page));
                kernel_map_pages(s_page, 1, 0);
        }
}


#ifdef CONFIG_HIGHMEM
static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
        return is_highmem(zone) ?
                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}

static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        struct page *s_page, *d_page;
        void *src, *dst;

        s_page = pfn_to_page(src_pfn);
        d_page = pfn_to_page(dst_pfn);
        if (PageHighMem(s_page)) {
                src = kmap_atomic(s_page);
                dst = kmap_atomic(d_page);
                do_copy_page(dst, src);
                kunmap_atomic(dst);
                kunmap_atomic(src);
        } else {
                if (PageHighMem(d_page)) {
                        /* Page pointed to by src may contain some kernel
                         * data modified by kmap_atomic()
                         */
                        safe_copy_page(buffer, s_page);
                        dst = kmap_atomic(d_page);
                        copy_page(dst, buffer);
                        kunmap_atomic(dst);
                } else {
                        safe_copy_page(page_address(d_page), s_page);
                }
        }
}
#else
#define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)

static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        safe_copy_page(page_address(pfn_to_page(dst_pfn)),
                                pfn_to_page(src_pfn));
}
#endif /* CONFIG_HIGHMEM */

static void
copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
{
        struct zone *zone;
        unsigned long pfn;

        for_each_populated_zone(zone) {
                unsigned long max_zone_pfn;

                mark_free_pages(zone);
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (page_is_saveable(zone, pfn))
                                memory_bm_set_bit(orig_bm, pfn);
        }
        memory_bm_position_reset(orig_bm);
        memory_bm_position_reset(copy_bm);
        for (;;) {
                pfn = memory_bm_next_pfn(orig_bm);
                if (unlikely(pfn == BM_END_OF_MAP))
                        break;
                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
        }
}
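
/*
 * Note that the two bitmaps are walked in lockstep above: the n-th set bit
 * of @copy_bm supplies the destination page frame for the data at the n-th
 * set bit of @orig_bm, so both bitmaps must contain the same number of set
 * bits.
 */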

/* Total number of image pages */
static unsigned int nr_copy_pages;
/* Number of pages needed for saving the original pfns of the image pages */
static unsigned int nr_meta_pages;
/*
 * Numbers of normal and highmem page frames allocated for hibernation image
 * before suspending devices.
 */
unsigned int alloc_normal, alloc_highmem;
/*
 * Memory bitmap used for marking saveable pages (during hibernation) or
 * hibernation image pages (during restore)
 */
static struct memory_bitmap orig_bm;
/*
 * Memory bitmap used during hibernation for marking allocated page frames that
 * will contain copies of saveable pages.  During restore it is initially used
 * for marking hibernation image pages, but then the set bits from it are
 * duplicated in @orig_bm and it is released.  On highmem systems it is next
 * used for marking "safe" highmem pages, but it has to be reinitialized for
 * this purpose.
 */
static struct memory_bitmap copy_bm;

/**
 *      swsusp_free - free pages allocated for the suspend.
 *
 *      Suspend pages are allocated before the atomic copy is made, so we
 *      need to release them after the resume.
 */

void swsusp_free(void)
{
        struct zone *zone;
        unsigned long pfn, max_zone_pfn;

        for_each_populated_zone(zone) {
                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);

                                if (swsusp_page_is_forbidden(page) &&
                                    swsusp_page_is_free(page)) {
                                        swsusp_unset_page_forbidden(page);
                                        swsusp_unset_page_free(page);
                                        __free_page(page);
                                }
                        }
        }
        nr_copy_pages = 0;
        nr_meta_pages = 0;
        restore_pblist = NULL;
        buffer = NULL;
        alloc_normal = 0;
        alloc_highmem = 0;
}

/* Helper functions used for the shrinking of memory. */

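/*
 * __GFP_NOWARN: allocation failures are expected here while probing for
 * memory and are handled gracefully, so they are not worth logging.
 */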
#define GFP_IMAGE       (GFP_KERNEL | __GFP_NOWARN)

/**
 * preallocate_image_pages - Allocate a number of pages for hibernation image
 * @nr_pages: Number of page frames to allocate.
 * @mask: GFP flags to use for the allocation.
 *
 * Return value: Number of page frames actually allocated
 */
static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
{
        unsigned long nr_alloc = 0;

        while (nr_pages > 0) {
                struct page *page;

                page = alloc_image_page(mask);
                if (!page)
                        break;
                memory_bm_set_bit(&copy_bm, page_to_pfn(page));
                if (PageHighMem(page))
                        alloc_highmem++;
                else
                        alloc_normal++;
                nr_pages--;
                nr_alloc++;
        }

        return nr_alloc;
}

static unsigned long preallocate_image_memory(unsigned long nr_pages,
                                              unsigned long avail_normal)
{
        unsigned long alloc;

        if (avail_normal <= alloc_normal)
                return 0;

        alloc = avail_normal - alloc_normal;
        if (nr_pages < alloc)
                alloc = nr_pages;

        return preallocate_image_pages(alloc, GFP_IMAGE);
}

#ifdef CONFIG_HIGHMEM
static unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
        return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
}

/**
 *  __fraction - Compute (an approximation of) x * (multiplier / base)
 */
static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
{
        x *= multiplier;
        do_div(x, base);
        return (unsigned long)x;
}
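
/* For instance, __fraction(100, 1, 3) yields 33 (do_div() truncates). */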

static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
                                                unsigned long highmem,
                                                unsigned long total)
{
        unsigned long alloc = __fraction(nr_pages, highmem, total);

        return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
}
#else /* CONFIG_HIGHMEM */
static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
        return 0;
}

static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
                                                unsigned long highmem,
                                                unsigned long total)
{
        return 0;
}
#endif /* CONFIG_HIGHMEM */

/**
 * free_unnecessary_pages - Release preallocated pages not needed for the image
 */
static void free_unnecessary_pages(void)
{
        unsigned long save, to_free_normal, to_free_highmem;

        save = count_data_pages();
        if (alloc_normal >= save) {
                to_free_normal = alloc_normal - save;
                save = 0;
        } else {
                to_free_normal = 0;
                save -= alloc_normal;
        }
        save += count_highmem_pages();
        if (alloc_highmem >= save) {
                to_free_highmem = alloc_highmem - save;
        } else {
                to_free_highmem = 0;
                save -= alloc_highmem;
                if (to_free_normal > save)
                        to_free_normal -= save;
                else
                        to_free_normal = 0;
        }

        memory_bm_position_reset(&copy_bm);

        while (to_free_normal > 0 || to_free_highmem > 0) {
                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
                struct page *page = pfn_to_page(pfn);

                if (PageHighMem(page)) {
                        if (!to_free_highmem)
                                continue;
                        to_free_highmem--;
                        alloc_highmem--;
                } else {
                        if (!to_free_normal)
                                continue;
                        to_free_normal--;
                        alloc_normal--;
                }
                memory_bm_clear_bit(&copy_bm, pfn);
                swsusp_unset_page_forbidden(page);
                swsusp_unset_page_free(page);
                __free_page(page);
        }
}

/**
 * minimum_image_size - Estimate the minimum acceptable size of an image
 * @saveable: Number of saveable pages in the system.
 *
 * We want to avoid attempting to free too much memory too hard, so estimate the
 * minimum acceptable size of a hibernation image to use as the lower limit for
 * preallocating memory.
 *
 * We assume that the minimum image size should be proportional to
 *
 * [number of saveable pages] - [number of pages that can be freed in theory]
 *
 * where the second term is the sum of (1) reclaimable slab pages, (2) active
 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
 * minus mapped file pages.
 */
static unsigned long minimum_image_size(unsigned long saveable)
{
        unsigned long size;

        size = global_page_state(NR_SLAB_RECLAIMABLE)
                + global_page_state(NR_ACTIVE_ANON)
                + global_page_state(NR_INACTIVE_ANON)
                + global_page_state(NR_ACTIVE_FILE)
                + global_page_state(NR_INACTIVE_FILE)
                - global_page_state(NR_FILE_MAPPED);

        return saveable <= size ? 0 : saveable - size;
}

/**
 * hibernate_preallocate_memory - Preallocate memory for hibernation image
 *
 * To create a hibernation image it is necessary to make a copy of every page
 * frame in use.  We also need a number of page frames to be free during
 * hibernation for allocations made while saving the image and for device
 * drivers, in case they need to allocate memory from their hibernation
 * callbacks (these two numbers are given by PAGES_FOR_IO, which is a rough
 * estimate, and by reserved_size divided by PAGE_SIZE, which is tunable
 * through /sys/power/reserved_size, respectively).  To make this happen, we
 * compute the total number of available page frames and allocate at least
 *
 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
 *
 * of them, which corresponds to the maximum size of a hibernation image.
 *
 * If image_size is set below the number following from the above formula,
 * the preallocation of memory is continued until the total number of saveable
 * pages in the system is below the requested image size or the minimum
 * acceptable image size returned by minimum_image_size(), whichever is greater.
 */
int hibernate_preallocate_memory(void)
{
        struct zone *zone;
        unsigned long saveable, size, max_size, count, highmem, pages = 0;
        unsigned long alloc, save_highmem, pages_highmem, avail_normal;
        struct timeval start, stop;
        int error;

        printk(KERN_INFO "PM: Preallocating image memory... ");
        do_gettimeofday(&start);

        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
        if (error)
                goto err_out;

        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
        if (error)
                goto err_out;

        alloc_normal = 0;
        alloc_highmem = 0;

        /* Count the number of saveable data pages. */
        save_highmem = count_highmem_pages();
        saveable = count_data_pages();

        /*
         * Compute the total number of page frames we can use (count) and the
         * number of pages needed for image metadata (size).
         */
        count = saveable;
        saveable += save_highmem;
        highmem = save_highmem;
        size = 0;
        for_each_populated_zone(zone) {
                size += snapshot_additional_pages(zone);
                if (is_highmem(zone))
                        highmem += zone_page_state(zone, NR_FREE_PAGES);
                else
                        count += zone_page_state(zone, NR_FREE_PAGES);
        }
        avail_normal = count;
        count += highmem;
        count -= totalreserve_pages;

        /* Add number of pages required for page keys (s390 only). */
        size += page_key_additional_pages(saveable);

        /* Compute the maximum number of saveable pages to leave in memory. */
        max_size = (count - (size + PAGES_FOR_IO)) / 2
                        - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
        /* Compute the desired number of image pages specified by image_size. */
        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
        if (size > max_size)
                size = max_size;
        /*
         * If the desired number of image pages is at least as large as the
         * current number of saveable pages in memory, allocate page frames for
         * the image and we're done.
         */
        if (size >= saveable) {
                pages = preallocate_image_highmem(save_highmem);
                pages += preallocate_image_memory(saveable - pages, avail_normal);
                goto out;
        }

        /* Estimate the minimum size of the image. */
        pages = minimum_image_size(saveable);
        /*
         * To avoid excessive pressure on the normal zone, leave room in it to
         * accommodate an image of the minimum size (unless it's already too
         * small, in which case don't preallocate pages from it at all).
         */
        if (avail_normal > pages)
                avail_normal -= pages;
        else
                avail_normal = 0;
        if (size < pages)
                size = min_t(unsigned long, pages, max_size);

        /*
         * Let the memory management subsystem know that we're going to need a
         * large number of page frames to allocate and make it free some memory.
         * NOTE: If this is not done, performance will be hurt badly in some
         * test cases.
         */
        shrink_all_memory(saveable - size);

        /*
         * The number of saveable pages in memory was too high, so apply some
         * pressure to decrease it.  First, make room for the largest possible
         * image and fail if that doesn't work.  Next, try to decrease the size
         * of the image as much as indicated by 'size' using allocations from
         * highmem and non-highmem zones separately.
         */
        pages_highmem = preallocate_image_highmem(highmem / 2);
        alloc = count - max_size;
        if (alloc > pages_highmem)
                alloc -= pages_highmem;
        else
                alloc = 0;
        pages = preallocate_image_memory(alloc, avail_normal);
        if (pages < alloc) {
                /* We have exhausted non-highmem pages, try highmem. */
                alloc -= pages;
                pages += pages_highmem;
                pages_highmem = preallocate_image_highmem(alloc);
                if (pages_highmem < alloc)
                        goto err_out;
                pages += pages_highmem;
                /*
                 * size is the desired number of saveable pages to leave in
                 * memory, so try to preallocate (all memory - size) pages.
                 */
                alloc = (count - pages) - size;
                pages += preallocate_image_highmem(alloc);
        } else {
                /*
                 * There are approximately max_size saveable pages at this point
                 * and we want to reduce this number down to size.
                 */
                alloc = max_size - size;
                size = preallocate_highmem_fraction(alloc, highmem, count);
                pages_highmem += size;
                alloc -= size;
                size = preallocate_image_memory(alloc, avail_normal);
                pages_highmem += preallocate_image_highmem(alloc - size);
                pages += pages_highmem + size;
        }

        /*
         * We only need as many page frames for the image as there are saveable
         * pages in memory, but we have allocated more.  Release the excessive
         * ones now.
         */
        free_unnecessary_pages();

 out:
        do_gettimeofday(&stop);
        printk(KERN_CONT "done (allocated %lu pages)\n", pages);
        swsusp_show_speed(&start, &stop, pages, "Allocated");

        return 0;

 err_out:
        printk(KERN_CONT "\n");
        swsusp_free();
        return -ENOMEM;
}

#ifdef CONFIG_HIGHMEM
/**
 *      count_pages_for_highmem - compute the number of non-highmem pages
 *      that will be necessary for creating copies of highmem pages.
 */

static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
        unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;

        if (free_highmem >= nr_highmem)
                nr_highmem = 0;
        else
                nr_highmem -= free_highmem;

        return nr_highmem;
}
#else
static unsigned int
count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
#endif /* CONFIG_HIGHMEM */

/**
 *      enough_free_mem - Make sure we have enough free memory for the
 *      snapshot image.
 */

static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
{
        struct zone *zone;
        unsigned int free = alloc_normal;

        for_each_populated_zone(zone)
                if (!is_highmem(zone))
                        free += zone_page_state(zone, NR_FREE_PAGES);

        nr_pages += count_pages_for_highmem(nr_highmem);
        pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
                nr_pages, PAGES_FOR_IO, free);

        return free > nr_pages + PAGES_FOR_IO;
}

#ifdef CONFIG_HIGHMEM
/**
 *      get_highmem_buffer - if there are some highmem pages in the suspend
 *      image, we may need the buffer to copy them and/or load their data.
 */

static inline int get_highmem_buffer(int safe_needed)
{
        buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
        return buffer ? 0 : -ENOMEM;
}

/**
 *      alloc_highmem_pages - allocate some highmem pages for the image.
 *      Try to allocate as many pages as needed, but if the number of free
 *      highmem pages is smaller than that, allocate them all.
 */
1522
1523static inline unsigned int
1524alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1525{
1526        unsigned int to_alloc = count_free_highmem_pages();
1527
1528        if (to_alloc > nr_highmem)
1529                to_alloc = nr_highmem;
1530
1531        nr_highmem -= to_alloc;
1532        while (to_alloc-- > 0) {
1533                struct page *page;
1534
1535                page = alloc_image_page(__GFP_HIGHMEM);
1536                memory_bm_set_bit(bm, page_to_pfn(page));
1537        }
1538        return nr_highmem;
1539}
1540#else
1541static inline int get_highmem_buffer(int safe_needed) { return 0; }
1542
1543static inline unsigned int
1544alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1545#endif /* CONFIG_HIGHMEM */
1546
1547/**
1548 *      swsusp_alloc - allocate memory for the suspend image
1549 *
1550 *      We first try to allocate as many highmem pages as there are
1551 *      saveable highmem pages in the system.  If that fails, we allocate
1552 *      non-highmem pages for the copies of the remaining highmem ones.
1553 *
1554 *      In this approach it is likely that the copies of highmem pages will
1555 *      also be located in the high memory, because of the way in which
1556 *      copy_data_pages() works.
1557 */
1558
1559static int
1560swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1561                unsigned int nr_pages, unsigned int nr_highmem)
1562{
1563        if (nr_highmem > 0) {
1564                if (get_highmem_buffer(PG_ANY))
1565                        goto err_out;
1566                if (nr_highmem > alloc_highmem) {
1567                        nr_highmem -= alloc_highmem;
1568                        nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1569                }
1570        }
1571        if (nr_pages > alloc_normal) {
1572                nr_pages -= alloc_normal;
1573                while (nr_pages-- > 0) {
1574                        struct page *page;
1575
1576                        page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1577                        if (!page)
1578                                goto err_out;
1579                        memory_bm_set_bit(copy_bm, page_to_pfn(page));
1580                }
1581        }
1582
1583        return 0;
1584
1585 err_out:
1586        swsusp_free();
1587        return -ENOMEM;
1588}
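
/*
 * Example of the flow above (editor's illustration): suppose 50 highmem
 * copies are needed but only 20 highmem pages were preallocated
 * (alloc_highmem == 20).  alloc_highmem_pages() then tries to take the
 * missing 30 from free highmem; if only 25 can be had, it returns 5,
 * and those 5 copies are allocated from normal memory instead.
 */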
1589
1590asmlinkage int swsusp_save(void)
1591{
1592        unsigned int nr_pages, nr_highmem;
1593
1594        printk(KERN_INFO "PM: Creating hibernation image:\n");
1595
1596        drain_local_pages(NULL);
1597        nr_pages = count_data_pages();
1598        nr_highmem = count_highmem_pages();
1599        printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1600
1601        if (!enough_free_mem(nr_pages, nr_highmem)) {
1602                printk(KERN_ERR "PM: Not enough free memory\n");
1603                return -ENOMEM;
1604        }
1605
1606        if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1607                printk(KERN_ERR "PM: Memory allocation failed\n");
1608                return -ENOMEM;
1609        }
1610
1611        /* While allocating the suspend pagedir, new cold pages may have
1612         * appeared.  Drain them, too.
1613         */
1614        drain_local_pages(NULL);
1615        copy_data_pages(&copy_bm, &orig_bm);
1616
1617        /*
1618         * End of critical section. From now on, we can write to memory,
1619         * but we should not touch the disk.  In particular, this means we
1620         * must _not_ touch swap space, except to write out our image, of course.
1621         */
1622
1623        nr_pages += nr_highmem;
1624        nr_copy_pages = nr_pages;
1625        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1626
1627        printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1628                nr_pages);
1629
1630        return 0;
1631}
1632
1633#ifndef CONFIG_ARCH_HIBERNATION_HEADER
1634static int init_header_complete(struct swsusp_info *info)
1635{
1636        memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1637        info->version_code = LINUX_VERSION_CODE;
1638        return 0;
1639}
1640
1641static char *check_image_kernel(struct swsusp_info *info)
1642{
1643        if (info->version_code != LINUX_VERSION_CODE)
1644                return "kernel version";
1645        if (strcmp(info->uts.sysname, init_utsname()->sysname))
1646                return "system type";
1647        if (strcmp(info->uts.release, init_utsname()->release))
1648                return "kernel release";
1649        if (strcmp(info->uts.version, init_utsname()->version))
1650                return "version";
1651        if (strcmp(info->uts.machine, init_utsname()->machine))
1652                return "machine";
1653        return NULL;
1654}
1655#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1656
1657unsigned long snapshot_get_image_size(void)
1658{
1659        return nr_copy_pages + nr_meta_pages + 1;
1660}
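
/*
 * Worked example (editor's illustration, assuming 4 KiB pages and
 * 8-byte longs): each meta page packs PAGE_SIZE / sizeof(long) == 512
 * pfns, so for nr_copy_pages == 100000 swsusp_save() computes
 * nr_meta_pages == DIV_ROUND_UP(100000 * 8, 4096) == 196, and the image
 * is 100000 + 196 + 1 == 100197 pages (roughly 391 MiB) in total.
 */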
1661
1662static int init_header(struct swsusp_info *info)
1663{
1664        memset(info, 0, sizeof(struct swsusp_info));
1665        info->num_physpages = num_physpages;
1666        info->image_pages = nr_copy_pages;
1667        info->pages = snapshot_get_image_size();
1668        info->size = info->pages;
1669        info->size <<= PAGE_SHIFT;
1670        return init_header_complete(info);
1671}
1672
1673/**
1674 *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1675 *      are stored in the array @buf[] (1 page at a time)
1676 */
1677
1678static inline void
1679pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1680{
1681        int j;
1682
1683        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1684                buf[j] = memory_bm_next_pfn(bm);
1685                if (unlikely(buf[j] == BM_END_OF_MAP))
1686                        break;
1687                /* Save page key for data page (s390 only). */
1688                page_key_read(buf + j);
1689        }
1690}
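
/*
 * Editor's note on the resulting stream layout: snapshot_read_next()
 * below emits one header page (struct swsusp_info), then nr_meta_pages
 * pages of pfns packed by pack_pfns(), and finally the nr_copy_pages
 * data pages, in the order in which their original pfns appear in the
 * meta pages.
 */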
1691
1692/**
1693 *      snapshot_read_next - used for reading the system memory snapshot.
1694 *
1695 *      On the first call, @handle should point to a zeroed
1696 *      snapshot_handle structure.  The structure gets updated and a pointer
1697 *      to it should be passed to this function on each subsequent call.
1698 *
1699 *      On success the function returns a positive number.  Then, the caller
1700 *      is allowed to read up to the returned number of bytes from the memory
1701 *      location computed by the data_of() macro.
1702 *
1703 *      The function returns 0 to indicate the end of data stream condition,
1704 *      and a negative number is returned on error.  In such cases the
1705 *      structure pointed to by @handle is not updated and should not be used
1706 *      any more.
1707 */
1708
1709int snapshot_read_next(struct snapshot_handle *handle)
1710{
1711        if (handle->cur > nr_meta_pages + nr_copy_pages)
1712                return 0;
1713
1714        if (!buffer) {
1715                /* This causes the buffer to be freed by swsusp_free() */
1716                buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1717                if (!buffer)
1718                        return -ENOMEM;
1719        }
1720        if (!handle->cur) {
1721                int error;
1722
1723                error = init_header((struct swsusp_info *)buffer);
1724                if (error)
1725                        return error;
1726                handle->buffer = buffer;
1727                memory_bm_position_reset(&orig_bm);
1728                memory_bm_position_reset(&copy_bm);
1729        } else if (handle->cur <= nr_meta_pages) {
1730                clear_page(buffer);
1731                pack_pfns(buffer, &orig_bm);
1732        } else {
1733                struct page *page;
1734
1735                page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1736                if (PageHighMem(page)) {
1737                        /* Highmem pages are copied to the buffer,
1738                         * because we can't return with a kmapped
1739                         * highmem page (we may not be called again).
1740                         */
1741                        void *kaddr;
1742
1743                        kaddr = kmap_atomic(page);
1744                        copy_page(buffer, kaddr);
1745                        kunmap_atomic(kaddr);
1746                        handle->buffer = buffer;
1747                } else {
1748                        handle->buffer = page_address(page);
1749                }
1750        }
1751        handle->cur++;
1752        return PAGE_SIZE;
1753}
1754
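/*
 * Minimal sketch (editor's illustration, not part of the original file):
 * how a hypothetical image writer might drive snapshot_read_next().
 * write_page() is an assumed helper that stores one page of data; the
 * data_of() macro from power.h yields the address to read from.
 */
static int example_save_image(void)
{
	struct snapshot_handle handle;
	int ret;

	memset(&handle, 0, sizeof(handle));
	for (;;) {
		ret = snapshot_read_next(&handle);
		if (ret <= 0)
			break;	/* 0: end of image, < 0: error */
		ret = write_page(data_of(handle), ret);	/* hypothetical helper */
		if (ret)
			break;
	}
	return ret;
}
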
1755/**
1756 *      mark_unsafe_pages - mark the pages that cannot be used for storing
1757 *      the image during resume, because they conflict with the pages that
1758 *      had been used before suspend
1759 */
1760
1761static int mark_unsafe_pages(struct memory_bitmap *bm)
1762{
1763        struct zone *zone;
1764        unsigned long pfn, max_zone_pfn;
1765
1766        /* Clear page flags */
1767        for_each_populated_zone(zone) {
1768                max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1769                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1770                        if (pfn_valid(pfn))
1771                                swsusp_unset_page_free(pfn_to_page(pfn));
1772        }
1773
1774        /* Mark pages that correspond to the "original" pfns as "unsafe" */
1775        memory_bm_position_reset(bm);
1776        do {
1777                pfn = memory_bm_next_pfn(bm);
1778                if (likely(pfn != BM_END_OF_MAP)) {
1779                        if (likely(pfn_valid(pfn)))
1780                                swsusp_set_page_free(pfn_to_page(pfn));
1781                        else
1782                                return -EFAULT;
1783                }
1784        } while (pfn != BM_END_OF_MAP);
1785
1786        allocated_unsafe_pages = 0;
1787
1788        return 0;
1789}
1790
1791static void
1792duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1793{
1794        unsigned long pfn;
1795
1796        memory_bm_position_reset(src);
1797        pfn = memory_bm_next_pfn(src);
1798        while (pfn != BM_END_OF_MAP) {
1799                memory_bm_set_bit(dst, pfn);
1800                pfn = memory_bm_next_pfn(src);
1801        }
1802}
1803
1804static int check_header(struct swsusp_info *info)
1805{
1806        char *reason;
1807
1808        reason = check_image_kernel(info);
1809        if (!reason && info->num_physpages != num_physpages)
1810                reason = "memory size";
1811        if (reason) {
1812                printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1813                return -EPERM;
1814        }
1815        return 0;
1816}
1817
1818/**
1819 *      load_header - check the image header and copy data from it
1820 */
1821
1822static int
1823load_header(struct swsusp_info *info)
1824{
1825        int error;
1826
1827        restore_pblist = NULL;
1828        error = check_header(info);
1829        if (!error) {
1830                nr_copy_pages = info->image_pages;
1831                nr_meta_pages = info->pages - info->image_pages - 1;
1832        }
1833        return error;
1834}
1835
1836/**
1837 *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1838 *      the corresponding bit in the memory bitmap @bm
1839 */
1840static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1841{
1842        int j;
1843
1844        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1845                if (unlikely(buf[j] == BM_END_OF_MAP))
1846                        break;
1847
1848                /* Extract and buffer page key for data page (s390 only). */
1849                page_key_memorize(buf + j);
1850
1851                if (memory_bm_pfn_present(bm, buf[j]))
1852                        memory_bm_set_bit(bm, buf[j]);
1853                else
1854                        return -EFAULT;
1855        }
1856
1857        return 0;
1858}
1859
1860/* List of "safe" pages that may be used to store data loaded from the suspend
1861 * image.
1862 */
1863static struct linked_page *safe_pages_list;
1864
1865#ifdef CONFIG_HIGHMEM
1866/* struct highmem_pbe is used for creating the list of highmem pages that
1867 * should be restored atomically during the resume from disk, because the page
1868 * frames they have occupied before the suspend are in use.
1869 */
1870struct highmem_pbe {
1871        struct page *copy_page; /* data is here now */
1872        struct page *orig_page; /* data was here before the suspend */
1873        struct highmem_pbe *next;
1874};
1875
1876/* List of highmem PBEs needed for restoring the highmem pages that were
1877 * allocated before the suspend and included in the suspend image, but have
1878 * also been allocated by the "resume" kernel, so their contents cannot be
1879 * written directly to their "original" page frames.
1880 */
1881static struct highmem_pbe *highmem_pblist;
1882
1883/**
1884 *      count_highmem_image_pages - compute the number of highmem pages in the
1885 *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1886 *      image pages are assumed to be set.
1887 */
1888
1889static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1890{
1891        unsigned long pfn;
1892        unsigned int cnt = 0;
1893
1894        memory_bm_position_reset(bm);
1895        pfn = memory_bm_next_pfn(bm);
1896        while (pfn != BM_END_OF_MAP) {
1897                if (PageHighMem(pfn_to_page(pfn)))
1898                        cnt++;
1899
1900                pfn = memory_bm_next_pfn(bm);
1901        }
1902        return cnt;
1903}
1904
1905/**
1906 *      prepare_highmem_image - try to allocate as many highmem pages as
1907 *      there are highmem image pages (@nr_highmem_p points to the variable
1908 *      containing the number of highmem image pages).  The pages that are
1909 *      "safe" (i.e. will not be overwritten when the suspend image is
1910 *      restored) have the corresponding bits set in @bm (it must be
1911 *      uninitialized).
1912 *
1913 *      NOTE: This function should not be called if there are no highmem
1914 *      image pages.
1915 */
1916
1917static unsigned int safe_highmem_pages;
1918
1919static struct memory_bitmap *safe_highmem_bm;
1920
1921static int
1922prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1923{
1924        unsigned int to_alloc;
1925
1926        if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1927                return -ENOMEM;
1928
1929        if (get_highmem_buffer(PG_SAFE))
1930                return -ENOMEM;
1931
1932        to_alloc = count_free_highmem_pages();
1933        if (to_alloc > *nr_highmem_p)
1934                to_alloc = *nr_highmem_p;
1935        else
1936                *nr_highmem_p = to_alloc;
1937
1938        safe_highmem_pages = 0;
1939        while (to_alloc-- > 0) {
1940                struct page *page;
1941
1942                page = alloc_page(__GFP_HIGHMEM);
1943                if (!swsusp_page_is_free(page)) {
1944                        /* The page is "safe", set its bit in the bitmap */
1945                        memory_bm_set_bit(bm, page_to_pfn(page));
1946                        safe_highmem_pages++;
1947                }
1948                /* Mark the page as allocated */
1949                swsusp_set_page_forbidden(page);
1950                swsusp_set_page_free(page);
1951        }
1952        memory_bm_position_reset(bm);
1953        safe_highmem_bm = bm;
1954        return 0;
1955}
1956
1957/**
1958 *      get_highmem_page_buffer - for given highmem image page find the buffer
1959 *      that suspend_write_next() should set for its caller to write to.
1960 *
1961 *      If the page is to be saved to its "original" page frame or a copy of
1962 *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1963 *      the copy of the page is to be made in normal memory, so the address of
1964 *      the copy is returned.
1965 *
1966 *      If @buffer is returned, the caller of suspend_write_next() will write
1967 *      the page's contents to @buffer, so they will have to be copied to the
1968 *      right location on the next call to suspend_write_next() and it is done
1969 *      with the help of copy_last_highmem_page().  For this purpose, if
1970 *      @buffer is returned, @last_highmem page is set to the page to which
1971 *      the data will have to be copied from @buffer.
1972 */
1973
1974static struct page *last_highmem_page;
1975
1976static void *
1977get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1978{
1979        struct highmem_pbe *pbe;
1980        void *kaddr;
1981
1982        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1983                /* We have allocated the "original" page frame and we can
1984                 * use it directly to store the loaded page.
1985                 */
1986                last_highmem_page = page;
1987                return buffer;
1988        }
1989        /* The "original" page frame has not been allocated and we have to
1990         * use a "safe" page frame to store the loaded page.
1991         */
1992        pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1993        if (!pbe) {
1994                swsusp_free();
1995                return ERR_PTR(-ENOMEM);
1996        }
1997        pbe->orig_page = page;
1998        if (safe_highmem_pages > 0) {
1999                struct page *tmp;
2000
2001                /* Copy of the page will be stored in high memory */
2002                kaddr = buffer;
2003                tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
2004                safe_highmem_pages--;
2005                last_highmem_page = tmp;
2006                pbe->copy_page = tmp;
2007        } else {
2008                /* Copy of the page will be stored in normal memory */
2009                kaddr = safe_pages_list;
2010                safe_pages_list = safe_pages_list->next;
2011                pbe->copy_page = virt_to_page(kaddr);
2012        }
2013        pbe->next = highmem_pblist;
2014        highmem_pblist = pbe;
2015        return kaddr;
2016}
2017
2018/**
2019 *      copy_last_highmem_page - copy the contents of a highmem image page
2020 *      from @buffer, where the caller of snapshot_write_next() has placed
2021 *      them, to the right location represented by @last_highmem_page.
2022 */
2023
2024static void copy_last_highmem_page(void)
2025{
2026        if (last_highmem_page) {
2027                void *dst;
2028
2029                dst = kmap_atomic(last_highmem_page);
2030                copy_page(dst, buffer);
2031                kunmap_atomic(dst);
2032                last_highmem_page = NULL;
2033        }
2034}
2035
2036static inline int last_highmem_page_copied(void)
2037{
2038        return !last_highmem_page;
2039}
2040
2041static inline void free_highmem_data(void)
2042{
2043        if (safe_highmem_bm)
2044                memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2045
2046        if (buffer)
2047                free_image_page(buffer, PG_UNSAFE_CLEAR);
2048}
2049#else
2050static inline int get_safe_write_buffer(void) { return 0; }
2051
2052static unsigned int
2053count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2054
2055static inline int
2056prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2057{
2058        return 0;
2059}
2060
2061static inline void *
2062get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2063{
2064        return ERR_PTR(-EINVAL);
2065}
2066
2067static inline void copy_last_highmem_page(void) {}
2068static inline int last_highmem_page_copied(void) { return 1; }
2069static inline void free_highmem_data(void) {}
2070#endif /* CONFIG_HIGHMEM */
2071
2072/**
2073 *      prepare_image - use the memory bitmap @bm to mark the pages that will
2074 *      be overwritten in the process of restoring the system memory state
2075 *      from the suspend image ("unsafe" pages) and allocate memory for the
2076 *      image.
2077 *
2078 *      The idea is to allocate a new memory bitmap first and then allocate
2079 *      as many pages as needed for the image data, but not to assign these
2080 *      pages to specific tasks initially.  Instead, we just mark them as
2081 *      allocated and create a list of "safe" pages that will be used
2082 *      later.  On systems with high memory a list of "safe" highmem pages is
2083 *      also created.
2084 */
2085
2086#define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
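
/*
 * Editor's note (assuming 4 KiB pages, 8-byte pointers, and
 * LINKED_PAGE_DATA_SIZE == PAGE_SIZE - sizeof(void *)): struct pbe is
 * three pointers (24 bytes), so each linked page carries
 * (4096 - 8) / 24 == 170 PBEs.
 */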
2087
2088static int
2089prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2090{
2091        unsigned int nr_pages, nr_highmem;
2092        struct linked_page *sp_list, *lp;
2093        int error;
2094
2095        /* If there is no highmem, the buffer will not be necessary */
2096        free_image_page(buffer, PG_UNSAFE_CLEAR);
2097        buffer = NULL;
2098
2099        nr_highmem = count_highmem_image_pages(bm);
2100        error = mark_unsafe_pages(bm);
2101        if (error)
2102                goto Free;
2103
2104        error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2105        if (error)
2106                goto Free;
2107
2108        duplicate_memory_bitmap(new_bm, bm);
2109        memory_bm_free(bm, PG_UNSAFE_KEEP);
2110        if (nr_highmem > 0) {
2111                error = prepare_highmem_image(bm, &nr_highmem);
2112                if (error)
2113                        goto Free;
2114        }
2115        /* Reserve some safe pages for potential later use.
2116         *
2117         * NOTE: This way we make sure there will be enough safe pages for the
2118         * chain_alloc() in get_buffer().  It is a bit wasteful, but
2119         * nr_copy_pages cannot be greater than 50% of the memory anyway.
2120         */
2121        sp_list = NULL;
2122        /* nr_copy_pages cannot be less than allocated_unsafe_pages */
2123        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2124        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2125        while (nr_pages > 0) {
2126                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2127                if (!lp) {
2128                        error = -ENOMEM;
2129                        goto Free;
2130                }
2131                lp->next = sp_list;
2132                sp_list = lp;
2133                nr_pages--;
2134        }
2135        /* Preallocate memory for the image */
2136        safe_pages_list = NULL;
2137        nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2138        while (nr_pages > 0) {
2139                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2140                if (!lp) {
2141                        error = -ENOMEM;
2142                        goto Free;
2143                }
2144                if (!swsusp_page_is_free(virt_to_page(lp))) {
2145                        /* The page is "safe", add it to the list */
2146                        lp->next = safe_pages_list;
2147                        safe_pages_list = lp;
2148                }
2149                /* Mark the page as allocated */
2150                swsusp_set_page_forbidden(virt_to_page(lp));
2151                swsusp_set_page_free(virt_to_page(lp));
2152                nr_pages--;
2153        }
2154        /* Free the reserved safe pages so that chain_alloc() can use them */
2155        while (sp_list) {
2156                lp = sp_list->next;
2157                free_image_page(sp_list, PG_UNSAFE_CLEAR);
2158                sp_list = lp;
2159        }
2160        return 0;
2161
2162 Free:
2163        swsusp_free();
2164        return error;
2165}
2166
2167/**
2168 *      get_buffer - compute the address that snapshot_write_next() should
2169 *      set for its caller to write to.
2170 */
2171
2172static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2173{
2174        struct pbe *pbe;
2175        struct page *page;
2176        unsigned long pfn = memory_bm_next_pfn(bm);
2177
2178        if (pfn == BM_END_OF_MAP)
2179                return ERR_PTR(-EFAULT);
2180
2181        page = pfn_to_page(pfn);
2182        if (PageHighMem(page))
2183                return get_highmem_page_buffer(page, ca);
2184
2185        if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2186                /* We have allocated the "original" page frame and we can
2187                 * use it directly to store the loaded page.
2188                 */
2189                return page_address(page);
2190
2191        /* The "original" page frame has not been allocated and we have to
2192         * use a "safe" page frame to store the loaded page.
2193         */
2194        pbe = chain_alloc(ca, sizeof(struct pbe));
2195        if (!pbe) {
2196                swsusp_free();
2197                return ERR_PTR(-ENOMEM);
2198        }
2199        pbe->orig_address = page_address(page);
2200        pbe->address = safe_pages_list;
2201        safe_pages_list = safe_pages_list->next;
2202        pbe->next = restore_pblist;
2203        restore_pblist = pbe;
2204        return pbe->address;
2205}
2206
2207/**
2208 *      snapshot_write_next - used for writing the system memory snapshot.
2209 *
2210 *      On the first call, @handle should point to a zeroed
2211 *      snapshot_handle structure.  The structure gets updated and a pointer
2212 *      to it should be passed to this function on each subsequent call.
2213 *
2214 *      On success the function returns a positive number.  Then, the caller
2215 *      is allowed to write up to the returned number of bytes to the memory
2216 *      location computed by the data_of() macro.
2217 *
2218 *      The function returns 0 to indicate the "end of file" condition,
2219 *      and a negative number is returned on error.  In such cases the
2220 *      structure pointed to by @handle is not updated and should not be used
2221 *      any more.
2222 */
2223
2224int snapshot_write_next(struct snapshot_handle *handle)
2225{
2226        static struct chain_allocator ca;
2227        int error = 0;
2228
2229        /* Check if we have already loaded the entire image */
2230        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2231                return 0;
2232
2233        handle->sync_read = 1;
2234
2235        if (!handle->cur) {
2236                if (!buffer)
2237                        /* This causes the buffer to be freed by swsusp_free() */
2238                        buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2239
2240                if (!buffer)
2241                        return -ENOMEM;
2242
2243                handle->buffer = buffer;
2244        } else if (handle->cur == 1) {
2245                error = load_header(buffer);
2246                if (error)
2247                        return error;
2248
2249                error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2250                if (error)
2251                        return error;
2252
2253                /* Allocate buffer for page keys. */
2254                error = page_key_alloc(nr_copy_pages);
2255                if (error)
2256                        return error;
2257
2258        } else if (handle->cur <= nr_meta_pages + 1) {
2259                error = unpack_orig_pfns(buffer, &copy_bm);
2260                if (error)
2261                        return error;
2262
2263                if (handle->cur == nr_meta_pages + 1) {
2264                        error = prepare_image(&orig_bm, &copy_bm);
2265                        if (error)
2266                                return error;
2267
2268                        chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2269                        memory_bm_position_reset(&orig_bm);
2270                        restore_pblist = NULL;
2271                        handle->buffer = get_buffer(&orig_bm, &ca);
2272                        handle->sync_read = 0;
2273                        if (IS_ERR(handle->buffer))
2274                                return PTR_ERR(handle->buffer);
2275                }
2276        } else {
2277                copy_last_highmem_page();
2278                /* Restore page key for data page (s390 only). */
2279                page_key_write(handle->buffer);
2280                handle->buffer = get_buffer(&orig_bm, &ca);
2281                if (IS_ERR(handle->buffer))
2282                        return PTR_ERR(handle->buffer);
2283                if (handle->buffer != buffer)
2284                        handle->sync_read = 0;
2285        }
2286        handle->cur++;
2287        return PAGE_SIZE;
2288}
2289
2290/**
2291 *      snapshot_write_finalize - must be called after the last call to
2292 *      snapshot_write_next() in case the last page in the image happens
2293 *      to be a highmem page and its contents should be stored in high
2294 *      memory.  Additionally, it releases the memory that will not be
2295 *      used any more.
2296 */
2297
2298void snapshot_write_finalize(struct snapshot_handle *handle)
2299{
2300        copy_last_highmem_page();
2301        /* Restore page key for data page (s390 only). */
2302        page_key_write(handle->buffer);
2303        page_key_free();
2304        /* Free only if we have loaded the image entirely */
2305        if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2306                memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2307                free_highmem_data();
2308        }
2309}
2310
2311int snapshot_image_loaded(struct snapshot_handle *handle)
2312{
2313        return !(!nr_copy_pages || !last_highmem_page_copied() ||
2314                        handle->cur <= nr_meta_pages + nr_copy_pages);
2315}
2316
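/*
 * Minimal sketch (editor's illustration, not part of the original file)
 * of the load-side protocol: feed image pages through
 * snapshot_write_next(), then finalize and verify.  read_page() is an
 * assumed helper that fills the supplied buffer with the next PAGE_SIZE
 * bytes of the image.
 */
static int example_load_image(void)
{
	struct snapshot_handle handle;
	int ret;

	memset(&handle, 0, sizeof(handle));
	for (;;) {
		ret = snapshot_write_next(&handle);
		if (ret <= 0)
			break;	/* 0: image complete, < 0: error */
		ret = read_page(data_of(handle));	/* hypothetical helper */
		if (ret)
			break;
	}
	snapshot_write_finalize(&handle);
	if (!ret && !snapshot_image_loaded(&handle))
		ret = -ENODATA;
	return ret;
}
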
2317#ifdef CONFIG_HIGHMEM
2318/* Assumes that @buf is ready and points to a "safe" page */
2319static inline void
2320swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2321{
2322        void *kaddr1, *kaddr2;
2323
2324        kaddr1 = kmap_atomic(p1);
2325        kaddr2 = kmap_atomic(p2);
2326        copy_page(buf, kaddr1);
2327        copy_page(kaddr1, kaddr2);
2328        copy_page(kaddr2, buf);
2329        kunmap_atomic(kaddr2);
2330        kunmap_atomic(kaddr1);
2331}
2332
2333/**
2334 *      restore_highmem - for each highmem page that was allocated before
2335 *      the suspend and included in the suspend image, and has also been
2336 *      allocated by the "resume" kernel, swap its current (i.e. "before
2337 *      resume") contents with the previous (i.e. "before suspend") ones.
2338 *
2339 *      If the resume eventually fails, we can call this function once
2340 *      again and restore the "before resume" highmem state.
2341 */
2342
2343int restore_highmem(void)
2344{
2345        struct highmem_pbe *pbe = highmem_pblist;
2346        void *buf;
2347
2348        if (!pbe)
2349                return 0;
2350
2351        buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2352        if (!buf)
2353                return -ENOMEM;
2354
2355        while (pbe) {
2356                swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2357                pbe = pbe->next;
2358        }
2359        free_image_page(buf, PG_UNSAFE_CLEAR);
2360        return 0;
2361}
2362#endif /* CONFIG_HIGHMEM */
2363