linux/arch/x86/kernel/e820.c
<<
>>
Prefs
   1/*
   2 * Handle the memory map.
   3 * The functions here do the job until bootmem takes over.
   4 *
   5 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
   6 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
   7 *     Alex Achenbach <xela@slit.de>, December 2002.
   8 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
   9 *
  10 */
  11#include <linux/kernel.h>
  12#include <linux/types.h>
  13#include <linux/init.h>
  14#include <linux/bootmem.h>
  15#include <linux/pfn.h>
  16#include <linux/suspend.h>
  17#include <linux/acpi.h>
  18#include <linux/firmware-map.h>
  19#include <linux/memblock.h>
  20
  21#include <asm/e820.h>
  22#include <asm/proto.h>
  23#include <asm/setup.h>
  24
  25/*
  26 * The e820 map is the map that gets modified e.g. with command line parameters
  27 * and that is also registered with modifications in the kernel resource tree
  28 * with the iomem_resource as parent.
  29 *
  30 * The e820_saved is directly saved after the BIOS-provided memory map is
  31 * copied. It doesn't get modified afterwards. It's registered for the
  32 * /sys/firmware/memmap interface.
  33 *
  34 * That memory map is not modified and is used as base for kexec. The kexec'd
  35 * kernel should get the same memory map as the firmware provides. Then the
  36 * user can e.g. boot the original kernel with mem=1G while still booting the
  37 * next kernel with full memory.
  38 */
/* Working map: the copy that command line parameters modify (see above). */
   39struct e820map e820;
/* Firmware-provided map as saved right after the copy (see above). */
   40struct e820map e820_saved;
   41
   42/* For PCI or other memory-mapped resources */
/* Placeholder value until e820_setup_gap() stores the chosen gap start. */
   43unsigned long pci_mem_start = 0xaeedbabe;
   44#ifdef CONFIG_PCI
   45EXPORT_SYMBOL(pci_mem_start);
   46#endif
  47
  48/*
  49 * This function checks if any part of the range <start,end> is mapped
  50 * with type.
  51 */
  52int
  53e820_any_mapped(u64 start, u64 end, unsigned type)
  54{
  55        int i;
  56
  57        for (i = 0; i < e820.nr_map; i++) {
  58                struct e820entry *ei = &e820.map[i];
  59
  60                if (type && ei->type != type)
  61                        continue;
  62                if (ei->addr >= end || ei->addr + ei->size <= start)
  63                        continue;
  64                return 1;
  65        }
  66        return 0;
  67}
  68EXPORT_SYMBOL_GPL(e820_any_mapped);
  69
  70/*
  71 * This function checks if the entire range <start,end> is mapped with type.
  72 *
  73 * Note: this function only works correct if the e820 table is sorted and
  74 * not-overlapping, which is the case
  75 */
  76int __init e820_all_mapped(u64 start, u64 end, unsigned type)
  77{
  78        int i;
  79
  80        for (i = 0; i < e820.nr_map; i++) {
  81                struct e820entry *ei = &e820.map[i];
  82
  83                if (type && ei->type != type)
  84                        continue;
  85                /* is the region (part) in overlap with the current region ?*/
  86                if (ei->addr >= end || ei->addr + ei->size <= start)
  87                        continue;
  88
  89                /* if the region is at the beginning of <start,end> we move
  90                 * start to the end of the region since it's ok until there
  91                 */
  92                if (ei->addr <= start)
  93                        start = ei->addr + ei->size;
  94                /*
  95                 * if start is now at or beyond end, we're done, full
  96                 * coverage
  97                 */
  98                if (start >= end)
  99                        return 1;
 100        }
 101        return 0;
 102}
 103
 104/*
 105 * Add a memory region to the kernel e820 map.
 106 */
 107static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
 108                                         int type)
 109{
 110        int x = e820x->nr_map;
 111
 112        if (x >= ARRAY_SIZE(e820x->map)) {
 113                printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
 114                return;
 115        }
 116
 117        e820x->map[x].addr = start;
 118        e820x->map[x].size = size;
 119        e820x->map[x].type = type;
 120        e820x->nr_map++;
 121}
 122
 123void __init e820_add_region(u64 start, u64 size, int type)
 124{
 125        __e820_add_region(&e820, start, size, type);
 126}
 127
 128static void __init e820_print_type(u32 type)
 129{
 130        switch (type) {
 131        case E820_RAM:
 132        case E820_RESERVED_KERN:
 133                printk(KERN_CONT "(usable)");
 134                break;
 135        case E820_RESERVED:
 136                printk(KERN_CONT "(reserved)");
 137                break;
 138        case E820_ACPI:
 139                printk(KERN_CONT "(ACPI data)");
 140                break;
 141        case E820_NVS:
 142                printk(KERN_CONT "(ACPI NVS)");
 143                break;
 144        case E820_UNUSABLE:
 145                printk(KERN_CONT "(unusable)");
 146                break;
 147        default:
 148                printk(KERN_CONT "type %u", type);
 149                break;
 150        }
 151}
 152
 153void __init e820_print_map(char *who)
 154{
 155        int i;
 156
 157        for (i = 0; i < e820.nr_map; i++) {
 158                printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
 159                       (unsigned long long) e820.map[i].addr,
 160                       (unsigned long long)
 161                       (e820.map[i].addr + e820.map[i].size));
 162                e820_print_type(e820.map[i].type);
 163                printk(KERN_CONT "\n");
 164        }
 165}
 166
 167/*
 168 * Sanitize the BIOS e820 map.
 169 *
 170 * Some e820 responses include overlapping entries. The following
 171 * replaces the original e820 map with a new one, removing overlaps,
 172 * and resolving conflicting memory types in favor of highest
 173 * numbered type.
 174 *
 175 * The input parameter biosmap points to an array of 'struct
 176 * e820entry' which on entry has elements in the range [0, *pnr_map)
 177 * valid, and which has space for up to max_nr_map entries.
  178 * On return, the resulting sanitized e820 map entries will be
  179 * overwritten in the same location, starting at biosmap.
 180 *
 181 * The integer pointed to by pnr_map must be valid on entry (the
 182 * current number of valid entries located at biosmap) and will
 183 * be updated on return, with the new number of valid entries
 184 * (something no more than max_nr_map.)
 185 *
 186 * The return value from sanitize_e820_map() is zero if it
 187 * successfully 'sanitized' the map entries passed in, and is -1
 188 * if it did nothing, which can happen if either of (1) it was
 189 * only passed one map entry, or (2) any of the input map entries
 190 * were invalid (start + size < start, meaning that the size was
 191 * so big the described memory range wrapped around through zero.)
 192 *
 193 *      Visually we're performing the following
 194 *      (1,2,3,4 = memory types)...
 195 *
 196 *      Sample memory map (w/overlaps):
 197 *         ____22__________________
 198 *         ______________________4_
 199 *         ____1111________________
 200 *         _44_____________________
 201 *         11111111________________
 202 *         ____________________33__
 203 *         ___________44___________
 204 *         __________33333_________
 205 *         ______________22________
 206 *         ___________________2222_
 207 *         _________111111111______
 208 *         _____________________11_
 209 *         _________________4______
 210 *
 211 *      Sanitized equivalent (no overlap):
 212 *         1_______________________
 213 *         _44_____________________
 214 *         ___1____________________
 215 *         ____22__________________
 216 *         ______11________________
 217 *         _________1______________
 218 *         __________3_____________
 219 *         ___________44___________
 220 *         _____________33_________
 221 *         _______________2________
 222 *         ________________1_______
 223 *         _________________4______
 224 *         ___________________2____
 225 *         ____________________33__
 226 *         ______________________4_
 227 */
 228
  229int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
  230                             u32 *pnr_map)
  231{
  232        struct change_member {
  233                struct e820entry *pbios; /* pointer to original bios entry */
  234                unsigned long long addr; /* address for this change point */
  235        };
        /*
         * Static scratch storage.  Each non-empty input entry contributes
         * two change points (its start and its end), hence the 2x sizing.
         */
  236        static struct change_member change_point_list[2*E820_X_MAX] __initdata;
  237        static struct change_member *change_point[2*E820_X_MAX] __initdata;
  238        static struct e820entry *overlap_list[E820_X_MAX] __initdata;
  239        static struct e820entry new_bios[E820_X_MAX] __initdata;
  240        struct change_member *change_tmp;
  241        unsigned long current_type, last_type;
  242        unsigned long long last_addr;
  243        int chgidx, still_changing;
  244        int overlap_entries;
  245        int new_bios_entry;
  246        int old_nr, new_nr, chg_nr;
  247        int i;
  248
  249        /* if there's only one memory region, don't bother */
  250        if (*pnr_map < 2)
  251                return -1;
  252
  253        old_nr = *pnr_map;
  254        BUG_ON(old_nr > max_nr_map);
  255
  256        /* bail out if we find any unreasonable addresses in bios map */
  257        for (i = 0; i < old_nr; i++)
  258                if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
  259                        return -1;
  260
  261        /* create pointers for initial change-point information (for sorting) */
  262        for (i = 0; i < 2 * old_nr; i++)
  263                change_point[i] = &change_point_list[i];
  264
  265        /* record all known change-points (starting and ending addresses),
  266           omitting those that are for empty memory regions */
  267        chgidx = 0;
  268        for (i = 0; i < old_nr; i++)    {
  269                if (biosmap[i].size != 0) {
  270                        change_point[chgidx]->addr = biosmap[i].addr;
  271                        change_point[chgidx++]->pbios = &biosmap[i];
  272                        change_point[chgidx]->addr = biosmap[i].addr +
  273                                biosmap[i].size;
  274                        change_point[chgidx++]->pbios = &biosmap[i];
  275                }
  276        }
  277        chg_nr = chgidx;
  278
  279        /* sort change-point list by memory addresses (low -> high) */
        /* bubble sort: repeat passes until one completes without a swap */
  280        still_changing = 1;
  281        while (still_changing)  {
  282                still_changing = 0;
  283                for (i = 1; i < chg_nr; i++)  {
  284                        unsigned long long curaddr, lastaddr;
  285                        unsigned long long curpbaddr, lastpbaddr;
  286
  287                        curaddr = change_point[i]->addr;
  288                        lastaddr = change_point[i - 1]->addr;
  289                        curpbaddr = change_point[i]->pbios->addr;
  290                        lastpbaddr = change_point[i - 1]->pbios->addr;
  291
  292                        /*
  293                         * swap entries, when:
  294                         *
  295                         * curaddr < lastaddr or
  296                         * curaddr == lastaddr and curaddr == curpbaddr and
  297                         * lastaddr != lastpbaddr
                         *
                         * The second rule places the start of one region
                         * ahead of the end of another region that shares
                         * the same address.
  298                         */
  299                        if (curaddr < lastaddr ||
  300                            (curaddr == lastaddr && curaddr == curpbaddr &&
  301                             lastaddr != lastpbaddr)) {
  302                                change_tmp = change_point[i];
  303                                change_point[i] = change_point[i-1];
  304                                change_point[i-1] = change_tmp;
  305                                still_changing = 1;
  306                        }
  307                }
  308        }
  309
  310        /* create a new bios memory map, removing overlaps */
  311        overlap_entries = 0;     /* number of entries in the overlap table */
  312        new_bios_entry = 0;      /* index for creating new bios map entries */
  313        last_type = 0;           /* start with undefined memory type */
  314        last_addr = 0;           /* start with 0 as last starting address */
  315
  316        /* loop through change-points, determining effect on the new bios map */
  317        for (chgidx = 0; chgidx < chg_nr; chgidx++) {
  318                /* keep track of all overlapping bios entries */
                /* a change point equal to its entry's addr is that entry's start */
  319                if (change_point[chgidx]->addr ==
  320                    change_point[chgidx]->pbios->addr) {
  321                        /*
  322                         * add map entry to overlap list (> 1 entry
  323                         * implies an overlap)
  324                         */
  325                        overlap_list[overlap_entries++] =
  326                                change_point[chgidx]->pbios;
  327                } else {
  328                        /*
  329                         * remove entry from list (order independent,
  330                         * so swap with last)
  331                         */
  332                        for (i = 0; i < overlap_entries; i++) {
  333                                if (overlap_list[i] ==
  334                                    change_point[chgidx]->pbios)
  335                                        overlap_list[i] =
  336                                                overlap_list[overlap_entries-1];
  337                        }
  338                        overlap_entries--;
  339                }
  340                /*
  341                 * if there are overlapping entries, decide which
  342                 * "type" to use (larger value takes precedence --
  343                 * 1=usable, 2,3,4,4+=unusable)
  344                 */
  345                current_type = 0;
  346                for (i = 0; i < overlap_entries; i++)
  347                        if (overlap_list[i]->type > current_type)
  348                                current_type = overlap_list[i]->type;
  349                /*
  350                 * continue building up new bios map based on this
  351                 * information
  352                 */
  353                if (current_type != last_type)  {
                        /* the type changed: close the entry being built, if any */
  354                        if (last_type != 0)      {
  355                                new_bios[new_bios_entry].size =
  356                                        change_point[chgidx]->addr - last_addr;
  357                                /*
  358                                 * move forward only if the new size
  359                                 * was non-zero
  360                                 */
  361                                if (new_bios[new_bios_entry].size != 0)
  362                                        /*
  363                                         * no more space left for new
  364                                         * bios entries ?
  365                                         */
  366                                        if (++new_bios_entry >= max_nr_map)
  367                                                break;
  368                        }
                        /* a non-zero type opens a new entry at this address */
  369                        if (current_type != 0)  {
  370                                new_bios[new_bios_entry].addr =
  371                                        change_point[chgidx]->addr;
  372                                new_bios[new_bios_entry].type = current_type;
  373                                last_addr = change_point[chgidx]->addr;
  374                        }
  375                        last_type = current_type;
  376                }
  377        }
  378        /* retain count for new bios entries */
  379        new_nr = new_bios_entry;
  380
  381        /* copy new bios mapping into original location */
  382        memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
  383        *pnr_map = new_nr;
  384
  385        return 0;
  386}
 387
 388static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
 389{
 390        while (nr_map) {
 391                u64 start = biosmap->addr;
 392                u64 size = biosmap->size;
 393                u64 end = start + size;
 394                u32 type = biosmap->type;
 395
 396                /* Overflow in 64 bits? Ignore the memory map. */
 397                if (start > end)
 398                        return -1;
 399
 400                e820_add_region(start, size, type);
 401
 402                biosmap++;
 403                nr_map--;
 404        }
 405        return 0;
 406}
 407
 408/*
 409 * Copy the BIOS e820 map into a safe place.
 410 *
 411 * Sanity-check it while we're at it..
 412 *
 413 * If we're lucky and live on a modern system, the setup code
 414 * will have given us a memory map that we can use to properly
 415 * set up memory.  If we aren't, we'll fake a memory map.
 416 */
 417static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
 418{
 419        /* Only one memory region (or negative)? Ignore it */
 420        if (nr_map < 2)
 421                return -1;
 422
 423        return __append_e820_map(biosmap, nr_map);
 424}
 425
 426static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
 427                                        u64 size, unsigned old_type,
 428                                        unsigned new_type)
 429{
 430        u64 end;
 431        unsigned int i;
 432        u64 real_updated_size = 0;
 433
 434        BUG_ON(old_type == new_type);
 435
 436        if (size > (ULLONG_MAX - start))
 437                size = ULLONG_MAX - start;
 438
 439        end = start + size;
 440        printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
 441                       (unsigned long long) start,
 442                       (unsigned long long) end);
 443        e820_print_type(old_type);
 444        printk(KERN_CONT " ==> ");
 445        e820_print_type(new_type);
 446        printk(KERN_CONT "\n");
 447
 448        for (i = 0; i < e820x->nr_map; i++) {
 449                struct e820entry *ei = &e820x->map[i];
 450                u64 final_start, final_end;
 451                u64 ei_end;
 452
 453                if (ei->type != old_type)
 454                        continue;
 455
 456                ei_end = ei->addr + ei->size;
 457                /* totally covered by new range? */
 458                if (ei->addr >= start && ei_end <= end) {
 459                        ei->type = new_type;
 460                        real_updated_size += ei->size;
 461                        continue;
 462                }
 463
 464                /* new range is totally covered? */
 465                if (ei->addr < start && ei_end > end) {
 466                        __e820_add_region(e820x, start, size, new_type);
 467                        __e820_add_region(e820x, end, ei_end - end, ei->type);
 468                        ei->size = start - ei->addr;
 469                        real_updated_size += size;
 470                        continue;
 471                }
 472
 473                /* partially covered */
 474                final_start = max(start, ei->addr);
 475                final_end = min(end, ei_end);
 476                if (final_start >= final_end)
 477                        continue;
 478
 479                __e820_add_region(e820x, final_start, final_end - final_start,
 480                                  new_type);
 481
 482                real_updated_size += final_end - final_start;
 483
 484                /*
 485                 * left range could be head or tail, so need to update
 486                 * size at first.
 487                 */
 488                ei->size -= final_end - final_start;
 489                if (ei->addr < final_start)
 490                        continue;
 491                ei->addr = final_end;
 492        }
 493        return real_updated_size;
 494}
 495
 496u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 497                             unsigned new_type)
 498{
 499        return __e820_update_range(&e820, start, size, old_type, new_type);
 500}
 501
 502static u64 __init e820_update_range_saved(u64 start, u64 size,
 503                                          unsigned old_type, unsigned new_type)
 504{
 505        return __e820_update_range(&e820_saved, start, size, old_type,
 506                                     new_type);
 507}
 508
 509/* make e820 not cover the range */
 510u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
 511                             int checktype)
 512{
 513        int i;
 514        u64 end;
 515        u64 real_removed_size = 0;
 516
 517        if (size > (ULLONG_MAX - start))
 518                size = ULLONG_MAX - start;
 519
 520        end = start + size;
 521        printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
 522                       (unsigned long long) start,
 523                       (unsigned long long) end);
 524        if (checktype)
 525                e820_print_type(old_type);
 526        printk(KERN_CONT "\n");
 527
 528        for (i = 0; i < e820.nr_map; i++) {
 529                struct e820entry *ei = &e820.map[i];
 530                u64 final_start, final_end;
 531                u64 ei_end;
 532
 533                if (checktype && ei->type != old_type)
 534                        continue;
 535
 536                ei_end = ei->addr + ei->size;
 537                /* totally covered? */
 538                if (ei->addr >= start && ei_end <= end) {
 539                        real_removed_size += ei->size;
 540                        memset(ei, 0, sizeof(struct e820entry));
 541                        continue;
 542                }
 543
 544                /* new range is totally covered? */
 545                if (ei->addr < start && ei_end > end) {
 546                        e820_add_region(end, ei_end - end, ei->type);
 547                        ei->size = start - ei->addr;
 548                        real_removed_size += size;
 549                        continue;
 550                }
 551
 552                /* partially covered */
 553                final_start = max(start, ei->addr);
 554                final_end = min(end, ei_end);
 555                if (final_start >= final_end)
 556                        continue;
 557                real_removed_size += final_end - final_start;
 558
 559                /*
 560                 * left range could be head or tail, so need to update
 561                 * size at first.
 562                 */
 563                ei->size -= final_end - final_start;
 564                if (ei->addr < final_start)
 565                        continue;
 566                ei->addr = final_end;
 567        }
 568        return real_removed_size;
 569}
 570
 571void __init update_e820(void)
 572{
 573        u32 nr_map;
 574
 575        nr_map = e820.nr_map;
 576        if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
 577                return;
 578        e820.nr_map = nr_map;
 579        printk(KERN_INFO "modified physical RAM map:\n");
 580        e820_print_map("modified");
 581}
 582static void __init update_e820_saved(void)
 583{
 584        u32 nr_map;
 585
 586        nr_map = e820_saved.nr_map;
 587        if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
 588                return;
 589        e820_saved.nr_map = nr_map;
 590}
 591#define MAX_GAP_END 0x100000000ull
 592/*
 593 * Search for a gap in the e820 memory space from start_addr to end_addr.
 594 */
 595__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
 596                unsigned long start_addr, unsigned long long end_addr)
 597{
 598        unsigned long long last;
 599        int i = e820.nr_map;
 600        int found = 0;
 601
 602        last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
 603
 604        while (--i >= 0) {
 605                unsigned long long start = e820.map[i].addr;
 606                unsigned long long end = start + e820.map[i].size;
 607
 608                if (end < start_addr)
 609                        continue;
 610
 611                /*
 612                 * Since "last" is at most 4GB, we know we'll
 613                 * fit in 32 bits if this condition is true
 614                 */
 615                if (last > end) {
 616                        unsigned long gap = last - end;
 617
 618                        if (gap >= *gapsize) {
 619                                *gapsize = gap;
 620                                *gapstart = end;
 621                                found = 1;
 622                        }
 623                }
 624                if (start < last)
 625                        last = start;
 626        }
 627        return found;
 628}
 629
 630/*
 631 * Search for the biggest gap in the low 32 bits of the e820
 632 * memory space.  We pass this space to PCI to assign MMIO resources
 633 * for hotplug or unconfigured devices in.
 634 * Hopefully the BIOS let enough space left.
 635 */
 636__init void e820_setup_gap(void)
 637{
 638        unsigned long gapstart, gapsize;
 639        int found;
 640
 641        gapstart = 0x10000000;
 642        gapsize = 0x400000;
 643        found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
 644
 645#ifdef CONFIG_X86_64
 646        if (!found) {
 647                gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
 648                printk(KERN_ERR
 649        "PCI: Warning: Cannot find a gap in the 32bit address range\n"
 650        "PCI: Unassigned devices with 32bit resource registers may break!\n");
 651        }
 652#endif
 653
 654        /*
 655         * e820_reserve_resources_late protect stolen RAM already
 656         */
 657        pci_mem_start = gapstart;
 658
 659        printk(KERN_INFO
 660               "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
 661               pci_mem_start, gapstart, gapsize);
 662}
 663
 664/**
 665 * Because of the size limitation of struct boot_params, only first
 666 * 128 E820 memory entries are passed to kernel via
 667 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
 668 * linked list of struct setup_data, which is parsed here.
 669 */
 670void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
 671{
 672        u32 map_len;
 673        int entries;
 674        struct e820entry *extmap;
 675
 676        entries = sdata->len / sizeof(struct e820entry);
 677        map_len = sdata->len + sizeof(struct setup_data);
 678        if (map_len > PAGE_SIZE)
 679                sdata = early_ioremap(pa_data, map_len);
 680        extmap = (struct e820entry *)(sdata->data);
 681        __append_e820_map(extmap, entries);
 682        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 683        if (map_len > PAGE_SIZE)
 684                early_iounmap(sdata, map_len);
 685        printk(KERN_INFO "extended physical RAM map:\n");
 686        e820_print_map("extended");
 687}
 688
 689#if defined(CONFIG_X86_64) || \
 690        (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
 691/**
 692 * Find the ranges of physical addresses that do not correspond to
 693 * e820 RAM areas and mark the corresponding pages as nosave for
 694 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 695 *
 696 * This function requires the e820 map to be sorted and without any
 697 * overlapping entries and assumes the first e820 area to be RAM.
 698 */
 699void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 700{
 701        int i;
 702        unsigned long pfn;
 703
 704        pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
 705        for (i = 1; i < e820.nr_map; i++) {
 706                struct e820entry *ei = &e820.map[i];
 707
 708                if (pfn < PFN_UP(ei->addr))
 709                        register_nosave_region(pfn, PFN_UP(ei->addr));
 710
 711                pfn = PFN_DOWN(ei->addr + ei->size);
 712                if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
 713                        register_nosave_region(PFN_UP(ei->addr), pfn);
 714
 715                if (pfn >= limit_pfn)
 716                        break;
 717        }
 718}
 719#endif
 720
 721#ifdef CONFIG_HIBERNATION
 722/**
 723 * Mark ACPI NVS memory region, so that we can save/restore it during
 724 * hibernation and the subsequent resume.
 725 */
 726static int __init e820_mark_nvs_memory(void)
 727{
 728        int i;
 729
 730        for (i = 0; i < e820.nr_map; i++) {
 731                struct e820entry *ei = &e820.map[i];
 732
 733                if (ei->type == E820_NVS)
 734                        suspend_nvs_register(ei->addr, ei->size);
 735        }
 736
 737        return 0;
 738}
 739core_initcall(e820_mark_nvs_memory);
 740#endif
 741
 742/*
 743 * pre allocated 4k and reserved it in memblock and e820_saved
 744 */
 745u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 746{
 747        u64 size = 0;
 748        u64 addr;
 749        u64 start;
 750
 751        for (start = startt; ; start += size) {
 752                start = memblock_x86_find_in_range_size(start, &size, align);
 753                if (start == MEMBLOCK_ERROR)
 754                        return 0;
 755                if (size >= sizet)
 756                        break;
 757        }
 758
 759#ifdef CONFIG_X86_32
 760        if (start >= MAXMEM)
 761                return 0;
 762        if (start + size > MAXMEM)
 763                size = MAXMEM - start;
 764#endif
 765
 766        addr = round_down(start + size - sizet, align);
 767        if (addr < start)
 768                return 0;
 769        memblock_x86_reserve_range(addr, addr + sizet, "new next");
 770        e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
 771        printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
 772        update_e820_saved();
 773
 774        return addr;
 775}
 776
 777#ifdef CONFIG_X86_32
 778# ifdef CONFIG_X86_PAE
 779#  define MAX_ARCH_PFN          (1ULL<<(36-PAGE_SHIFT))
 780# else
 781#  define MAX_ARCH_PFN          (1ULL<<(32-PAGE_SHIFT))
 782# endif
 783#else /* CONFIG_X86_32 */
 784# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
 785#endif
 786
 787/*
 788 * Find the highest page frame number we have available
 789 */
 790static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 791{
 792        int i;
 793        unsigned long last_pfn = 0;
 794        unsigned long max_arch_pfn = MAX_ARCH_PFN;
 795
 796        for (i = 0; i < e820.nr_map; i++) {
 797                struct e820entry *ei = &e820.map[i];
 798                unsigned long start_pfn;
 799                unsigned long end_pfn;
 800
 801                if (ei->type != type)
 802                        continue;
 803
 804                start_pfn = ei->addr >> PAGE_SHIFT;
 805                end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
 806
 807                if (start_pfn >= limit_pfn)
 808                        continue;
 809                if (end_pfn > limit_pfn) {
 810                        last_pfn = limit_pfn;
 811                        break;
 812                }
 813                if (end_pfn > last_pfn)
 814                        last_pfn = end_pfn;
 815        }
 816
 817        if (last_pfn > max_arch_pfn)
 818                last_pfn = max_arch_pfn;
 819
 820        printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
 821                         last_pfn, max_arch_pfn);
 822        return last_pfn;
 823}
 824unsigned long __init e820_end_of_ram_pfn(void)
 825{
 826        return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
 827}
 828
 829unsigned long __init e820_end_of_low_ram_pfn(void)
 830{
 831        return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
 832}
 833
 834static void early_panic(char *msg)
 835{
 836        early_printk(msg);
 837        panic(msg);
 838}
 839
 840static int userdef __initdata;
 841
 842/* "mem=nopentium" disables the 4MB page tables. */
 843static int __init parse_memopt(char *p)
 844{
 845        u64 mem_size;
 846
 847        if (!p)
 848                return -EINVAL;
 849
 850#ifdef CONFIG_X86_32
 851        if (!strcmp(p, "nopentium")) {
 852                setup_clear_cpu_cap(X86_FEATURE_PSE);
 853                return 0;
 854        }
 855#endif
 856
 857        userdef = 1;
 858        mem_size = memparse(p, &p);
 859        e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 860
 861        return 0;
 862}
 863early_param("mem", parse_memopt);
 864
 865static int __init parse_memmap_opt(char *p)
 866{
 867        char *oldp;
 868        u64 start_at, mem_size;
 869
 870        if (!p)
 871                return -EINVAL;
 872
 873        if (!strncmp(p, "exactmap", 8)) {
 874#ifdef CONFIG_CRASH_DUMP
 875                /*
 876                 * If we are doing a crash dump, we still need to know
 877                 * the real mem size before original memory map is
 878                 * reset.
 879                 */
 880                saved_max_pfn = e820_end_of_ram_pfn();
 881#endif
 882                e820.nr_map = 0;
 883                userdef = 1;
 884                return 0;
 885        }
 886
 887        oldp = p;
 888        mem_size = memparse(p, &p);
 889        if (p == oldp)
 890                return -EINVAL;
 891
 892        userdef = 1;
 893        if (*p == '@') {
 894                start_at = memparse(p+1, &p);
 895                e820_add_region(start_at, mem_size, E820_RAM);
 896        } else if (*p == '#') {
 897                start_at = memparse(p+1, &p);
 898                e820_add_region(start_at, mem_size, E820_ACPI);
 899        } else if (*p == '$') {
 900                start_at = memparse(p+1, &p);
 901                e820_add_region(start_at, mem_size, E820_RESERVED);
 902        } else
 903                e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 904
 905        return *p == '\0' ? 0 : -EINVAL;
 906}
 907early_param("memmap", parse_memmap_opt);
 908
 909void __init finish_e820_parsing(void)
 910{
 911        if (userdef) {
 912                u32 nr = e820.nr_map;
 913
 914                if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
 915                        early_panic("Invalid user supplied memory map");
 916                e820.nr_map = nr;
 917
 918                printk(KERN_INFO "user-defined physical RAM map:\n");
 919                e820_print_map("user");
 920        }
 921}
 922
 923static inline const char *e820_type_to_string(int e820_type)
 924{
 925        switch (e820_type) {
 926        case E820_RESERVED_KERN:
 927        case E820_RAM:  return "System RAM";
 928        case E820_ACPI: return "ACPI Tables";
 929        case E820_NVS:  return "ACPI Non-volatile Storage";
 930        case E820_UNUSABLE:     return "Unusable memory";
 931        default:        return "reserved";
 932        }
 933}
 934
 935/*
 936 * Mark e820 reserved areas as busy for the resource manager.
 937 */
 938static struct resource __initdata *e820_res;
 939void __init e820_reserve_resources(void)
 940{
 941        int i;
 942        struct resource *res;
 943        u64 end;
 944
 945        res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
 946        e820_res = res;
 947        for (i = 0; i < e820.nr_map; i++) {
 948                end = e820.map[i].addr + e820.map[i].size - 1;
 949                if (end != (resource_size_t)end) {
 950                        res++;
 951                        continue;
 952                }
 953                res->name = e820_type_to_string(e820.map[i].type);
 954                res->start = e820.map[i].addr;
 955                res->end = end;
 956
 957                res->flags = IORESOURCE_MEM;
 958
 959                /*
 960                 * don't register the region that could be conflicted with
 961                 * pci device BAR resource and insert them later in
 962                 * pcibios_resource_survey()
 963                 */
 964                if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
 965                        res->flags |= IORESOURCE_BUSY;
 966                        insert_resource(&iomem_resource, res);
 967                }
 968                res++;
 969        }
 970
 971        for (i = 0; i < e820_saved.nr_map; i++) {
 972                struct e820entry *entry = &e820_saved.map[i];
 973                firmware_map_add_early(entry->addr,
 974                        entry->addr + entry->size - 1,
 975                        e820_type_to_string(entry->type));
 976        }
 977}
 978
 979/* How much should we pad RAM ending depending on where it is? */
 980static unsigned long ram_alignment(resource_size_t pos)
 981{
 982        unsigned long mb = pos >> 20;
 983
 984        /* To 64kB in the first megabyte */
 985        if (!mb)
 986                return 64*1024;
 987
 988        /* To 1MB in the first 16MB */
 989        if (mb < 16)
 990                return 1024*1024;
 991
 992        /* To 64MB for anything above that */
 993        return 64*1024*1024;
 994}
 995
 996#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
 997
 998void __init e820_reserve_resources_late(void)
 999{
1000        int i;
1001        struct resource *res;
1002
1003        res = e820_res;
1004        for (i = 0; i < e820.nr_map; i++) {
1005                if (!res->parent && res->end)
1006                        insert_resource_expand_to_fit(&iomem_resource, res);
1007                res++;
1008        }
1009
1010        /*
1011         * Try to bump up RAM regions to reasonable boundaries to
1012         * avoid stolen RAM:
1013         */
1014        for (i = 0; i < e820.nr_map; i++) {
1015                struct e820entry *entry = &e820.map[i];
1016                u64 start, end;
1017
1018                if (entry->type != E820_RAM)
1019                        continue;
1020                start = entry->addr + entry->size;
1021                end = round_up(start, ram_alignment(start)) - 1;
1022                if (end > MAX_RESOURCE_SIZE)
1023                        end = MAX_RESOURCE_SIZE;
1024                if (start >= end)
1025                        continue;
1026                printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1027                               start, end);
1028                reserve_region_with_split(&iomem_resource, start, end,
1029                                          "RAM buffer");
1030        }
1031}
1032
1033char *__init default_machine_specific_memory_setup(void)
1034{
1035        char *who = "BIOS-e820";
1036        u32 new_nr;
1037        /*
1038         * Try to copy the BIOS-supplied E820-map.
1039         *
1040         * Otherwise fake a memory map; one section from 0k->640k,
1041         * the next section from 1mb->appropriate_mem_k
1042         */
1043        new_nr = boot_params.e820_entries;
1044        sanitize_e820_map(boot_params.e820_map,
1045                        ARRAY_SIZE(boot_params.e820_map),
1046                        &new_nr);
1047        boot_params.e820_entries = new_nr;
1048        if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
1049          < 0) {
1050                u64 mem_size;
1051
1052                /* compare results from other methods and take the greater */
1053                if (boot_params.alt_mem_k
1054                    < boot_params.screen_info.ext_mem_k) {
1055                        mem_size = boot_params.screen_info.ext_mem_k;
1056                        who = "BIOS-88";
1057                } else {
1058                        mem_size = boot_params.alt_mem_k;
1059                        who = "BIOS-e801";
1060                }
1061
1062                e820.nr_map = 0;
1063                e820_add_region(0, LOWMEMSIZE(), E820_RAM);
1064                e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
1065        }
1066
1067        /* In case someone cares... */
1068        return who;
1069}
1070
1071void __init setup_memory_map(void)
1072{
1073        char *who;
1074
1075        who = x86_init.resources.memory_setup();
1076        memcpy(&e820_saved, &e820, sizeof(struct e820map));
1077        printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1078        e820_print_map(who);
1079}
1080
1081void __init memblock_x86_fill(void)
1082{
1083        int i;
1084        u64 end;
1085
1086        /*
1087         * EFI may have more than 128 entries
1088         * We are safe to enable resizing, beause memblock_x86_fill()
1089         * is rather later for x86
1090         */
1091        memblock_can_resize = 1;
1092
1093        for (i = 0; i < e820.nr_map; i++) {
1094                struct e820entry *ei = &e820.map[i];
1095
1096                end = ei->addr + ei->size;
1097                if (end != (resource_size_t)end)
1098                        continue;
1099
1100                if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
1101                        continue;
1102
1103                memblock_add(ei->addr, ei->size);
1104        }
1105
1106        memblock_analyze();
1107        memblock_dump_all();
1108}
1109
1110void __init memblock_find_dma_reserve(void)
1111{
1112#ifdef CONFIG_X86_64
1113        u64 free_size_pfn;
1114        u64 mem_size_pfn;
1115        /*
1116         * need to find out used area below MAX_DMA_PFN
1117         * need to use memblock to get free size in [0, MAX_DMA_PFN]
1118         * at first, and assume boot_mem will not take below MAX_DMA_PFN
1119         */
1120        mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
1121        free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
1122        set_dma_reserve(mem_size_pfn - free_size_pfn);
1123#endif
1124}
1125