linux/mm/bootmem.c
<<
>>
Prefs
   1/*
   2 *  linux/mm/bootmem.c
   3 *
   4 *  Copyright (C) 1999 Ingo Molnar
   5 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
   6 *
   7 *  simple boot-time physical memory area allocator and
   8 *  free memory collector. It's used to deal with reserved
   9 *  system memory and memory holes as well.
  10 */
  11#include <linux/init.h>
  12#include <linux/pfn.h>
  13#include <linux/bootmem.h>
  14#include <linux/module.h>
  15
  16#include <asm/bug.h>
  17#include <asm/io.h>
  18#include <asm/processor.h>
  19
  20#include "internal.h"
  21
  /*
   * Page frame limits shared with the rest of the kernel.
   *
   * Nothing in this file takes a lock: callers must serialize access to
   * the boot allocator themselves (the boot process already does).
   */
  unsigned long max_low_pfn;
  unsigned long min_low_pfn;
  unsigned long max_pfn;

  /* All linked bootmem_data_t descriptors, ordered by link_bootmem(). */
  static LIST_HEAD(bdata_list);

  #ifdef CONFIG_CRASH_DUMP
  /*
   * When booting because of a crash, max_pfn is a very low value. This
   * keeps the amount of memory that the previous kernel used.
   */
  unsigned long saved_max_pfn;
  #endif
  38
  39/* return the number of _pages_ that will be allocated for the boot bitmap */
  40unsigned long __init bootmem_bootmap_pages(unsigned long pages)
  41{
  42        unsigned long mapsize;
  43
  44        mapsize = (pages+7)/8;
  45        mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
  46        mapsize >>= PAGE_SHIFT;
  47
  48        return mapsize;
  49}
  50
  51/*
  52 * link bdata in order
  53 */
  54static void __init link_bootmem(bootmem_data_t *bdata)
  55{
  56        bootmem_data_t *ent;
  57
  58        if (list_empty(&bdata_list)) {
  59                list_add(&bdata->list, &bdata_list);
  60                return;
  61        }
  62        /* insert in order */
  63        list_for_each_entry(ent, &bdata_list, list) {
  64                if (bdata->node_boot_start < ent->node_boot_start) {
  65                        list_add_tail(&bdata->list, &ent->list);
  66                        return;
  67                }
  68        }
  69        list_add_tail(&bdata->list, &bdata_list);
  70}
  71
  72/*
  73 * Given an initialised bdata, it returns the size of the boot bitmap
  74 */
  75static unsigned long __init get_mapsize(bootmem_data_t *bdata)
  76{
  77        unsigned long mapsize;
  78        unsigned long start = PFN_DOWN(bdata->node_boot_start);
  79        unsigned long end = bdata->node_low_pfn;
  80
  81        mapsize = ((end - start) + 7) / 8;
  82        return ALIGN(mapsize, sizeof(long));
  83}
  84
  85/*
  86 * Called once to set up the allocator itself.
  87 */
  88static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
  89        unsigned long mapstart, unsigned long start, unsigned long end)
  90{
  91        bootmem_data_t *bdata = pgdat->bdata;
  92        unsigned long mapsize;
  93
  94        bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
  95        bdata->node_boot_start = PFN_PHYS(start);
  96        bdata->node_low_pfn = end;
  97        link_bootmem(bdata);
  98
  99        /*
 100         * Initially all pages are reserved - setup_arch() has to
 101         * register free RAM areas explicitly.
 102         */
 103        mapsize = get_mapsize(bdata);
 104        memset(bdata->node_bootmem_map, 0xff, mapsize);
 105
 106        return mapsize;
 107}
 108
 109/*
 110 * Marks a particular physical memory range as unallocatable. Usable RAM
 111 * might be used for boot-time allocations - or it might get added
 112 * to the free page pool later on.
 113 */
 114static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 115                                        unsigned long size)
 116{
 117        unsigned long sidx, eidx;
 118        unsigned long i;
 119
 120        /*
 121         * round up, partially reserved pages are considered
 122         * fully reserved.
 123         */
 124        BUG_ON(!size);
 125        BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
 126        BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
 127
 128        sidx = PFN_DOWN(addr - bdata->node_boot_start);
 129        eidx = PFN_UP(addr + size - bdata->node_boot_start);
 130
 131        for (i = sidx; i < eidx; i++)
 132                if (test_and_set_bit(i, bdata->node_bootmem_map)) {
 133#ifdef CONFIG_DEBUG_BOOTMEM
 134                        printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
 135#endif
 136                }
 137}
 138
 139static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
 140                                     unsigned long size)
 141{
 142        unsigned long sidx, eidx;
 143        unsigned long i;
 144
 145        /*
 146         * round down end of usable mem, partially free pages are
 147         * considered reserved.
 148         */
 149        BUG_ON(!size);
 150        BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn);
 151
 152        if (addr < bdata->last_success)
 153                bdata->last_success = addr;
 154
 155        /*
 156         * Round up the beginning of the address.
 157         */
 158        sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
 159        eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
 160
 161        for (i = sidx; i < eidx; i++) {
 162                if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
 163                        BUG();
 164        }
 165}
 166
 167/*
 168 * We 'merge' subsequent allocations to save space. We might 'lose'
 169 * some fraction of a page if allocations cannot be satisfied due to
 170 * size constraints on boxes where there is physical RAM space
 171 * fragmentation - in these cases (mostly large memory boxes) this
 172 * is not a problem.
 173 *
 174 * On low memory boxes we get it right in 100% of the cases.
 175 *
 176 * alignment has to be a power of 2 value.
 177 *
 178 * NOTE:  This function is _not_ reentrant.
 179 */
 180void * __init
 181__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 182              unsigned long align, unsigned long goal, unsigned long limit)
 183{
 184        unsigned long offset, remaining_size, areasize, preferred;
 185        unsigned long i, start = 0, incr, eidx, end_pfn;
 186        void *ret;
 187
 188        if (!size) {
 189                printk("__alloc_bootmem_core(): zero-sized request\n");
 190                BUG();
 191        }
 192        BUG_ON(align & (align-1));
 193
 194        if (limit && bdata->node_boot_start >= limit)
 195                return NULL;
 196
 197        /* on nodes without memory - bootmem_map is NULL */
 198        if (!bdata->node_bootmem_map)
 199                return NULL;
 200
 201        end_pfn = bdata->node_low_pfn;
 202        limit = PFN_DOWN(limit);
 203        if (limit && end_pfn > limit)
 204                end_pfn = limit;
 205
 206        eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
 207        offset = 0;
 208        if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
 209                offset = align - (bdata->node_boot_start & (align - 1UL));
 210        offset = PFN_DOWN(offset);
 211
 212        /*
 213         * We try to allocate bootmem pages above 'goal'
 214         * first, then we try to allocate lower pages.
 215         */
 216        if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
 217                preferred = goal - bdata->node_boot_start;
 218
 219                if (bdata->last_success >= preferred)
 220                        if (!limit || (limit && limit > bdata->last_success))
 221                                preferred = bdata->last_success;
 222        } else
 223                preferred = 0;
 224
 225        preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
 226        areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
 227        incr = align >> PAGE_SHIFT ? : 1;
 228
 229restart_scan:
 230        for (i = preferred; i < eidx; i += incr) {
 231                unsigned long j;
 232                i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
 233                i = ALIGN(i, incr);
 234                if (i >= eidx)
 235                        break;
 236                if (test_bit(i, bdata->node_bootmem_map))
 237                        continue;
 238                for (j = i + 1; j < i + areasize; ++j) {
 239                        if (j >= eidx)
 240                                goto fail_block;
 241                        if (test_bit(j, bdata->node_bootmem_map))
 242                                goto fail_block;
 243                }
 244                start = i;
 245                goto found;
 246        fail_block:
 247                i = ALIGN(j, incr);
 248        }
 249
 250        if (preferred > offset) {
 251                preferred = offset;
 252                goto restart_scan;
 253        }
 254        return NULL;
 255
 256found:
 257        bdata->last_success = PFN_PHYS(start);
 258        BUG_ON(start >= eidx);
 259
 260        /*
 261         * Is the next page of the previous allocation-end the start
 262         * of this allocation's buffer? If yes then we can 'merge'
 263         * the previous partial page with this allocation.
 264         */
 265        if (align < PAGE_SIZE &&
 266            bdata->last_offset && bdata->last_pos+1 == start) {
 267                offset = ALIGN(bdata->last_offset, align);
 268                BUG_ON(offset > PAGE_SIZE);
 269                remaining_size = PAGE_SIZE - offset;
 270                if (size < remaining_size) {
 271                        areasize = 0;
 272                        /* last_pos unchanged */
 273                        bdata->last_offset = offset + size;
 274                        ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
 275                                           offset +
 276                                           bdata->node_boot_start);
 277                } else {
 278                        remaining_size = size - remaining_size;
 279                        areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
 280                        ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
 281                                           offset +
 282                                           bdata->node_boot_start);
 283                        bdata->last_pos = start + areasize - 1;
 284                        bdata->last_offset = remaining_size;
 285                }
 286                bdata->last_offset &= ~PAGE_MASK;
 287        } else {
 288                bdata->last_pos = start + areasize - 1;
 289                bdata->last_offset = size & ~PAGE_MASK;
 290                ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 291        }
 292
 293        /*
 294         * Reserve the area now:
 295         */
 296        for (i = start; i < start + areasize; i++)
 297                if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 298                        BUG();
 299        memset(ret, 0, size);
 300        return ret;
 301}
 302
 303static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 304{
 305        struct page *page;
 306        unsigned long pfn;
 307        bootmem_data_t *bdata = pgdat->bdata;
 308        unsigned long i, count, total = 0;
 309        unsigned long idx;
 310        unsigned long *map; 
 311        int gofast = 0;
 312
 313        BUG_ON(!bdata->node_bootmem_map);
 314
 315        count = 0;
 316        /* first extant page of the node */
 317        pfn = PFN_DOWN(bdata->node_boot_start);
 318        idx = bdata->node_low_pfn - pfn;
 319        map = bdata->node_bootmem_map;
 320        /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
 321        if (bdata->node_boot_start == 0 ||
 322            ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))
 323                gofast = 1;
 324        for (i = 0; i < idx; ) {
 325                unsigned long v = ~map[i / BITS_PER_LONG];
 326
 327                if (gofast && v == ~0UL) {
 328                        int order;
 329
 330                        page = pfn_to_page(pfn);
 331                        count += BITS_PER_LONG;
 332                        order = ffs(BITS_PER_LONG) - 1;
 333                        __free_pages_bootmem(page, order);
 334                        i += BITS_PER_LONG;
 335                        page += BITS_PER_LONG;
 336                } else if (v) {
 337                        unsigned long m;
 338
 339                        page = pfn_to_page(pfn);
 340                        for (m = 1; m && i < idx; m<<=1, page++, i++) {
 341                                if (v & m) {
 342                                        count++;
 343                                        __free_pages_bootmem(page, 0);
 344                                }
 345                        }
 346                } else {
 347                        i += BITS_PER_LONG;
 348                }
 349                pfn += BITS_PER_LONG;
 350        }
 351        total += count;
 352
 353        /*
 354         * Now free the allocator bitmap itself, it's not
 355         * needed anymore:
 356         */
 357        page = virt_to_page(bdata->node_bootmem_map);
 358        count = 0;
 359        idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
 360        for (i = 0; i < idx; i++, page++) {
 361                __free_pages_bootmem(page, 0);
 362                count++;
 363        }
 364        total += count;
 365        bdata->node_bootmem_map = NULL;
 366
 367        return total;
 368}
 369
 370unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 371                                unsigned long startpfn, unsigned long endpfn)
 372{
 373        return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 374}
 375
 376void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 377                                 unsigned long size)
 378{
 379        reserve_bootmem_core(pgdat->bdata, physaddr, size);
 380}
 381
 382void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 383                              unsigned long size)
 384{
 385        free_bootmem_core(pgdat->bdata, physaddr, size);
 386}
 387
 388unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 389{
 390        return free_all_bootmem_core(pgdat);
 391}
 392
 393unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 394{
 395        max_low_pfn = pages;
 396        min_low_pfn = start;
 397        return init_bootmem_core(NODE_DATA(0), start, 0, pages);
 398}
 399
 400#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 401void __init reserve_bootmem(unsigned long addr, unsigned long size)
 402{
 403        reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
 404}
 405#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 406
 407void __init free_bootmem(unsigned long addr, unsigned long size)
 408{
 409        free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
 410}
 411
 412unsigned long __init free_all_bootmem(void)
 413{
 414        return free_all_bootmem_core(NODE_DATA(0));
 415}
 416
 417void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 418                                      unsigned long goal)
 419{
 420        bootmem_data_t *bdata;
 421        void *ptr;
 422
 423        list_for_each_entry(bdata, &bdata_list, list) {
 424                ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
 425                if (ptr)
 426                        return ptr;
 427        }
 428        return NULL;
 429}
 430
 431void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 432                              unsigned long goal)
 433{
 434        void *mem = __alloc_bootmem_nopanic(size,align,goal);
 435
 436        if (mem)
 437                return mem;
 438        /*
 439         * Whoops, we cannot satisfy the allocation request.
 440         */
 441        printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
 442        panic("Out of memory");
 443        return NULL;
 444}
 445
 446
 447void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 448                                   unsigned long align, unsigned long goal)
 449{
 450        void *ptr;
 451
 452        ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
 453        if (ptr)
 454                return ptr;
 455
 456        return __alloc_bootmem(size, align, goal);
 457}
 458
 459#ifndef ARCH_LOW_ADDRESS_LIMIT
 460#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
 461#endif
 462
 463void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 464                                  unsigned long goal)
 465{
 466        bootmem_data_t *bdata;
 467        void *ptr;
 468
 469        list_for_each_entry(bdata, &bdata_list, list) {
 470                ptr = __alloc_bootmem_core(bdata, size, align, goal,
 471                                                ARCH_LOW_ADDRESS_LIMIT);
 472                if (ptr)
 473                        return ptr;
 474        }
 475
 476        /*
 477         * Whoops, we cannot satisfy the allocation request.
 478         */
 479        printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size);
 480        panic("Out of low memory");
 481        return NULL;
 482}
 483
 484void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 485                                       unsigned long align, unsigned long goal)
 486{
 487        return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
 488                                    ARCH_LOW_ADDRESS_LIMIT);
 489}
 490