linux/arch/x86/kernel/cpu/mtrr/cleanup.c
<<
>>
Prefs
   1/*
   2 * MTRR (Memory Type Range Register) cleanup
   3 *
   4 *  Copyright (C) 2009 Yinghai Lu
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Library General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Library General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Library General Public
  17 * License along with this library; if not, write to the Free
  18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19 */
  20#include <linux/module.h>
  21#include <linux/init.h>
  22#include <linux/pci.h>
  23#include <linux/smp.h>
  24#include <linux/cpu.h>
  25#include <linux/sort.h>
  26#include <linux/mutex.h>
  27#include <linux/uaccess.h>
  28#include <linux/kvm_para.h>
  29
  30#include <asm/processor.h>
  31#include <asm/e820.h>
  32#include <asm/mtrr.h>
  33#include <asm/msr.h>
  34
  35#include "mtrr.h"
  36
/*
 * A [start, end] page-frame-number range; "end" is inclusive.
 * An entry with end == 0 is treated as an empty/unused slot
 * throughout this file.
 */
struct res_range {
	unsigned long	start;
	unsigned long	end;
};
  41
/* Saved contents of one variable MTRR: base/size in pages plus its type. */
struct var_mtrr_range_state {
	unsigned long	base_pfn;
	unsigned long	size_pfn;
	mtrr_type	type;
};
  47
/*
 * Running state while laying out a new variable-MTRR setup: the
 * pending write-back range being accumulated (start/size in KiB),
 * the chunk/granularity layout parameters, and the index of the
 * next free MTRR register.
 */
struct var_mtrr_state {
	unsigned long	range_startk;
	unsigned long	range_sizek;
	unsigned long	chunk_sizek;
	unsigned long	gran_sizek;
	unsigned int	reg;
};
  55
/* Should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM				256

/* Working copy of the PFN range list derived from the MTRRs: */
static struct res_range __initdata		range[RANGE_NUM];
static int __initdata				nr_range;

/* Snapshot of the variable MTRR registers (filled by save_var_mtrr()): */
static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];

/* Set by the "mtrr_cleanup_debug" boot parameter; gates Dprintk() output: */
static int __initdata debug_print;
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
  66
  67
  68static int __init
  69add_range(struct res_range *range, int nr_range,
  70          unsigned long start, unsigned long end)
  71{
  72        /* Out of slots: */
  73        if (nr_range >= RANGE_NUM)
  74                return nr_range;
  75
  76        range[nr_range].start = start;
  77        range[nr_range].end = end;
  78
  79        nr_range++;
  80
  81        return nr_range;
  82}
  83
  84static int __init
  85add_range_with_merge(struct res_range *range, int nr_range,
  86                     unsigned long start, unsigned long end)
  87{
  88        int i;
  89
  90        /* Try to merge it with old one: */
  91        for (i = 0; i < nr_range; i++) {
  92                unsigned long final_start, final_end;
  93                unsigned long common_start, common_end;
  94
  95                if (!range[i].end)
  96                        continue;
  97
  98                common_start = max(range[i].start, start);
  99                common_end = min(range[i].end, end);
 100                if (common_start > common_end + 1)
 101                        continue;
 102
 103                final_start = min(range[i].start, start);
 104                final_end = max(range[i].end, end);
 105
 106                range[i].start = final_start;
 107                range[i].end =  final_end;
 108                return nr_range;
 109        }
 110
 111        /* Need to add it: */
 112        return add_range(range, nr_range, start, end);
 113}
 114
 115static void __init
 116subtract_range(struct res_range *range, unsigned long start, unsigned long end)
 117{
 118        int i, j;
 119
 120        for (j = 0; j < RANGE_NUM; j++) {
 121                if (!range[j].end)
 122                        continue;
 123
 124                if (start <= range[j].start && end >= range[j].end) {
 125                        range[j].start = 0;
 126                        range[j].end = 0;
 127                        continue;
 128                }
 129
 130                if (start <= range[j].start && end < range[j].end &&
 131                    range[j].start < end + 1) {
 132                        range[j].start = end + 1;
 133                        continue;
 134                }
 135
 136
 137                if (start > range[j].start && end >= range[j].end &&
 138                    range[j].end > start - 1) {
 139                        range[j].end = start - 1;
 140                        continue;
 141                }
 142
 143                if (start > range[j].start && end < range[j].end) {
 144                        /* Find the new spare: */
 145                        for (i = 0; i < RANGE_NUM; i++) {
 146                                if (range[i].end == 0)
 147                                        break;
 148                        }
 149                        if (i < RANGE_NUM) {
 150                                range[i].end = range[j].end;
 151                                range[i].start = end + 1;
 152                        } else {
 153                                printk(KERN_ERR "run of slot in ranges\n");
 154                        }
 155                        range[j].end = start - 1;
 156                        continue;
 157                }
 158        }
 159}
 160
 161static int __init cmp_range(const void *x1, const void *x2)
 162{
 163        const struct res_range *r1 = x1;
 164        const struct res_range *r2 = x2;
 165        long start1, start2;
 166
 167        start1 = r1->start;
 168        start2 = r2->start;
 169
 170        return start1 - start2;
 171}
 172
/* Warning format (takes the MTRR index) for a bogus UC entry below 1M: */
#define BIOS_BUG_MSG KERN_WARNING \
	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 175
/*
 * Distill the variable-MTRR snapshot in range_state[] into the list of
 * PFN ranges that are effectively Write-Back:
 *
 *  1) merge all WB entries into @range,
 *  2) subtract every UC/WP entry, plus the optional
 *     [extra_remove_base, extra_remove_base + extra_remove_size - 1]
 *     area the caller wants removed,
 *  3) count what is left, sort it by start, and zero the array tail.
 *
 * Range ends are inclusive (hence the "end + 1" in the debug dumps).
 * Returns the number of surviving ranges.
 */
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
		       unsigned long extra_remove_base,
		       unsigned long extra_remove_size)
{
	unsigned long base, size;
	mtrr_type type;
	int i;

	/* Merge every WB entry into the range list: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		nr_range = add_range_with_merge(range, nr_range, base,
						base + size - 1);
	}
	if (debug_print) {
		printk(KERN_DEBUG "After WB checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				 range[i].start, range[i].end + 1);
	}

	/* Take out UC ranges: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_UNCACHABLE &&
		    type != MTRR_TYPE_WRPROT)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			continue;
		base = range_state[i].base_pfn;
		/*
		 * With fixed MTRRs enabled they govern [0, 1M), so only
		 * the part of a variable UC entry above 1M (if any)
		 * still needs subtracting:
		 */
		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
		    (mtrr_state.enabled & 1)) {
			/* Var MTRR contains UC entry below 1M? Skip it: */
			printk(BIOS_BUG_MSG, i);
			if (base + size <= (1<<(20-PAGE_SHIFT)))
				continue;
			size -= (1<<(20-PAGE_SHIFT)) - base;
			base = 1<<(20-PAGE_SHIFT);
		}
		subtract_range(range, base, base + size - 1);
	}
	if (extra_remove_size)
		subtract_range(range, extra_remove_base,
				 extra_remove_base + extra_remove_size  - 1);

	/* get new range num */
	nr_range = 0;
	for (i = 0; i < RANGE_NUM; i++) {
		if (!range[i].end)
			continue;
		nr_range++;
	}
	if  (debug_print) {
		printk(KERN_DEBUG "After UC checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				 range[i].start, range[i].end + 1);
	}

	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
	if  (debug_print) {
		printk(KERN_DEBUG "After sorting\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				 range[i].start, range[i].end + 1);
	}

	/* clear those is not used */
	for (i = nr_range; i < RANGE_NUM; i++)
		memset(&range[i], 0, sizeof(range[i]));

	return nr_range;
}
 255
 256#ifdef CONFIG_MTRR_SANITIZER
 257
 258static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
 259{
 260        unsigned long sum = 0;
 261        int i;
 262
 263        for (i = 0; i < nr_range; i++)
 264                sum += range[i].end + 1 - range[i].start;
 265
 266        return sum;
 267}
 268
 269static int enable_mtrr_cleanup __initdata =
 270        CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
 271
 272static int __init disable_mtrr_cleanup_setup(char *str)
 273{
 274        enable_mtrr_cleanup = 0;
 275        return 0;
 276}
 277early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
 278
 279static int __init enable_mtrr_cleanup_setup(char *str)
 280{
 281        enable_mtrr_cleanup = 1;
 282        return 0;
 283}
 284early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
 285
 286static int __init mtrr_cleanup_debug_setup(char *str)
 287{
 288        debug_print = 1;
 289        return 0;
 290}
 291early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
 292
/*
 * Program variable MTRR @reg to cover @sizek KiB starting at @basek
 * KiB with memory type @type, via fill_mtrr_var_range().  A zero size
 * clears the register.  @address_bits is the CPU's physical address
 * width, used to build the mask.
 */
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	     unsigned char type, unsigned int address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	u64 base, mask;

	if (!sizek) {
		/* Empty range: write an all-zero (disabled) MTRR: */
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	/* Mask covers the full address width, aligned to the range size: */
	mask = (1ULL << address_bits) - 1;
	mask &= ~((((u64)sizek) << 10) - 1);

	/* KiB -> bytes: */
	base = ((u64)basek) << 10;

	base |= type;		/* memory type in the low bits of PhysBase */
	mask |= 0x800;		/* bit 11: the PhysMask "valid" bit */

	/* Split into the lo/hi halves fill_mtrr_var_range() expects: */
	base_lo = base & ((1ULL<<32) - 1);
	base_hi = base >> 32;

	mask_lo = mask & ((1ULL<<32) - 1);
	mask_hi = mask >> 32;

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}
 321
 322static void __init
 323save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
 324              unsigned char type)
 325{
 326        range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
 327        range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
 328        range_state[reg].type = type;
 329}
 330
 331static void __init set_var_mtrr_all(unsigned int address_bits)
 332{
 333        unsigned long basek, sizek;
 334        unsigned char type;
 335        unsigned int reg;
 336
 337        for (reg = 0; reg < num_var_ranges; reg++) {
 338                basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
 339                sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
 340                type = range_state[reg].type;
 341
 342                set_var_mtrr(reg, basek, sizek, type, address_bits);
 343        }
 344}
 345
 346static unsigned long to_size_factor(unsigned long sizek, char *factorp)
 347{
 348        unsigned long base = sizek;
 349        char factor;
 350
 351        if (base & ((1<<10) - 1)) {
 352                /* Not MB-aligned: */
 353                factor = 'K';
 354        } else if (base & ((1<<20) - 1)) {
 355                factor = 'M';
 356                base >>= 10;
 357        } else {
 358                factor = 'G';
 359                base >>= 20;
 360        }
 361
 362        *factorp = factor;
 363
 364        return base;
 365}
 366
/*
 * Cover [range_startk, range_startk + range_sizek) (KiB) with
 * power-of-two sized MTRR entries of @type, starting at register
 * @reg.  Each step takes the largest size that both fits in the
 * remaining length (fls) and respects the alignment of the current
 * start (ffs).  Entries are only recorded via save_var_mtrr();
 * nothing is written to hardware here.
 *
 * Returns the next free register index (== num_var_ranges when the
 * registers ran out before the range was fully covered).
 */
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
	      unsigned long range_sizek, unsigned char type)
{
	if (!range_sizek || (reg >= num_var_ranges))
		return reg;

	while (range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;

		/* Compute the maximum size with which we can make a range: */
		if (range_startk)
			max_align = ffs(range_startk) - 1;
		else
			max_align = 32;	/* start 0 imposes no alignment limit */

		align = fls(range_sizek) - 1;
		if (align > max_align)
			align = max_align;

		sizek = 1 << align;
		if (debug_print) {
			char start_factor = 'K', size_factor = 'K';
			unsigned long start_base, size_base;

			start_base = to_size_factor(range_startk, &start_factor);
			size_base = to_size_factor(sizek, &size_factor);

			Dprintk("Setting variable MTRR %d, "
				"base: %ld%cB, range: %ld%cB, type %s\n",
				reg, start_base, start_factor,
				size_base, size_factor,
				(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
				   ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
				);
		}
		/* Record this chunk and advance past it: */
		save_var_mtrr(reg++, range_startk, sizek, type);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= num_var_ranges)
			break;
	}
	return reg;
}
 412
/*
 * Write out the pending WB range (state->range_startk/range_sizek) as
 * variable MTRRs.  To use fewer/larger WB entries, the range may be
 * rounded up to a chunk boundary and the over-covered slack punched
 * out again with a UC "hole" MTRR.  @basek/@sizek describe the NEXT
 * range (0/0 when flushing the very last one).
 *
 * Returns how many KiB of the next range this layout already covered
 * ("second_sizek"); the caller starts its new range past that amount.
 */
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
			unsigned long sizek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long second_basek, second_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	second_basek = 0;
	second_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* Align with gran size, prevent small block used up MTRRs: */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	if ((range_basek > basek) && basek)
		return second_sizek;

	/* Shrink the range by whatever the start alignment consumed: */
	state->range_sizek -= (range_basek - state->range_startk);
	range_sizek = ALIGN(state->range_sizek, gran_sizek);

	/* Round the size DOWN to a gran multiple (0 -> nothing to do): */
	while (range_sizek > state->range_sizek) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return 0;
	}
	state->range_sizek = range_sizek;

	/* Try to append some small hole: */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

	/* No increase: */
	if (range0_sizek == state->range_sizek) {
		Dprintk("rangeX: %016lx - %016lx\n",
			range0_basek<<10,
			(range0_basek + state->range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK);
		return 0;
	}

	/* Only cut back when it is not the last: */
	if (sizek) {
		while (range0_basek + range0_sizek > (basek + sizek)) {
			if (range0_sizek >= chunk_sizek)
				range0_sizek -= chunk_sizek;
			else
				range0_sizek = 0;

			if (!range0_sizek)
				break;
		}
	}

second_try:
	range_basek = range0_basek + range0_sizek;

	/* One hole in the middle: */
	if (range_basek > basek && range_basek <= (basek + sizek))
		second_sizek = range_basek - basek;

	if (range0_sizek > state->range_sizek) {

		/* One hole in middle or at the end: */
		hole_sizek = range0_sizek - state->range_sizek - second_sizek;

		/* Hole size should be less than half of range0 size: */
		if (hole_sizek >= (range0_sizek >> 1) &&
		    range0_sizek >= chunk_sizek) {
			/* Too wasteful: drop a chunk and re-evaluate. */
			range0_sizek -= chunk_sizek;
			second_sizek = 0;
			hole_sizek = 0;

			goto second_try;
		}
	}

	/* WB entry for the chunk-aligned part: */
	if (range0_sizek) {
		Dprintk("range0: %016lx - %016lx\n",
			range0_basek<<10,
			(range0_basek + range0_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				range0_sizek, MTRR_TYPE_WRBACK);
	}

	if (range0_sizek < state->range_sizek) {
		/* Need to handle left over range: */
		range_sizek = state->range_sizek - range0_sizek;

		Dprintk("range: %016lx - %016lx\n",
			 range_basek<<10,
			 (range_basek + range_sizek)<<10);

		state->reg = range_to_mtrr(state->reg, range_basek,
				 range_sizek, MTRR_TYPE_WRBACK);
	}

	/* UC entry covering the over-claimed slack, if any: */
	if (hole_sizek) {
		hole_basek = range_basek - hole_sizek - second_sizek;
		Dprintk("hole: %016lx - %016lx\n",
			 hole_basek<<10,
			 (hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				 hole_sizek, MTRR_TYPE_UNCACHABLE);
	}

	return second_sizek;
}
 527
 528static void __init
 529set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
 530                   unsigned long size_pfn)
 531{
 532        unsigned long basek, sizek;
 533        unsigned long second_sizek = 0;
 534
 535        if (state->reg >= num_var_ranges)
 536                return;
 537
 538        basek = base_pfn << (PAGE_SHIFT - 10);
 539        sizek = size_pfn << (PAGE_SHIFT - 10);
 540
 541        /* See if I can merge with the last range: */
 542        if ((basek <= 1024) ||
 543            (state->range_startk + state->range_sizek == basek)) {
 544                unsigned long endk = basek + sizek;
 545                state->range_sizek = endk - state->range_startk;
 546                return;
 547        }
 548        /* Write the range mtrrs: */
 549        if (state->range_sizek != 0)
 550                second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
 551
 552        /* Allocate an msr: */
 553        state->range_startk = basek + second_sizek;
 554        state->range_sizek  = sizek - second_sizek;
 555}
 556
 557/* Mininum size of mtrr block that can take hole: */
 558static u64 mtrr_chunk_size __initdata = (256ULL<<20);
 559
 560static int __init parse_mtrr_chunk_size_opt(char *p)
 561{
 562        if (!p)
 563                return -EINVAL;
 564        mtrr_chunk_size = memparse(p, &p);
 565        return 0;
 566}
 567early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
 568
 569/* Granularity of mtrr of block: */
 570static u64 mtrr_gran_size __initdata;
 571
 572static int __init parse_mtrr_gran_size_opt(char *p)
 573{
 574        if (!p)
 575                return -EINVAL;
 576        mtrr_gran_size = memparse(p, &p);
 577        return 0;
 578}
 579early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
 580
 581static unsigned long nr_mtrr_spare_reg __initdata =
 582                                 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
 583
 584static int __init parse_mtrr_spare_reg(char *arg)
 585{
 586        if (arg)
 587                nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
 588        return 0;
 589}
 590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
 591
 592static int __init
 593x86_setup_var_mtrrs(struct res_range *range, int nr_range,
 594                    u64 chunk_size, u64 gran_size)
 595{
 596        struct var_mtrr_state var_state;
 597        int num_reg;
 598        int i;
 599
 600        var_state.range_startk  = 0;
 601        var_state.range_sizek   = 0;
 602        var_state.reg           = 0;
 603        var_state.chunk_sizek   = chunk_size >> 10;
 604        var_state.gran_sizek    = gran_size >> 10;
 605
 606        memset(range_state, 0, sizeof(range_state));
 607
 608        /* Write the range: */
 609        for (i = 0; i < nr_range; i++) {
 610                set_var_mtrr_range(&var_state, range[i].start,
 611                                   range[i].end - range[i].start + 1);
 612        }
 613
 614        /* Write the last range: */
 615        if (var_state.range_sizek != 0)
 616                range_to_mtrr_with_hole(&var_state, 0, 0);
 617
 618        num_reg = var_state.reg;
 619        /* Clear out the extra MTRR's: */
 620        while (var_state.reg < num_var_ranges) {
 621                save_var_mtrr(var_state.reg, 0, 0, 0);
 622                var_state.reg++;
 623        }
 624
 625        return num_reg;
 626}
 627
/* One candidate (gran_size, chunk_size) layout and how well it did: */
struct mtrr_cleanup_result {
	unsigned long	gran_sizek;
	unsigned long	chunk_sizek;
	unsigned long	lose_cover_sizek;	/* RAM coverage lost, in KiB */
	unsigned int	num_reg;		/* MTRR registers consumed */
	int		bad;			/* layout was rejected */
};
 635
/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
 * chunk size: gran_size, ..., 2G
 * so we need (1+16)*8
 */
#define NUM_RESULT	136
/* Pages-per-KiB shift, for converting page counts to KiB: */
#define PSHIFT		(PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
/* Smallest coverage loss (in pages) seen per register count: */
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 646
 647static void __init print_out_mtrr_range_state(void)
 648{
 649        char start_factor = 'K', size_factor = 'K';
 650        unsigned long start_base, size_base;
 651        mtrr_type type;
 652        int i;
 653
 654        for (i = 0; i < num_var_ranges; i++) {
 655
 656                size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
 657                if (!size_base)
 658                        continue;
 659
 660                size_base = to_size_factor(size_base, &size_factor),
 661                start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
 662                start_base = to_size_factor(start_base, &start_factor),
 663                type = range_state[i].type;
 664
 665                printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
 666                        i, start_base, start_factor,
 667                        size_base, size_factor,
 668                        (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
 669                            ((type == MTRR_TYPE_WRPROT) ? "WP" :
 670                             ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
 671                        );
 672        }
 673}
 674
 675static int __init mtrr_need_cleanup(void)
 676{
 677        int i;
 678        mtrr_type type;
 679        unsigned long size;
 680        /* Extra one for all 0: */
 681        int num[MTRR_NUM_TYPES + 1];
 682
 683        /* Check entries number: */
 684        memset(num, 0, sizeof(num));
 685        for (i = 0; i < num_var_ranges; i++) {
 686                type = range_state[i].type;
 687                size = range_state[i].size_pfn;
 688                if (type >= MTRR_NUM_TYPES)
 689                        continue;
 690                if (!size)
 691                        type = MTRR_NUM_TYPES;
 692                if (type == MTRR_TYPE_WRPROT)
 693                        type = MTRR_TYPE_UNCACHABLE;
 694                num[type]++;
 695        }
 696
 697        /* Check if we got UC entries: */
 698        if (!num[MTRR_TYPE_UNCACHABLE])
 699                return 0;
 700
 701        /* Check if we only had WB and UC */
 702        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
 703            num_var_ranges - num[MTRR_NUM_TYPES])
 704                return 0;
 705
 706        return 1;
 707}
 708
/* Pages covered by the BIOS-original layout; baseline for comparison: */
static unsigned long __initdata range_sums;
 710
/*
 * Evaluate one (chunk_size, gran_size) candidate layout and record the
 * outcome in result[@i]: build a new MTRR layout for the global range
 * list, recompute the WB coverage it would give, and compare that
 * against the baseline range_sums.  A layout is marked bad when it
 * covers MORE than the original (it would map non-RAM as WB), or when
 * a "no loss" result fails to reproduce the original ranges exactly.
 */
static void __init
mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
		      unsigned long x_remove_base,
		      unsigned long x_remove_size, int i)
{
	/* static, presumably to keep the large array off the init stack: */
	static struct res_range range_new[RANGE_NUM];
	unsigned long range_sums_new;
	static int nr_range_new;
	int num_reg;

	/* Convert ranges to var ranges state: */
	num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);

	/* We got new setting in range_state, check it: */
	memset(range_new, 0, sizeof(range_new));
	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
				x_remove_base, x_remove_size);
	range_sums_new = sum_ranges(range_new, nr_range_new);

	result[i].chunk_sizek = chunk_size >> 10;
	result[i].gran_sizek = gran_size >> 10;
	result[i].num_reg = num_reg;

	if (range_sums < range_sums_new) {
		/* Covers more than before -> would map non-RAM as WB: */
		result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
		result[i].bad = 1;
	} else {
		result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
	}

	/* Double check it: */
	if (!result[i].bad && !result[i].lose_cover_sizek) {
		if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
			result[i].bad = 1;
	}

	/* Track the smallest loss seen for this register count: */
	if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
		min_loss_pfn[num_reg] = range_sums - range_sums_new;
}
 750
 751static void __init mtrr_print_out_one_result(int i)
 752{
 753        unsigned long gran_base, chunk_base, lose_base;
 754        char gran_factor, chunk_factor, lose_factor;
 755
 756        gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
 757        chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
 758        lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
 759
 760        pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
 761                result[i].bad ? "*BAD*" : " ",
 762                gran_base, gran_factor, chunk_base, chunk_factor);
 763        pr_cont("num_reg: %d  \tlose cover RAM: %s%ld%c\n",
 764                result[i].num_reg, result[i].bad ? "-" : "",
 765                lose_base, lose_factor);
 766}
 767
 768static int __init mtrr_search_optimal_index(void)
 769{
 770        int num_reg_good;
 771        int index_good;
 772        int i;
 773
 774        if (nr_mtrr_spare_reg >= num_var_ranges)
 775                nr_mtrr_spare_reg = num_var_ranges - 1;
 776
 777        num_reg_good = -1;
 778        for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
 779                if (!min_loss_pfn[i])
 780                        num_reg_good = i;
 781        }
 782
 783        index_good = -1;
 784        if (num_reg_good != -1) {
 785                for (i = 0; i < NUM_RESULT; i++) {
 786                        if (!result[i].bad &&
 787                            result[i].num_reg == num_reg_good &&
 788                            !result[i].lose_cover_sizek) {
 789                                index_good = i;
 790                                break;
 791                        }
 792                }
 793        }
 794
 795        return index_good;
 796}
 797
 798int __init mtrr_cleanup(unsigned address_bits)
 799{
 800        unsigned long x_remove_base, x_remove_size;
 801        unsigned long base, size, def, dummy;
 802        u64 chunk_size, gran_size;
 803        mtrr_type type;
 804        int index_good;
 805        int i;
 806
 807        if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
 808                return 0;
 809
 810        rdmsr(MSR_MTRRdefType, def, dummy);
 811        def &= 0xff;
 812        if (def != MTRR_TYPE_UNCACHABLE)
 813                return 0;
 814
 815        /* Get it and store it aside: */
 816        memset(range_state, 0, sizeof(range_state));
 817        for (i = 0; i < num_var_ranges; i++) {
 818                mtrr_if->get(i, &base, &size, &type);
 819                range_state[i].base_pfn = base;
 820                range_state[i].size_pfn = size;
 821                range_state[i].type = type;
 822        }
 823
 824        /* Check if we need handle it and can handle it: */
 825        if (!mtrr_need_cleanup())
 826                return 0;
 827
 828        /* Print original var MTRRs at first, for debugging: */
 829        printk(KERN_DEBUG "original variable MTRRs\n");
 830        print_out_mtrr_range_state();
 831
 832        memset(range, 0, sizeof(range));
 833        x_remove_size = 0;
 834        x_remove_base = 1 << (32 - PAGE_SHIFT);
 835        if (mtrr_tom2)
 836                x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
 837
 838        nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
 839        /*
 840         * [0, 1M) should always be covered by var mtrr with WB
 841         * and fixed mtrrs should take effect before var mtrr for it:
 842         */
 843        nr_range = add_range_with_merge(range, nr_range, 0,
 844                                        (1ULL<<(20 - PAGE_SHIFT)) - 1);
 845        /* Sort the ranges: */
 846        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
 847
 848        range_sums = sum_ranges(range, nr_range);
 849        printk(KERN_INFO "total RAM covered: %ldM\n",
 850               range_sums >> (20 - PAGE_SHIFT));
 851
 852        if (mtrr_chunk_size && mtrr_gran_size) {
 853                i = 0;
 854                mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
 855                                      x_remove_base, x_remove_size, i);
 856
 857                mtrr_print_out_one_result(i);
 858
 859                if (!result[i].bad) {
 860                        set_var_mtrr_all(address_bits);
 861                        printk(KERN_DEBUG "New variable MTRRs\n");
 862                        print_out_mtrr_range_state();
 863                        return 1;
 864                }
 865                printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
 866                       "will find optimal one\n");
 867        }
 868
 869        i = 0;
 870        memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
 871        memset(result, 0, sizeof(result));
 872        for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
 873
 874                for (chunk_size = gran_size; chunk_size < (1ULL<<32);
 875                     chunk_size <<= 1) {
 876
 877                        if (i >= NUM_RESULT)
 878                                continue;
 879
 880                        mtrr_calc_range_state(chunk_size, gran_size,
 881                                      x_remove_base, x_remove_size, i);
 882                        if (debug_print) {
 883                                mtrr_print_out_one_result(i);
 884                                printk(KERN_INFO "\n");
 885                        }
 886
 887                        i++;
 888                }
 889        }
 890
 891        /* Try to find the optimal index: */
 892        index_good = mtrr_search_optimal_index();
 893
 894        if (index_good != -1) {
 895                printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
 896                i = index_good;
 897                mtrr_print_out_one_result(i);
 898
 899                /* Convert ranges to var ranges state: */
 900                chunk_size = result[i].chunk_sizek;
 901                chunk_size <<= 10;
 902                gran_size = result[i].gran_sizek;
 903                gran_size <<= 10;
 904                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
 905                set_var_mtrr_all(address_bits);
 906                printk(KERN_DEBUG "New variable MTRRs\n");
 907                print_out_mtrr_range_state();
 908                return 1;
 909        } else {
 910                /* print out all */
 911                for (i = 0; i < NUM_RESULT; i++)
 912                        mtrr_print_out_one_result(i);
 913        }
 914
 915        printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
 916        printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
 917
 918        return 0;
 919}
 920#else
/*
 * Stub used when the MTRR cleanup support above is compiled out
 * (the #ifdef condition is before this view -- presumably
 * CONFIG_MTRR_SANITIZER; confirm against the full file).
 * Always reports that no MTRR layout change was made.
 */
int __init mtrr_cleanup(unsigned address_bits)
{
	return 0;
}
 925#endif
 926
 927static int disable_mtrr_trim;
 928
 929static int __init disable_mtrr_trim_setup(char *str)
 930{
 931        disable_mtrr_trim = 1;
 932        return 0;
 933}
 934early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
 935
 936/*
 937 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 938 * for memory >4GB. Check for that here.
 * Note this won't check whether the MTRRs below 4GB (where the magic bit
 * doesn't apply) are wrong, but so far we don't know of any such case in
 * the wild.
 941 */
 942#define Tom2Enabled             (1U << 21)
 943#define Tom2ForceMemTypeWB      (1U << 22)
 944
 945int __init amd_special_default_mtrr(void)
 946{
 947        u32 l, h;
 948
 949        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
 950                return 0;
 951        if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
 952                return 0;
 953        /* In case some hypervisor doesn't pass SYSCFG through: */
 954        if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
 955                return 0;
 956        /*
 957         * Memory between 4GB and top of mem is forced WB by this magic bit.
 958         * Reserved before K8RevF, but should be zero there.
 959         */
 960        if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
 961                 (Tom2Enabled | Tom2ForceMemTypeWB))
 962                return 1;
 963        return 0;
 964}
 965
 966static u64 __init
 967real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
 968{
 969        u64 trim_start, trim_size;
 970
 971        trim_start = start_pfn;
 972        trim_start <<= PAGE_SHIFT;
 973
 974        trim_size = limit_pfn;
 975        trim_size <<= PAGE_SHIFT;
 976        trim_size -= trim_start;
 977
 978        return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
 979}
 980
 981/**
 982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 983 * @end_pfn: ending page frame number
 984 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 986 * memory configurations.  This routine checks that the highest MTRR matches
 987 * the end of memory, to make sure the MTRRs having a write back type cover
 988 * all of the memory the kernel is intending to use.  If not, it'll trim any
 989 * memory off the end by adjusting end_pfn, removing it from the kernel's
 990 * allocation pools, warning the user with an obnoxious message.
 991 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
	unsigned long i, base, size, highest_pfn = 0, def, dummy;
	mtrr_type type;
	u64 total_trim_size;
	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];

	/*
	 * Make sure we only trim uncachable memory on machines that
	 * support the Intel MTRR architecture:
	 */
	if (!is_cpu(INTEL) || disable_mtrr_trim)
		return 0;

	/*
	 * Trimming only makes sense when the default memory type is
	 * uncachable: then any RAM not covered by a WB variable MTRR
	 * is effectively UC and painfully slow.
	 */
	rdmsr(MSR_MTRRdefType, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* Get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Find highest cached pfn: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		if (highest_pfn < base + size)
			highest_pfn = base + size;
	}

	/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
	if (!highest_pfn) {
		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
		return 0;
	}

	/* Check entries number: */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type >= MTRR_NUM_TYPES)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			type = MTRR_NUM_TYPES;	/* empty slot: count in extra bucket */
		num[type]++;
	}

	/* No entry for WB? */
	if (!num[MTRR_TYPE_WRBACK])
		return 0;

	/*
	 * Check if we only had WB and UC; with other types in play the
	 * "everything outside WB is lost RAM" assumption below is wrong:
	 */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
		num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	/*
	 * Build the list of cached pfn ranges.  If the AMD Tom2 magic
	 * forces [4GB, tom2) to WB, seed the list with that range first.
	 */
	memset(range, 0, sizeof(range));
	nr_range = 0;
	if (mtrr_tom2) {
		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
		if (highest_pfn < range[nr_range].end + 1)
			highest_pfn = range[nr_range].end + 1;
		nr_range++;
	}
	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

	/* Check the head: */
	total_trim_size = 0;
	if (range[0].start)
		total_trim_size += real_trim_memory(0, range[0].start);

	/* Check the holes: */
	for (i = 0; i < nr_range - 1; i++) {
		if (range[i].end + 1 < range[i+1].start)
			total_trim_size += real_trim_memory(range[i].end + 1,
							    range[i+1].start);
	}

	/* Check the top: */
	i = nr_range - 1;
	if (range[i].end + 1 < end_pfn)
		total_trim_size += real_trim_memory(range[i].end + 1,
							 end_pfn);

	if (total_trim_size) {
		pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);

		/* Suppress the warning if mtrr_cleanup already rewrote them: */
		if (!changed_by_mtrr_cleanup)
			WARN_ON(1);

		pr_info("update e820 for mtrr\n");
		update_e820();

		return 1;
	}

	return 0;
}
1102