linux/arch/x86/kernel/cpu/mtrr/main.c
   1/*  Generic MTRR (Memory Type Range Register) driver.
   2
   3    Copyright (C) 1997-2000  Richard Gooch
   4    Copyright (c) 2002       Patrick Mochel
   5
   6    This library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Library General Public
   8    License as published by the Free Software Foundation; either
   9    version 2 of the License, or (at your option) any later version.
  10
  11    This library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Library General Public License for more details.
  15
  16    You should have received a copy of the GNU Library General Public
  17    License along with this library; if not, write to the Free
  18    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
  21    The postal address is:
  22      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
  23
  24    Source: "Pentium Pro Family Developer's Manual, Volume 3:
  25    Operating System Writer's Guide" (Intel document number 242692),
  26    section 11.11.7
  27
  28    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
  29    on 6-7 March 2002.
  30    Source: Intel Architecture Software Developers Manual, Volume 3:
  31    System Programming Guide; Section 9.11. (1997 edition - PPro).
  32*/
  33
  34#define DEBUG
  35
  36#include <linux/types.h> /* FIXME: kvm_para.h needs this */
  37
  38#include <linux/stop_machine.h>
  39#include <linux/kvm_para.h>
  40#include <linux/uaccess.h>
  41#include <linux/module.h>
  42#include <linux/mutex.h>
  43#include <linux/init.h>
  44#include <linux/sort.h>
  45#include <linux/cpu.h>
  46#include <linux/pci.h>
  47#include <linux/smp.h>
  48#include <linux/syscore_ops.h>
  49
  50#include <asm/processor.h>
  51#include <asm/e820.h>
  52#include <asm/mtrr.h>
  53#include <asm/msr.h>
  54#include <asm/pat.h>
  55
  56#include "mtrr.h"
  57
  58/* arch_phys_wc_add returns an MTRR register index plus this offset. */
  59#define MTRR_TO_PHYS_WC_OFFSET 1000
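/*
 * For example, a write-combining range that ends up in variable MTRR
 * register 2 is returned to the caller of arch_phys_wc_add() as handle
 * 2 + 1000 = 1002, and arch_phys_wc_del(1002) maps it back to register 2.
 * The PAT-enabled return of 0 and negative error codes therefore never
 * collide with a valid handle.
 */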
  60
  61u32 num_var_ranges;
  62
  63unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
  64static DEFINE_MUTEX(mtrr_mutex);
  65
  66u64 size_or_mask, size_and_mask;
  67static bool mtrr_aps_delayed_init;
  68
  69static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
  70
  71const struct mtrr_ops *mtrr_if;
  72
  73static void set_mtrr(unsigned int reg, unsigned long base,
  74                     unsigned long size, mtrr_type type);
  75
  76void set_mtrr_ops(const struct mtrr_ops *ops)
  77{
  78        if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
  79                mtrr_ops[ops->vendor] = ops;
  80}
  81
  82/*  Returns non-zero if we have the write-combining memory type  */
  83static int have_wrcomb(void)
  84{
  85        struct pci_dev *dev;
  86
  87        dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
  88        if (dev != NULL) {
  89                /*
  90                 * ServerWorks LE chipsets < rev 6 have problems with
  91                 * write-combining. Don't allow it and leave room for other
  92                 * chipsets to be tagged
  93                 */
  94                if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
  95                    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
  96                    dev->revision <= 5) {
  97                        pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
  98                        pci_dev_put(dev);
  99                        return 0;
 100                }
 101                /*
  102                 * Intel 450NX errata #23: non-ascending cacheline evictions to
  103                 * write-combining memory may result in data corruption.
 104                 */
 105                if (dev->vendor == PCI_VENDOR_ID_INTEL &&
 106                    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
 107                        pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
 108                        pci_dev_put(dev);
 109                        return 0;
 110                }
 111                pci_dev_put(dev);
 112        }
 113        return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
 114}
 115
  116/*  This function sets num_var_ranges to the number of variable MTRRs  */
 117static void __init set_num_var_ranges(void)
 118{
 119        unsigned long config = 0, dummy;
 120
 121        if (use_intel())
 122                rdmsr(MSR_MTRRcap, config, dummy);
 123        else if (is_cpu(AMD))
 124                config = 2;
 125        else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
 126                config = 8;
 127
 128        num_var_ranges = config & 0xff;
 129}
 130
 131static void __init init_table(void)
 132{
 133        int i, max;
 134
 135        max = num_var_ranges;
 136        for (i = 0; i < max; i++)
 137                mtrr_usage_table[i] = 1;
 138}
 139
 140struct set_mtrr_data {
 141        unsigned long   smp_base;
 142        unsigned long   smp_size;
 143        unsigned int    smp_reg;
 144        mtrr_type       smp_type;
 145};
 146
 147/**
 148 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
 149 * by all the CPUs.
 150 * @info: pointer to mtrr configuration data
 151 *
  152 * Always returns 0; the int return type is required by stop_machine().
 153 */
 154static int mtrr_rendezvous_handler(void *info)
 155{
 156        struct set_mtrr_data *data = info;
 157
 158        /*
 159         * We use this same function to initialize the mtrrs during boot,
 160         * resume, runtime cpu online and on an explicit request to set a
 161         * specific MTRR.
 162         *
 163         * During boot or suspend, the state of the boot cpu's mtrrs has been
 164         * saved, and we want to replicate that across all the cpus that come
 165         * online (either at the end of boot or resume or during a runtime cpu
  166         * online). If we're doing that, data->smp_reg is set to ~0U and on
  167         * all the cpus we do mtrr_if->set_all() (on the logical cpu that
  168         * started the boot/resume sequence, this might be a duplicate
  169         * set_all()).
 170         */
 171        if (data->smp_reg != ~0U) {
 172                mtrr_if->set(data->smp_reg, data->smp_base,
 173                             data->smp_size, data->smp_type);
 174        } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 175                mtrr_if->set_all();
 176        }
 177        return 0;
 178}
 179
 180static inline int types_compatible(mtrr_type type1, mtrr_type type2)
 181{
 182        return type1 == MTRR_TYPE_UNCACHABLE ||
 183               type2 == MTRR_TYPE_UNCACHABLE ||
 184               (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
 185               (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
 186}
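/*
 * For example, types_compatible(MTRR_TYPE_WRTHROUGH, MTRR_TYPE_WRBACK) and
 * any pairing that involves MTRR_TYPE_UNCACHABLE are treated as compatible,
 * so overlapping requests of those types are tolerated below, whereas e.g.
 * (MTRR_TYPE_WRCOMB, MTRR_TYPE_WRBACK) is not and makes mtrr_add_page()
 * reject the overlapping request.
 */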
 187
 188/**
 189 * set_mtrr - update mtrrs on all processors
 190 * @reg:        mtrr in question
 191 * @base:       mtrr base
 192 * @size:       mtrr size
 193 * @type:       mtrr type
 194 *
 195 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 196 *
 197 * 1. Queue work to do the following on all processors:
 198 * 2. Disable Interrupts
 199 * 3. Wait for all procs to do so
 200 * 4. Enter no-fill cache mode
 201 * 5. Flush caches
 202 * 6. Clear PGE bit
 203 * 7. Flush all TLBs
 204 * 8. Disable all range registers
 205 * 9. Update the MTRRs
 206 * 10. Enable all range registers
 207 * 11. Flush all TLBs and caches again
 208 * 12. Enter normal cache mode and reenable caching
 209 * 13. Set PGE
 210 * 14. Wait for buddies to catch up
 211 * 15. Enable interrupts.
 212 *
 213 * What does that mean for us? Well, stop_machine() will ensure that
 214 * the rendezvous handler is started on each CPU. And in lockstep they
  215 * do the state transition of disabling interrupts, updating MTRRs
  216 * (the CPU vendors may each do it differently, so we call the mtrr_if->set()
  217 * callback and let them take care of it) and enabling interrupts.
 218 *
 219 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 220 * becomes nops.
 221 */
 222static void
 223set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
 224{
 225        struct set_mtrr_data data = { .smp_reg = reg,
 226                                      .smp_base = base,
 227                                      .smp_size = size,
 228                                      .smp_type = type
 229                                    };
 230
 231        stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
 232}
 233
 234static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
 235                                      unsigned long size, mtrr_type type)
 236{
 237        struct set_mtrr_data data = { .smp_reg = reg,
 238                                      .smp_base = base,
 239                                      .smp_size = size,
 240                                      .smp_type = type
 241                                    };
 242
 243        stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
 244                                       cpu_callout_mask);
 245}
 246
 247/**
 248 * mtrr_add_page - Add a memory type region
 249 * @base: Physical base address of region in pages (in units of 4 kB!)
 250 * @size: Physical size of region in pages (4 kB)
 251 * @type: Type of MTRR desired
 252 * @increment: If this is true do usage counting on the region
 253 *
 254 * Memory type region registers control the caching on newer Intel and
  255 * non-Intel processors. This function allows drivers to request that an
  256 * MTRR be added. The details and hardware specifics of each processor's
 257 * implementation are hidden from the caller, but nevertheless the
 258 * caller should expect to need to provide a power of two size on an
 259 * equivalent power of two boundary.
 260 *
 261 * If the region cannot be added either because all regions are in use
  262 * or the CPU cannot support it, a negative value is returned. On success
 263 * the register number for this entry is returned, but should be treated
 264 * as a cookie only.
 265 *
 266 * On a multiprocessor machine the changes are made to all processors.
 267 * This is required on x86 by the Intel processors.
 268 *
 269 * The available types are
 270 *
 271 * %MTRR_TYPE_UNCACHABLE - No caching
 272 *
  273 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible
 274 *
 275 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 276 *
 277 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 278 *
 279 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 280 * failures and do not wish system log messages to be sent.
 281 */
 282int mtrr_add_page(unsigned long base, unsigned long size,
 283                  unsigned int type, bool increment)
 284{
 285        unsigned long lbase, lsize;
 286        int i, replace, error;
 287        mtrr_type ltype;
 288
 289        if (!mtrr_if)
 290                return -ENXIO;
 291
 292        error = mtrr_if->validate_add_page(base, size, type);
 293        if (error)
 294                return error;
 295
 296        if (type >= MTRR_NUM_TYPES) {
 297                pr_warning("mtrr: type: %u invalid\n", type);
 298                return -EINVAL;
 299        }
 300
 301        /* If the type is WC, check that this processor supports it */
 302        if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
 303                pr_warning("mtrr: your processor doesn't support write-combining\n");
 304                return -ENOSYS;
 305        }
 306
 307        if (!size) {
 308                pr_warning("mtrr: zero sized request\n");
 309                return -EINVAL;
 310        }
 311
 312        if ((base | (base + size - 1)) >>
 313            (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
 314                pr_warning("mtrr: base or size exceeds the MTRR width\n");
 315                return -EINVAL;
 316        }
 317
 318        error = -EINVAL;
 319        replace = -1;
 320
 321        /* No CPU hotplug when we change MTRR entries */
 322        get_online_cpus();
 323
 324        /* Search for existing MTRR  */
 325        mutex_lock(&mtrr_mutex);
 326        for (i = 0; i < num_var_ranges; ++i) {
 327                mtrr_if->get(i, &lbase, &lsize, &ltype);
 328                if (!lsize || base > lbase + lsize - 1 ||
 329                    base + size - 1 < lbase)
 330                        continue;
 331                /*
 332                 * At this point we know there is some kind of
 333                 * overlap/enclosure
 334                 */
 335                if (base < lbase || base + size - 1 > lbase + lsize - 1) {
 336                        if (base <= lbase &&
 337                            base + size - 1 >= lbase + lsize - 1) {
 338                                /*  New region encloses an existing region  */
 339                                if (type == ltype) {
 340                                        replace = replace == -1 ? i : -2;
 341                                        continue;
 342                                } else if (types_compatible(type, ltype))
 343                                        continue;
 344                        }
 345                        pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
 346                                " 0x%lx000,0x%lx000\n", base, size, lbase,
 347                                lsize);
 348                        goto out;
 349                }
 350                /* New region is enclosed by an existing region */
 351                if (ltype != type) {
 352                        if (types_compatible(type, ltype))
 353                                continue;
 354                        pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
 355                                base, size, mtrr_attrib_to_str(ltype),
 356                                mtrr_attrib_to_str(type));
 357                        goto out;
 358                }
 359                if (increment)
 360                        ++mtrr_usage_table[i];
 361                error = i;
 362                goto out;
 363        }
 364        /* Search for an empty MTRR */
 365        i = mtrr_if->get_free_region(base, size, replace);
 366        if (i >= 0) {
 367                set_mtrr(i, base, size, type);
 368                if (likely(replace < 0)) {
 369                        mtrr_usage_table[i] = 1;
 370                } else {
 371                        mtrr_usage_table[i] = mtrr_usage_table[replace];
 372                        if (increment)
 373                                mtrr_usage_table[i]++;
 374                        if (unlikely(replace != i)) {
 375                                set_mtrr(replace, 0, 0, 0);
 376                                mtrr_usage_table[replace] = 0;
 377                        }
 378                }
 379        } else {
 380                pr_info("mtrr: no more MTRRs available\n");
 381        }
 382        error = i;
 383 out:
 384        mutex_unlock(&mtrr_mutex);
 385        put_online_cpus();
 386        return error;
 387}
 388
 389static int mtrr_check(unsigned long base, unsigned long size)
 390{
 391        if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
 392                pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
 393                pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
 394                dump_stack();
 395                return -1;
 396        }
 397        return 0;
 398}
 399
 400/**
 401 * mtrr_add - Add a memory type region
 402 * @base: Physical base address of region
 403 * @size: Physical size of region
 404 * @type: Type of MTRR desired
 405 * @increment: If this is true do usage counting on the region
 406 *
 407 * Memory type region registers control the caching on newer Intel and
  408 * non-Intel processors. This function allows drivers to request that an
  409 * MTRR be added. The details and hardware specifics of each processor's
 410 * implementation are hidden from the caller, but nevertheless the
 411 * caller should expect to need to provide a power of two size on an
 412 * equivalent power of two boundary.
 413 *
 414 * If the region cannot be added either because all regions are in use
  415 * or the CPU cannot support it, a negative value is returned. On success
 416 * the register number for this entry is returned, but should be treated
 417 * as a cookie only.
 418 *
 419 * On a multiprocessor machine the changes are made to all processors.
 420 * This is required on x86 by the Intel processors.
 421 *
 422 * The available types are
 423 *
 424 * %MTRR_TYPE_UNCACHABLE - No caching
 425 *
  426 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible
 427 *
 428 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 429 *
 430 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 431 *
 432 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 433 * failures and do not wish system log messages to be sent.
 434 */
 435int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
 436             bool increment)
 437{
 438        if (mtrr_check(base, size))
 439                return -EINVAL;
 440        return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
 441                             increment);
 442}
 443EXPORT_SYMBOL(mtrr_add);
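/*
 * Typical driver usage, as a sketch with made-up addresses (not code from
 * this file): map a 4 MiB prefetchable framebuffer write-combining and undo
 * it on teardown.  base and size are in bytes and must be page aligned.
 *
 *	int reg;
 *
 *	reg = mtrr_add(0xf8000000, 0x400000, MTRR_TYPE_WRCOMB, true);
 *	if (reg < 0)
 *		pr_warn("WC MTRR not available (%d)\n", reg);
 *	...
 *	if (reg >= 0)
 *		mtrr_del(reg, 0xf8000000, 0x400000);
 */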
 444
 445/**
 446 * mtrr_del_page - delete a memory type region
 447 * @reg: Register returned by mtrr_add
 448 * @base: Physical base address
 449 * @size: Size of region
 450 *
 451 * If register is supplied then base and size are ignored. This is
 452 * how drivers should call it.
 453 *
 454 * Releases an MTRR region. If the usage count drops to zero the
 455 * register is freed and the region returns to default state.
  456 * On success the register number is returned, on failure a negative
  457 * error code is returned.
 458 */
 459int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 460{
 461        int i, max;
 462        mtrr_type ltype;
 463        unsigned long lbase, lsize;
 464        int error = -EINVAL;
 465
 466        if (!mtrr_if)
 467                return -ENXIO;
 468
 469        max = num_var_ranges;
 470        /* No CPU hotplug when we change MTRR entries */
 471        get_online_cpus();
 472        mutex_lock(&mtrr_mutex);
 473        if (reg < 0) {
 474                /*  Search for existing MTRR  */
 475                for (i = 0; i < max; ++i) {
 476                        mtrr_if->get(i, &lbase, &lsize, &ltype);
 477                        if (lbase == base && lsize == size) {
 478                                reg = i;
 479                                break;
 480                        }
 481                }
 482                if (reg < 0) {
 483                        pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
 484                                 base, size);
 485                        goto out;
 486                }
 487        }
 488        if (reg >= max) {
 489                pr_warning("mtrr: register: %d too big\n", reg);
 490                goto out;
 491        }
 492        mtrr_if->get(reg, &lbase, &lsize, &ltype);
 493        if (lsize < 1) {
 494                pr_warning("mtrr: MTRR %d not used\n", reg);
 495                goto out;
 496        }
 497        if (mtrr_usage_table[reg] < 1) {
 498                pr_warning("mtrr: reg: %d has count=0\n", reg);
 499                goto out;
 500        }
 501        if (--mtrr_usage_table[reg] < 1)
 502                set_mtrr(reg, 0, 0, 0);
 503        error = reg;
 504 out:
 505        mutex_unlock(&mtrr_mutex);
 506        put_online_cpus();
 507        return error;
 508}
 509
 510/**
 511 * mtrr_del - delete a memory type region
 512 * @reg: Register returned by mtrr_add
 513 * @base: Physical base address
 514 * @size: Size of region
 515 *
 516 * If register is supplied then base and size are ignored. This is
 517 * how drivers should call it.
 518 *
 519 * Releases an MTRR region. If the usage count drops to zero the
 520 * register is freed and the region returns to default state.
  521 * On success the register number is returned, on failure a negative
  522 * error code is returned.
 523 */
 524int mtrr_del(int reg, unsigned long base, unsigned long size)
 525{
 526        if (mtrr_check(base, size))
 527                return -EINVAL;
 528        return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
 529}
 530EXPORT_SYMBOL(mtrr_del);
 531
 532/**
 533 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 534 * @base: Physical base address
 535 * @size: Size of region
 536 *
 537 * If PAT is available, this does nothing.  If PAT is unavailable, it
 538 * attempts to add a WC MTRR covering size bytes starting at base and
 539 * logs an error if this fails.
 540 *
  541 * Drivers must store the return value to pass to arch_phys_wc_del(),
 542 * but drivers should not try to interpret that return value.
 543 */
 544int arch_phys_wc_add(unsigned long base, unsigned long size)
 545{
 546        int ret;
 547
 548        if (pat_enabled)
 549                return 0;  /* Success!  (We don't need to do anything.) */
 550
 551        ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
 552        if (ret < 0) {
 553                pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
 554                        (void *)base, (void *)(base + size - 1));
 555                return ret;
 556        }
 557        return ret + MTRR_TO_PHYS_WC_OFFSET;
 558}
 559EXPORT_SYMBOL(arch_phys_wc_add);
 560
 561/*
 562 * arch_phys_wc_del - undoes arch_phys_wc_add
 563 * @handle: Return value from arch_phys_wc_add
 564 *
  565 * This cleans up after arch_phys_wc_add().
  566 *
  567 * The API guarantees that arch_phys_wc_del(error code) and
  568 * arch_phys_wc_del(0) do nothing.
 569 */
 570void arch_phys_wc_del(int handle)
 571{
 572        if (handle >= 1) {
 573                WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
 574                mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
 575        }
 576}
 577EXPORT_SYMBOL(arch_phys_wc_del);
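/*
 * Sketch of the intended pairing (hypothetical addresses, not code from this
 * file).  Because error codes and the PAT-enabled return of 0 are ignored by
 * arch_phys_wc_del(), the handle can be passed back unconditionally:
 *
 *	int wc_handle;
 *
 *	wc_handle = arch_phys_wc_add(0xd0000000, 8 * 1024 * 1024);
 *	...
 *	arch_phys_wc_del(wc_handle);
 */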
 578
 579/*
 580 * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
 581 * @handle: Return value from arch_phys_wc_add
 582 *
 583 * This will turn the return value from arch_phys_wc_add into an mtrr
 584 * index suitable for debugging.
 585 *
 586 * Note: There is no legitimate use for this function, except possibly
  587 * in a printk line.  Alas there is an illegitimate use in some ancient
 588 * drm ioctls.
 589 */
 590int phys_wc_to_mtrr_index(int handle)
 591{
 592        if (handle < MTRR_TO_PHYS_WC_OFFSET)
 593                return -1;
 594        else
 595                return handle - MTRR_TO_PHYS_WC_OFFSET;
 596}
 597EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
 598
 599/*
 600 * HACK ALERT!
  601 * These should be called implicitly, but that cannot happen until all the
  602 * initcall stuff is done...
 603 */
 604static void __init init_ifs(void)
 605{
 606#ifndef CONFIG_X86_64
 607        amd_init_mtrr();
 608        cyrix_init_mtrr();
 609        centaur_init_mtrr();
 610#endif
 611}
 612
  613/* The suspend/resume methods are only for CPUs without MTRRs. CPUs using the
  614 * generic MTRR driver don't require this.
 615 */
 616struct mtrr_value {
 617        mtrr_type       ltype;
 618        unsigned long   lbase;
 619        unsigned long   lsize;
 620};
 621
 622static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
 623
 624static int mtrr_save(void)
 625{
 626        int i;
 627
 628        for (i = 0; i < num_var_ranges; i++) {
 629                mtrr_if->get(i, &mtrr_value[i].lbase,
 630                                &mtrr_value[i].lsize,
 631                                &mtrr_value[i].ltype);
 632        }
 633        return 0;
 634}
 635
 636static void mtrr_restore(void)
 637{
 638        int i;
 639
 640        for (i = 0; i < num_var_ranges; i++) {
 641                if (mtrr_value[i].lsize) {
 642                        set_mtrr(i, mtrr_value[i].lbase,
 643                                    mtrr_value[i].lsize,
 644                                    mtrr_value[i].ltype);
 645                }
 646        }
 647}
 648
 649
 650
 651static struct syscore_ops mtrr_syscore_ops = {
 652        .suspend        = mtrr_save,
 653        .resume         = mtrr_restore,
 654};
 655
 656int __initdata changed_by_mtrr_cleanup;
 657
 658#define SIZE_OR_MASK_BITS(n)  (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
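/*
 * Worked example: with PAGE_SHIFT == 12, SIZE_OR_MASK_BITS(36) is
 * ~((1ULL << 24) - 1) == 0xffffffffff000000ULL, i.e. every page-number bit
 * above a 36-bit physical address space is set in size_or_mask.
 */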
 659/**
 660 * mtrr_bp_init - initialize mtrrs on the boot CPU
 661 *
 662 * This needs to be called early; before any of the other CPUs are
 663 * initialized (i.e. before smp_init()).
 664 *
 665 */
 666void __init mtrr_bp_init(void)
 667{
 668        u32 phys_addr;
 669
 670        init_ifs();
 671
 672        phys_addr = 32;
 673
 674        if (cpu_has_mtrr) {
 675                mtrr_if = &generic_mtrr_ops;
 676                size_or_mask = SIZE_OR_MASK_BITS(36);
 677                size_and_mask = 0x00f00000;
 678                phys_addr = 36;
 679
 680                /*
 681                 * This is an AMD specific MSR, but we assume(hope?) that
 682                 * Intel will implement it too when they extend the address
 683                 * bus of the Xeon.
 684                 */
 685                if (cpuid_eax(0x80000000) >= 0x80000008) {
 686                        phys_addr = cpuid_eax(0x80000008) & 0xff;
 687                        /* CPUID workaround for Intel 0F33/0F34 CPU */
 688                        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 689                            boot_cpu_data.x86 == 0xF &&
 690                            boot_cpu_data.x86_model == 0x3 &&
 691                            (boot_cpu_data.x86_mask == 0x3 ||
 692                             boot_cpu_data.x86_mask == 0x4))
 693                                phys_addr = 36;
 694
 695                        size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
 696                        size_and_mask = ~size_or_mask & 0xfffff00000ULL;
 697                } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
 698                           boot_cpu_data.x86 == 6) {
 699                        /*
 700                         * VIA C* family have Intel style MTRRs,
 701                         * but don't support PAE
 702                         */
 703                        size_or_mask = SIZE_OR_MASK_BITS(32);
 704                        size_and_mask = 0;
 705                        phys_addr = 32;
 706                }
 707        } else {
 708                switch (boot_cpu_data.x86_vendor) {
 709                case X86_VENDOR_AMD:
 710                        if (cpu_has_k6_mtrr) {
 711                                /* Pre-Athlon (K6) AMD CPU MTRRs */
 712                                mtrr_if = mtrr_ops[X86_VENDOR_AMD];
 713                                size_or_mask = SIZE_OR_MASK_BITS(32);
 714                                size_and_mask = 0;
 715                        }
 716                        break;
 717                case X86_VENDOR_CENTAUR:
 718                        if (cpu_has_centaur_mcr) {
 719                                mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
 720                                size_or_mask = SIZE_OR_MASK_BITS(32);
 721                                size_and_mask = 0;
 722                        }
 723                        break;
 724                case X86_VENDOR_CYRIX:
 725                        if (cpu_has_cyrix_arr) {
 726                                mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
 727                                size_or_mask = SIZE_OR_MASK_BITS(32);
 728                                size_and_mask = 0;
 729                        }
 730                        break;
 731                default:
 732                        break;
 733                }
 734        }
 735
 736        if (mtrr_if) {
 737                set_num_var_ranges();
 738                init_table();
 739                if (use_intel()) {
 740                        get_mtrr_state();
 741
 742                        if (mtrr_cleanup(phys_addr)) {
 743                                changed_by_mtrr_cleanup = 1;
 744                                mtrr_if->set_all();
 745                        }
 746                }
 747        }
 748}
 749
 750void mtrr_ap_init(void)
 751{
 752        if (!use_intel() || mtrr_aps_delayed_init)
 753                return;
 754        /*
  755         * Ideally we should hold mtrr_mutex here to avoid MTRR entries
  756         * being changed, but this routine is called at cpu boot time and
  757         * holding the lock would break that.
  758         *
  759         * This routine is called in two cases:
  760         *
  761         *   1. very early during software resume, when there absolutely
  762         *      are no MTRR entry changes;
  763         *
  764         *   2. cpu hot-add time. We let mtrr_add/del_page hold the cpu
  765         *      hotplug lock to prevent MTRR entry changes
 766         */
 767        set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
 768}
 769
 770/**
  771 * mtrr_save_state - Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
 772 */
 773void mtrr_save_state(void)
 774{
 775        int first_cpu;
 776
 777        get_online_cpus();
 778        first_cpu = cpumask_first(cpu_online_mask);
 779        smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
 780        put_online_cpus();
 781}
 782
 783void set_mtrr_aps_delayed_init(void)
 784{
 785        if (!use_intel())
 786                return;
 787
 788        mtrr_aps_delayed_init = true;
 789}
 790
 791/*
  792 * Delayed MTRR initialization for all APs
 793 */
 794void mtrr_aps_init(void)
 795{
 796        if (!use_intel())
 797                return;
 798
 799        /*
 800         * Check if someone has requested the delay of AP MTRR initialization,
 801         * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
 802         * then we are done.
 803         */
 804        if (!mtrr_aps_delayed_init)
 805                return;
 806
 807        set_mtrr(~0U, 0, 0, 0);
 808        mtrr_aps_delayed_init = false;
 809}
 810
 811void mtrr_bp_restore(void)
 812{
 813        if (!use_intel())
 814                return;
 815
 816        mtrr_if->set_all();
 817}
 818
 819static int __init mtrr_init_finialize(void)
 820{
 821        if (!mtrr_if)
 822                return 0;
 823
 824        if (use_intel()) {
 825                if (!changed_by_mtrr_cleanup)
 826                        mtrr_state_warn();
 827                return 0;
 828        }
 829
 830        /*
  831         * The CPU has no MTRRs and seems not to support SMP. Such CPUs
  832         * have vendor-specific drivers, so we use a tricky method to
  833         * support suspend/resume for them.
  834         *
  835         * TBD: is there any system with such a CPU that supports
  836         * suspend/resume? If not, we should remove this code.
 837         */
 838        register_syscore_ops(&mtrr_syscore_ops);
 839
 840        return 0;
 841}
 842subsys_initcall(mtrr_init_finialize);
 843