linux/arch/powerpc/platforms/pseries/setup.c
<<
>>
Prefs
   1/*
   2 *  64-bit pSeries and RS/6000 setup code.
   3 *
   4 *  Copyright (C) 1995  Linus Torvalds
   5 *  Adapted from 'alpha' version by Gary Thomas
   6 *  Modified by Cort Dougan (cort@cs.nmt.edu)
   7 *  Modified by PPC64 Team, IBM Corp
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License
  11 * as published by the Free Software Foundation; either version
  12 * 2 of the License, or (at your option) any later version.
  13 */
  14
  15/*
  16 * bootup setup stuff..
  17 */
  18
  19#include <linux/cpu.h>
  20#include <linux/errno.h>
  21#include <linux/sched.h>
  22#include <linux/kernel.h>
  23#include <linux/mm.h>
  24#include <linux/stddef.h>
  25#include <linux/unistd.h>
  26#include <linux/user.h>
  27#include <linux/tty.h>
  28#include <linux/major.h>
  29#include <linux/interrupt.h>
  30#include <linux/reboot.h>
  31#include <linux/init.h>
  32#include <linux/ioport.h>
  33#include <linux/console.h>
  34#include <linux/pci.h>
  35#include <linux/utsname.h>
  36#include <linux/adb.h>
  37#include <linux/export.h>
  38#include <linux/delay.h>
  39#include <linux/irq.h>
  40#include <linux/seq_file.h>
  41#include <linux/root_dev.h>
  42#include <linux/of.h>
  43#include <linux/of_pci.h>
  44#include <linux/memblock.h>
  45
  46#include <asm/mmu.h>
  47#include <asm/processor.h>
  48#include <asm/io.h>
  49#include <asm/pgtable.h>
  50#include <asm/prom.h>
  51#include <asm/rtas.h>
  52#include <asm/pci-bridge.h>
  53#include <asm/iommu.h>
  54#include <asm/dma.h>
  55#include <asm/machdep.h>
  56#include <asm/irq.h>
  57#include <asm/time.h>
  58#include <asm/nvram.h>
  59#include <asm/pmc.h>
  60#include <asm/xics.h>
  61#include <asm/xive.h>
  62#include <asm/ppc-pci.h>
  63#include <asm/i8259.h>
  64#include <asm/udbg.h>
  65#include <asm/smp.h>
  66#include <asm/firmware.h>
  67#include <asm/eeh.h>
  68#include <asm/reg.h>
  69#include <asm/plpar_wrappers.h>
  70#include <asm/kexec.h>
  71#include <asm/isa-bridge.h>
  72#include <asm/security_features.h>
  73#include <asm/asm-const.h>
  74
  75#include "pseries.h"
  76#include "../../../../drivers/pci/pci.h"
  77
/*
 * CMO parameters.  CMO_PrPSP and CMO_SecPSP start at -1 and are overwritten
 * from the "PrPSP" / "SecPSP" keys parsed in pSeries_cmo_feature_init().
 */
   78int CMO_PrPSP = -1;
   79int CMO_SecPSP = -1;
/* CMO page size in bytes; the default is 1 << IOMMU_PAGE_SHIFT_4K. */
   80unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
   81EXPORT_SYMBOL(CMO_PageSize);
   82
/* Set to 1 by fwnmi_init() when the "ibm,nmi-register" RTAS call returns 0. */
   83int fwnmi_active;  /* TRUE if an FWNMI handler is present */
  84
  85static void pSeries_show_cpuinfo(struct seq_file *m)
  86{
  87        struct device_node *root;
  88        const char *model = "";
  89
  90        root = of_find_node_by_path("/");
  91        if (root)
  92                model = of_get_property(root, "model", NULL);
  93        seq_printf(m, "machine\t\t: CHRP %s\n", model);
  94        of_node_put(root);
  95        if (radix_enabled())
  96                seq_printf(m, "MMU\t\t: Radix\n");
  97        else
  98                seq_printf(m, "MMU\t\t: Hash\n");
  99}
 100
 101/* Initialize firmware assisted non-maskable interrupts if
 102 * the firmware supports this feature.
 103 */
 104static void __init fwnmi_init(void)
 105{
 106        unsigned long system_reset_addr, machine_check_addr;
 107        u8 *mce_data_buf;
 108        unsigned int i;
 109        int nr_cpus = num_possible_cpus();
 110#ifdef CONFIG_PPC_BOOK3S_64
 111        struct slb_entry *slb_ptr;
 112        size_t size;
 113#endif
 114
 115        int ibm_nmi_register = rtas_token("ibm,nmi-register");
 116        if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
 117                return;
 118
 119        /* If the kernel's not linked at zero we point the firmware at low
 120         * addresses anyway, and use a trampoline to get to the real code. */
 121        system_reset_addr  = __pa(system_reset_fwnmi) - PHYSICAL_START;
 122        machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;
 123
 124        if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
 125                                machine_check_addr))
 126                fwnmi_active = 1;
 127
 128        /*
 129         * Allocate a chunk for per cpu buffer to hold rtas errorlog.
 130         * It will be used in real mode mce handler, hence it needs to be
 131         * below RMA.
 132         */
 133        mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus,
 134                                        RTAS_ERROR_LOG_MAX, ppc64_rma_size));
 135        for_each_possible_cpu(i) {
 136                paca_ptrs[i]->mce_data_buf = mce_data_buf +
 137                                                (RTAS_ERROR_LOG_MAX * i);
 138        }
 139
 140#ifdef CONFIG_PPC_BOOK3S_64
 141        /* Allocate per cpu slb area to save old slb contents during MCE */
 142        size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
 143        slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
 144                                           ppc64_rma_size));
 145        for_each_possible_cpu(i)
 146                paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
 147#endif
 148}
 149
 150static void pseries_8259_cascade(struct irq_desc *desc)
 151{
 152        struct irq_chip *chip = irq_desc_get_chip(desc);
 153        unsigned int cascade_irq = i8259_irq();
 154
 155        if (cascade_irq)
 156                generic_handle_irq(cascade_irq);
 157
 158        chip->irq_eoi(&desc->irq_data);
 159}
 160
 161static void __init pseries_setup_i8259_cascade(void)
 162{
 163        struct device_node *np, *old, *found = NULL;
 164        unsigned int cascade;
 165        const u32 *addrp;
 166        unsigned long intack = 0;
 167        int naddr;
 168
 169        for_each_node_by_type(np, "interrupt-controller") {
 170                if (of_device_is_compatible(np, "chrp,iic")) {
 171                        found = np;
 172                        break;
 173                }
 174        }
 175
 176        if (found == NULL) {
 177                printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
 178                return;
 179        }
 180
 181        cascade = irq_of_parse_and_map(found, 0);
 182        if (!cascade) {
 183                printk(KERN_ERR "pic: failed to map cascade interrupt");
 184                return;
 185        }
 186        pr_debug("pic: cascade mapped to irq %d\n", cascade);
 187
 188        for (old = of_node_get(found); old != NULL ; old = np) {
 189                np = of_get_parent(old);
 190                of_node_put(old);
 191                if (np == NULL)
 192                        break;
 193                if (!of_node_name_eq(np, "pci"))
 194                        continue;
 195                addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
 196                if (addrp == NULL)
 197                        continue;
 198                naddr = of_n_addr_cells(np);
 199                intack = addrp[naddr-1];
 200                if (naddr > 1)
 201                        intack |= ((unsigned long)addrp[naddr-2]) << 32;
 202        }
 203        if (intack)
 204                printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
 205        i8259_init(found, intack);
 206        of_node_put(found);
 207        irq_set_chained_handler(cascade, pseries_8259_cascade);
 208}
 209
 210static void __init pseries_init_irq(void)
 211{
 212        /* Try using a XIVE if available, otherwise use a XICS */
 213        if (!xive_spapr_init()) {
 214                xics_init();
 215                pseries_setup_i8259_cascade();
 216        }
 217}
 218
 219static void pseries_lpar_enable_pmcs(void)
 220{
 221        unsigned long set, reset;
 222
 223        set = 1UL << 63;
 224        reset = 0;
 225        plpar_hcall_norets(H_PERFMON, set, reset);
 226}
 227
 228static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
 229{
 230        struct of_reconfig_data *rd = data;
 231        struct device_node *parent, *np = rd->dn;
 232        struct pci_dn *pdn;
 233        int err = NOTIFY_OK;
 234
 235        switch (action) {
 236        case OF_RECONFIG_ATTACH_NODE:
 237                parent = of_get_parent(np);
 238                pdn = parent ? PCI_DN(parent) : NULL;
 239                if (pdn)
 240                        pci_add_device_node_info(pdn->phb, np);
 241
 242                of_node_put(parent);
 243                break;
 244        case OF_RECONFIG_DETACH_NODE:
 245                pdn = PCI_DN(np);
 246                if (pdn)
 247                        list_del(&pdn->list);
 248                break;
 249        default:
 250                err = NOTIFY_DONE;
 251                break;
 252        }
 253        return err;
 254}
 255
/* Notifier block registered by pSeries_setup_arch() for OF reconfig events. */
  256static struct notifier_block pci_dn_reconfig_nb = {
  257        .notifier_call = pci_dn_reconfig_notifier,
  258};
 259
/*
 * Slab cache for dispatch trace log buffers, created by
 * alloc_dispatch_log_kmem_cache(); stays NULL if that creation fails.
 */
  260struct kmem_cache *dtl_cache;
 261
 262#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 263/*
 264 * Allocate space for the dispatch trace log for all possible cpus
 265 * and register the buffers with the hypervisor.  This is used for
 266 * computing time stolen by the hypervisor.
 267 */
 268static int alloc_dispatch_logs(void)
 269{
 270        int cpu, ret;
 271        struct paca_struct *pp;
 272        struct dtl_entry *dtl;
 273
 274        if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 275                return 0;
 276
 277        if (!dtl_cache)
 278                return 0;
 279
 280        for_each_possible_cpu(cpu) {
 281                pp = paca_ptrs[cpu];
 282                dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
 283                if (!dtl) {
 284                        pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
 285                                cpu);
 286                        pr_warn("Stolen time statistics will be unreliable\n");
 287                        break;
 288                }
 289
 290                pp->dtl_ridx = 0;
 291                pp->dispatch_log = dtl;
 292                pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
 293                pp->dtl_curr = dtl;
 294        }
 295
 296        /* Register the DTL for the current (boot) cpu */
 297        dtl = get_paca()->dispatch_log;
 298        get_paca()->dtl_ridx = 0;
 299        get_paca()->dtl_curr = dtl;
 300        get_paca()->lppaca_ptr->dtl_idx = 0;
 301
 302        /* hypervisor reads buffer length from this field */
 303        dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
 304        ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
 305        if (ret)
 306                pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
 307                       "with %d\n", smp_processor_id(),
 308                       hard_smp_processor_id(), ret);
 309        get_paca()->lppaca_ptr->dtl_enable_mask = 2;
 310
 311        return 0;
 312}
 313#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 314static inline int alloc_dispatch_logs(void)
 315{
 316        return 0;
 317}
 318#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 319
 320static int alloc_dispatch_log_kmem_cache(void)
 321{
 322        dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
 323                                                DISPATCH_LOG_BYTES, 0, NULL);
 324        if (!dtl_cache) {
 325                pr_warn("Failed to create dispatch trace log buffer cache\n");
 326                pr_warn("Stolen time statistics will be unreliable\n");
 327                return 0;
 328        }
 329
 330        return alloc_dispatch_logs();
 331}
 332machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
 333
 334static void pseries_lpar_idle(void)
 335{
 336        /*
 337         * Default handler to go into low thread priority and possibly
 338         * low power mode by ceding processor to hypervisor
 339         */
 340
 341        /* Indicate to hypervisor that we are idle. */
 342        get_lppaca()->idle = 1;
 343
 344        /*
 345         * Yield the processor to the hypervisor.  We return if
 346         * an external interrupt occurs (which are driven prior
 347         * to returning here) or if a prod occurs from another
 348         * processor. When returning here, external interrupts
 349         * are enabled.
 350         */
 351        cede_processor();
 352
 353        get_lppaca()->idle = 0;
 354}
 355
 356/*
 357 * Enable relocation on during exceptions. This has partition wide scope and
 358 * may take a while to complete, if it takes longer than one second we will
 359 * just give up rather than wasting any more time on this - if that turns out
 360 * to ever be a problem in practice we can move this into a kernel thread to
 361 * finish off the process later in boot.
 362 */
 363void pseries_enable_reloc_on_exc(void)
 364{
 365        long rc;
 366        unsigned int delay, total_delay = 0;
 367
 368        while (1) {
 369                rc = enable_reloc_on_exceptions();
 370                if (!H_IS_LONG_BUSY(rc)) {
 371                        if (rc == H_P2) {
 372                                pr_info("Relocation on exceptions not"
 373                                        " supported\n");
 374                        } else if (rc != H_SUCCESS) {
 375                                pr_warn("Unable to enable relocation"
 376                                        " on exceptions: %ld\n", rc);
 377                        }
 378                        break;
 379                }
 380
 381                delay = get_longbusy_msecs(rc);
 382                total_delay += delay;
 383                if (total_delay > 1000) {
 384                        pr_warn("Warning: Giving up waiting to enable "
 385                                "relocation on exceptions (%u msec)!\n",
 386                                total_delay);
 387                        return;
 388                }
 389
 390                mdelay(delay);
 391        }
 392}
 393EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
 394
 395void pseries_disable_reloc_on_exc(void)
 396{
 397        long rc;
 398
 399        while (1) {
 400                rc = disable_reloc_on_exceptions();
 401                if (!H_IS_LONG_BUSY(rc))
 402                        break;
 403                mdelay(get_longbusy_msecs(rc));
 404        }
 405        if (rc != H_SUCCESS)
 406                pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
 407                        rc);
 408}
 409EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
 410
 411#ifdef CONFIG_KEXEC_CORE
 412static void pSeries_machine_kexec(struct kimage *image)
 413{
 414        if (firmware_has_feature(FW_FEATURE_SET_MODE))
 415                pseries_disable_reloc_on_exc();
 416
 417        default_machine_kexec(image);
 418}
 419#endif
 420
 421#ifdef __LITTLE_ENDIAN__
 422void pseries_big_endian_exceptions(void)
 423{
 424        long rc;
 425
 426        while (1) {
 427                rc = enable_big_endian_exceptions();
 428                if (!H_IS_LONG_BUSY(rc))
 429                        break;
 430                mdelay(get_longbusy_msecs(rc));
 431        }
 432
 433        /*
 434         * At this point it is unlikely panic() will get anything
 435         * out to the user, since this is called very late in kexec
 436         * but at least this will stop us from continuing on further
 437         * and creating an even more difficult to debug situation.
 438         *
 439         * There is a known problem when kdump'ing, if cpus are offline
 440         * the above call will fail. Rather than panicking again, keep
 441         * going and hope the kdump kernel is also little endian, which
 442         * it usually is.
 443         */
 444        if (rc && !kdump_in_progress())
 445                panic("Could not enable big endian exceptions");
 446}
 447
 448void pseries_little_endian_exceptions(void)
 449{
 450        long rc;
 451
 452        while (1) {
 453                rc = enable_little_endian_exceptions();
 454                if (!H_IS_LONG_BUSY(rc))
 455                        break;
 456                mdelay(get_longbusy_msecs(rc));
 457        }
 458        if (rc) {
 459                ppc_md.progress("H_SET_MODE LE exception fail", 0);
 460                panic("Could not enable little endian exceptions");
 461        }
 462}
 463#endif
 464
 465static void __init find_and_init_phbs(void)
 466{
 467        struct device_node *node;
 468        struct pci_controller *phb;
 469        struct device_node *root = of_find_node_by_path("/");
 470
 471        for_each_child_of_node(root, node) {
 472                if (!of_node_is_type(node, "pci") &&
 473                    !of_node_is_type(node, "pciex"))
 474                        continue;
 475
 476                phb = pcibios_alloc_controller(node);
 477                if (!phb)
 478                        continue;
 479                rtas_setup_phb(phb);
 480                pci_process_bridge_OF_ranges(phb, node, 0);
 481                isa_bridge_find_early(phb);
 482                phb->controller_ops = pseries_pci_controller_ops;
 483        }
 484
 485        of_node_put(root);
 486
 487        /*
 488         * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
 489         * in chosen.
 490         */
 491        of_pci_check_probe_only();
 492}
 493
 494static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 495{
 496        /*
 497         * The features below are disabled by default, so we instead look to see
 498         * if firmware has *enabled* them, and set them if so.
 499         */
 500        if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
 501                security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
 502
 503        if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
 504                security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
 505
 506        if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
 507                security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
 508
 509        if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
 510                security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
 511
 512        if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
 513                security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
 514
 515        if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
 516                security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
 517
 518        if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
 519                security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
 520
 521        if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
 522                security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
 523
 524        /*
 525         * The features below are enabled by default, so we instead look to see
 526         * if firmware has *disabled* them, and clear them if so.
 527         */
 528        if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
 529                security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
 530
 531        if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
 532                security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
 533
 534        if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
 535                security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 536}
 537
 538void pseries_setup_rfi_flush(void)
 539{
 540        struct h_cpu_char_result result;
 541        enum l1d_flush_type types;
 542        bool enable;
 543        long rc;
 544
 545        /*
 546         * Set features to the defaults assumed by init_cpu_char_feature_flags()
 547         * so it can set/clear again any features that might have changed after
 548         * migration, and in case the hypercall fails and it is not even called.
 549         */
 550        powerpc_security_features = SEC_FTR_DEFAULT;
 551
 552        rc = plpar_get_cpu_characteristics(&result);
 553        if (rc == H_SUCCESS)
 554                init_cpu_char_feature_flags(&result);
 555
 556        /*
 557         * We're the guest so this doesn't apply to us, clear it to simplify
 558         * handling of it elsewhere.
 559         */
 560        security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
 561
 562        types = L1D_FLUSH_FALLBACK;
 563
 564        if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
 565                types |= L1D_FLUSH_MTTRIG;
 566
 567        if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
 568                types |= L1D_FLUSH_ORI;
 569
 570        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
 571                 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
 572
 573        setup_rfi_flush(types, enable);
 574        setup_count_cache_flush();
 575}
 576
 577#ifdef CONFIG_PCI_IOV
/*
 * Cell offsets into the "ibm,open-sriov-vf-bar-info" property as used by
 * the functions below: cell NUM_RES_PROPERTY is the entry count, entries
 * begin at START_OF_ENTRIES and are NEXT_ENTRY cells apart; the remaining
 * values are offsets from the start of an entry.
 */
  578enum rtas_iov_fw_value_map {
  579        NUM_RES_PROPERTY  = 0, /* Cell holding the number of resources */
  580        LOW_INT           = 1, /* Entry offset: lowest 32 bits of the address */
  581        START_OF_ENTRIES  = 2, /* Cell index of the first entry */
  582        APERTURE_PROPERTY = 2, /* Entry offset: aperture size (two cells) */
  583        WDW_SIZE_PROPERTY = 4, /* Entry offset: window size (two cells) */
  584        NEXT_ENTRY        = 7  /* Distance in cells between entries */
  585};
 586
/* Selects which field pseries_get_iov_fw_value() reads from a BAR entry. */
  587enum get_iov_fw_value_index {
  588        BAR_ADDRS     = 1,    /* BAR address (first two cells of the entry) */
  589        APERTURE_SIZE = 2,    /* Aperture size */
  590        WDW_SIZE      = 3     /* Window size */
  591};
 592
 593resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
 594                                         enum get_iov_fw_value_index value)
 595{
 596        const int *indexes;
 597        struct device_node *dn = pci_device_to_OF_node(dev);
 598        int i, num_res, ret = 0;
 599
 600        indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
 601        if (!indexes)
 602                return  0;
 603
 604        /*
 605         * First element in the array is the number of Bars
 606         * returned.  Search through the list to find the matching
 607         * bar
 608         */
 609        num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
 610        if (resno >= num_res)
 611                return 0; /* or an errror */
 612
 613        i = START_OF_ENTRIES + NEXT_ENTRY * resno;
 614        switch (value) {
 615        case BAR_ADDRS:
 616                ret = of_read_number(&indexes[i], 2);
 617                break;
 618        case APERTURE_SIZE:
 619                ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
 620                break;
 621        case WDW_SIZE:
 622                ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
 623                break;
 624        }
 625
 626        return ret;
 627}
 628
 629void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
 630{
 631        struct resource *res;
 632        resource_size_t base, size;
 633        int i, r, num_res;
 634
 635        num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
 636        num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
 637        for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
 638             i += NEXT_ENTRY, r++) {
 639                res = &dev->resource[r + PCI_IOV_RESOURCES];
 640                base = of_read_number(&indexes[i], 2);
 641                size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
 642                res->flags = pci_parse_of_flags(of_read_number
 643                                                (&indexes[i + LOW_INT], 1), 0);
 644                res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
 645                res->name = pci_name(dev);
 646                res->start = base;
 647                res->end = base + size - 1;
 648        }
 649}
 650
 651void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
 652{
 653        struct resource *res, *root, *conflict;
 654        resource_size_t base, size;
 655        int i, r, num_res;
 656
 657        /*
 658         * First element in the array is the number of Bars
 659         * returned.  Search through the list to find the matching
 660         * bars assign them from firmware into resources structure.
 661         */
 662        num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
 663        for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
 664             i += NEXT_ENTRY, r++) {
 665                res = &dev->resource[r + PCI_IOV_RESOURCES];
 666                base = of_read_number(&indexes[i], 2);
 667                size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
 668                res->name = pci_name(dev);
 669                res->start = base;
 670                res->end = base + size - 1;
 671                root = &iomem_resource;
 672                dev_dbg(&dev->dev,
 673                        "pSeries IOV BAR %d: trying firmware assignment %pR\n",
 674                         r + PCI_IOV_RESOURCES, res);
 675                conflict = request_resource_conflict(root, res);
 676                if (conflict) {
 677                        dev_info(&dev->dev,
 678                                 "BAR %d: %pR conflicts with %s %pR\n",
 679                                 r + PCI_IOV_RESOURCES, res,
 680                                 conflict->name, conflict);
 681                        res->flags |= IORESOURCE_UNSET;
 682                }
 683        }
 684}
 685
 686static void pseries_disable_sriov_resources(struct pci_dev *pdev)
 687{
 688        int i;
 689
 690        pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
 691        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
 692                pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
 693}
 694
 695static void pseries_pci_fixup_resources(struct pci_dev *pdev)
 696{
 697        const int *indexes;
 698        struct device_node *dn = pci_device_to_OF_node(pdev);
 699
 700        /*Firmware must support open sriov otherwise dont configure*/
 701        indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
 702        if (indexes)
 703                of_pci_set_vf_bar_size(pdev, indexes);
 704        else
 705                pseries_disable_sriov_resources(pdev);
 706}
 707
 708static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
 709{
 710        const int *indexes;
 711        struct device_node *dn = pci_device_to_OF_node(pdev);
 712
 713        if (!pdev->is_physfn || pci_dev_is_added(pdev))
 714                return;
 715        /*Firmware must support open sriov otherwise dont configure*/
 716        indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
 717        if (indexes)
 718                of_pci_parse_iov_addrs(pdev, indexes);
 719        else
 720                pseries_disable_sriov_resources(pdev);
 721}
 722
 723static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
 724                                                          int resno)
 725{
 726        const __be32 *reg;
 727        struct device_node *dn = pci_device_to_OF_node(pdev);
 728
 729        /*Firmware must support open sriov otherwise report regular alignment*/
 730        reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
 731        if (!reg)
 732                return pci_iov_resource_size(pdev, resno);
 733
 734        if (!pdev->is_physfn)
 735                return 0;
 736        return pseries_get_iov_fw_value(pdev,
 737                                        resno - PCI_IOV_RESOURCES,
 738                                        APERTURE_SIZE);
 739}
 740#endif
 741
 742static void __init pSeries_setup_arch(void)
 743{
 744        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 745
 746        /* Discover PIC type and setup ppc_md accordingly */
 747        smp_init_pseries();
 748
 749
 750        /* openpic global configuration register (64-bit format). */
 751        /* openpic Interrupt Source Unit pointer (64-bit format). */
 752        /* python0 facility area (mmio) (64-bit format) REAL address. */
 753
 754        /* init to some ~sane value until calibrate_delay() runs */
 755        loops_per_jiffy = 50000000;
 756
 757        fwnmi_init();
 758
 759        pseries_setup_rfi_flush();
 760        setup_stf_barrier();
 761
 762        /* By default, only probe PCI (can be overridden by rtas_pci) */
 763        pci_add_flags(PCI_PROBE_ONLY);
 764
 765        /* Find and initialize PCI host bridges */
 766        init_pci_config_tokens();
 767        find_and_init_phbs();
 768        of_reconfig_notifier_register(&pci_dn_reconfig_nb);
 769
 770        pSeries_nvram_init();
 771
 772        if (firmware_has_feature(FW_FEATURE_LPAR)) {
 773                vpa_init(boot_cpuid);
 774                ppc_md.power_save = pseries_lpar_idle;
 775                ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
 776#ifdef CONFIG_PCI_IOV
 777                ppc_md.pcibios_fixup_resources =
 778                        pseries_pci_fixup_resources;
 779                ppc_md.pcibios_fixup_sriov =
 780                        pseries_pci_fixup_iov_resources;
 781                ppc_md.pcibios_iov_resource_alignment =
 782                        pseries_pci_iov_resource_alignment;
 783#endif
 784        } else {
 785                /* No special idle routine */
 786                ppc_md.enable_pmcs = power4_enable_pmcs;
 787        }
 788
 789        ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
 790}
 791
 792static void pseries_panic(char *str)
 793{
 794        panic_flush_kmsg_end();
 795        rtas_os_term(str);
 796}
 797
 798static int __init pSeries_init_panel(void)
 799{
 800        /* Manually leave the kernel version on the panel. */
 801#ifdef __BIG_ENDIAN__
 802        ppc_md.progress("Linux ppc64\n", 0);
 803#else
 804        ppc_md.progress("Linux ppc64le\n", 0);
 805#endif
 806        ppc_md.progress(init_utsname()->version, 0);
 807
 808        return 0;
 809}
 810machine_arch_initcall(pseries, pSeries_init_panel);
 811
 812static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
 813{
 814        return plpar_hcall_norets(H_SET_DABR, dabr);
 815}
 816
 817static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
 818{
 819        /* Have to set at least one bit in the DABRX according to PAPR */
 820        if (dabrx == 0 && dabr == 0)
 821                dabrx = DABRX_USER;
 822        /* PAPR says we can only set kernel and user bits */
 823        dabrx &= DABRX_KERNEL | DABRX_USER;
 824
 825        return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
 826}
 827
 828static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
 829{
 830        /* PAPR says we can't set HYP */
 831        dawrx &= ~DAWRX_HYP;
 832
 833        return  plpar_set_watchpoint0(dawr, dawrx);
 834}
 835
/* Parameter token passed to "ibm,get-system-parameter" for the CMO data. */
  836#define CMO_CHARACTERISTICS_TOKEN 44
/* Bound used when scanning the returned CMO string in rtas_data_buf. */
  837#define CMO_MAXLENGTH 1026
 838
 839void pSeries_coalesce_init(void)
 840{
 841        struct hvcall_mpp_x_data mpp_x_data;
 842
 843        if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
 844                powerpc_firmware_features |= FW_FEATURE_XCMO;
 845        else
 846                powerpc_firmware_features &= ~FW_FEATURE_XCMO;
 847}
 848
 849/**
 850 * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
 851 * handle that here. (Stolen from parse_system_parameter_string)
 852 */
 853static void pSeries_cmo_feature_init(void)
 854{
 855        char *ptr, *key, *value, *end;
 856        int call_status;
 857        int page_order = IOMMU_PAGE_SHIFT_4K;
 858
 859        pr_debug(" -> fw_cmo_feature_init()\n");
 860        spin_lock(&rtas_data_buf_lock);
 861        memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
 862        call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
 863                                NULL,
 864                                CMO_CHARACTERISTICS_TOKEN,
 865                                __pa(rtas_data_buf),
 866                                RTAS_DATA_BUF_SIZE);
 867
 868        if (call_status != 0) {
 869                spin_unlock(&rtas_data_buf_lock);
 870                pr_debug("CMO not available\n");
 871                pr_debug(" <- fw_cmo_feature_init()\n");
 872                return;
 873        }
 874
 875        end = rtas_data_buf + CMO_MAXLENGTH - 2;
 876        ptr = rtas_data_buf + 2;        /* step over strlen value */
 877        key = value = ptr;
 878
 879        while (*ptr && (ptr <= end)) {
 880                /* Separate the key and value by replacing '=' with '\0' and
 881                 * point the value at the string after the '='
 882                 */
 883                if (ptr[0] == '=') {
 884                        ptr[0] = '\0';
 885                        value = ptr + 1;
 886                } else if (ptr[0] == '\0' || ptr[0] == ',') {
 887                        /* Terminate the string containing the key/value pair */
 888                        ptr[0] = '\0';
 889
 890                        if (key == value) {
 891                                pr_debug("Malformed key/value pair\n");
 892                                /* Never found a '=', end processing */
 893                                break;
 894                        }
 895
 896                        if (0 == strcmp(key, "CMOPageSize"))
 897                                page_order = simple_strtol(value, NULL, 10);
 898                        else if (0 == strcmp(key, "PrPSP"))
 899                                CMO_PrPSP = simple_strtol(value, NULL, 10);
 900                        else if (0 == strcmp(key, "SecPSP"))
 901                                CMO_SecPSP = simple_strtol(value, NULL, 10);
 902                        value = key = ptr + 1;
 903                }
 904                ptr++;
 905        }
 906
 907        /* Page size is returned as the power of 2 of the page size,
 908         * convert to the page size in bytes before returning
 909         */
 910        CMO_PageSize = 1 << page_order;
 911        pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);
 912
 913        if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
 914                pr_info("CMO enabled\n");
 915                pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 916                         CMO_SecPSP);
 917                powerpc_firmware_features |= FW_FEATURE_CMO;
 918                pSeries_coalesce_init();
 919        } else
 920                pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 921                         CMO_SecPSP);
 922        spin_unlock(&rtas_data_buf_lock);
 923        pr_debug(" <- fw_cmo_feature_init()\n");
 924}
 925
 926/*
 927 * Early initialization.  Relocation is on but do not reference unbolted pages
 928 */
 929static void __init pseries_init(void)
 930{
 931        pr_debug(" -> pseries_init()\n");
 932
 933#ifdef CONFIG_HVC_CONSOLE
 934        if (firmware_has_feature(FW_FEATURE_LPAR))
 935                hvc_vio_init_early();
 936#endif
 937        if (firmware_has_feature(FW_FEATURE_XDABR))
 938                ppc_md.set_dabr = pseries_set_xdabr;
 939        else if (firmware_has_feature(FW_FEATURE_DABR))
 940                ppc_md.set_dabr = pseries_set_dabr;
 941
 942        if (firmware_has_feature(FW_FEATURE_SET_MODE))
 943                ppc_md.set_dawr = pseries_set_dawr;
 944
 945        pSeries_cmo_feature_init();
 946        iommu_init_early_pSeries();
 947
 948        pr_debug(" <- pseries_init()\n");
 949}
 950
 951/**
 952 * pseries_power_off - tell firmware about how to power off the system.
 953 *
 954 * This function calls either the power-off rtas token in normal cases
 955 * or the ibm,power-off-ups token (if present & requested) in case of
 956 * a power failure. If power-off token is used, power on will only be
 957 * possible with power button press. If ibm,power-off-ups token is used
 958 * it will allow auto poweron after power is restored.
 959 */
 960static void pseries_power_off(void)
 961{
 962        int rc;
 963        int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups");
 964
 965        if (rtas_flash_term_hook)
 966                rtas_flash_term_hook(SYS_POWER_OFF);
 967
 968        if (rtas_poweron_auto == 0 ||
 969                rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) {
 970                rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1);
 971                printk(KERN_INFO "RTAS power-off returned %d\n", rc);
 972        } else {
 973                rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL);
 974                printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
 975        }
 976        for (;;);
 977}
 978
 979static int __init pSeries_probe(void)
 980{
 981        if (!of_node_is_type(of_root, "chrp"))
 982                return 0;
 983
 984        /* Cell blades firmware claims to be chrp while it's not. Until this
 985         * is fixed, we need to avoid those here.
 986         */
 987        if (of_machine_is_compatible("IBM,CPBW-1.0") ||
 988            of_machine_is_compatible("IBM,CBEA"))
 989                return 0;
 990
 991        pm_power_off = pseries_power_off;
 992
 993        pr_debug("Machine is%s LPAR !\n",
 994                 (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
 995
 996        pseries_init();
 997
 998        return 1;
 999}
1000
1001static int pSeries_pci_probe_mode(struct pci_bus *bus)
1002{
1003        if (firmware_has_feature(FW_FEATURE_LPAR))
1004                return PCI_PROBE_DEVTREE;
1005        return PCI_PROBE_NORMAL;
1006}
1007
1008struct pci_controller_ops pseries_pci_controller_ops = {
1009        .probe_mode             = pSeries_pci_probe_mode,
1010};
1011
1012define_machine(pseries) {
1013        .name                   = "pSeries",
1014        .probe                  = pSeries_probe,
1015        .setup_arch             = pSeries_setup_arch,
1016        .init_IRQ               = pseries_init_irq,
1017        .show_cpuinfo           = pSeries_show_cpuinfo,
1018        .log_error              = pSeries_log_error,
1019        .pcibios_fixup          = pSeries_final_fixup,
1020        .restart                = rtas_restart,
1021        .halt                   = rtas_halt,
1022        .panic                  = pseries_panic,
1023        .get_boot_time          = rtas_get_boot_time,
1024        .get_rtc_time           = rtas_get_rtc_time,
1025        .set_rtc_time           = rtas_set_rtc_time,
1026        .calibrate_decr         = generic_calibrate_decr,
1027        .progress               = rtas_progress,
1028        .system_reset_exception = pSeries_system_reset_exception,
1029        .machine_check_early    = pseries_machine_check_realmode,
1030        .machine_check_exception = pSeries_machine_check_exception,
1031#ifdef CONFIG_KEXEC_CORE
1032        .machine_kexec          = pSeries_machine_kexec,
1033        .kexec_cpu_down         = pseries_kexec_cpu_down,
1034#endif
1035#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
1036        .memory_block_size      = pseries_memory_block_size,
1037#endif
1038};
1039