linux/arch/powerpc/platforms/pseries/msi.c
<<
>>
Prefs
   1/*
   2 * Copyright 2006 Jake Moilanen <moilanen@austin.ibm.com>, IBM Corp.
   3 * Copyright 2006-2007 Michael Ellerman, IBM Corp.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License
   7 * as published by the Free Software Foundation; version 2 of the
   8 * License.
   9 *
  10 */
  11
  12#include <linux/device.h>
  13#include <linux/irq.h>
  14#include <linux/msi.h>
  15
  16#include <asm/rtas.h>
  17#include <asm/hw_irq.h>
  18#include <asm/ppc-pci.h>
  19
  20#include "pseries.h"
  21
/*
 * RTAS service tokens, looked up once in rtas_msi_init():
 * query_token is for "ibm,query-interrupt-source-number" and
 * change_token is for "ibm,change-msi".
 */
static int query_token, change_token;

/*
 * Function codes passed as the "func" argument of the ibm,change-msi
 * call (see rtas_change_msi()). RTAS_QUERY_FN and RTAS_RESET_FN are not
 * referenced elsewhere in this file.
 */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4
#define RTAS_CHANGE_32MSI_FN    5
  30
  31/* RTAS Helpers */
  32
  33static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs)
  34{
  35        u32 addr, seq_num, rtas_ret[3];
  36        unsigned long buid;
  37        int rc;
  38
  39        addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
  40        buid = pdn->phb->buid;
  41
  42        seq_num = 1;
  43        do {
  44                if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN ||
  45                    func == RTAS_CHANGE_32MSI_FN)
  46                        rc = rtas_call(change_token, 6, 4, rtas_ret, addr,
  47                                        BUID_HI(buid), BUID_LO(buid),
  48                                        func, num_irqs, seq_num);
  49                else
  50                        rc = rtas_call(change_token, 6, 3, rtas_ret, addr,
  51                                        BUID_HI(buid), BUID_LO(buid),
  52                                        func, num_irqs, seq_num);
  53
  54                seq_num = rtas_ret[1];
  55        } while (rtas_busy_delay(rc));
  56
  57        /*
  58         * If the RTAS call succeeded, return the number of irqs allocated.
  59         * If not, make sure we return a negative error code.
  60         */
  61        if (rc == 0)
  62                rc = rtas_ret[0];
  63        else if (rc > 0)
  64                rc = -rc;
  65
  66        pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n",
  67                 func, num_irqs, rtas_ret[0], rc);
  68
  69        return rc;
  70}
  71
  72static void rtas_disable_msi(struct pci_dev *pdev)
  73{
  74        struct pci_dn *pdn;
  75
  76        pdn = pci_get_pdn(pdev);
  77        if (!pdn)
  78                return;
  79
  80        /*
  81         * disabling MSI with the explicit interface also disables MSI-X
  82         */
  83        if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) {
  84                /* 
  85                 * may have failed because explicit interface is not
  86                 * present
  87                 */
  88                if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) {
  89                        pr_debug("rtas_msi: Setting MSIs to 0 failed!\n");
  90                }
  91        }
  92}
  93
  94static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
  95{
  96        u32 addr, rtas_ret[2];
  97        unsigned long buid;
  98        int rc;
  99
 100        addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
 101        buid = pdn->phb->buid;
 102
 103        do {
 104                rc = rtas_call(query_token, 4, 3, rtas_ret, addr,
 105                               BUID_HI(buid), BUID_LO(buid), offset);
 106        } while (rtas_busy_delay(rc));
 107
 108        if (rc) {
 109                pr_debug("rtas_msi: error (%d) querying source number\n", rc);
 110                return rc;
 111        }
 112
 113        return rtas_ret[0];
 114}
 115
 116static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
 117{
 118        struct msi_desc *entry;
 119
 120        list_for_each_entry(entry, &pdev->msi_list, list) {
 121                if (entry->irq == NO_IRQ)
 122                        continue;
 123
 124                irq_set_msi_desc(entry->irq, NULL);
 125                irq_dispose_mapping(entry->irq);
 126        }
 127
 128        rtas_disable_msi(pdev);
 129}
 130
 131static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
 132{
 133        struct device_node *dn;
 134        struct pci_dn *pdn;
 135        const __be32 *p;
 136        u32 req_msi;
 137
 138        pdn = pci_get_pdn(pdev);
 139        if (!pdn)
 140                return -ENODEV;
 141
 142        dn = pdn->node;
 143
 144        p = of_get_property(dn, prop_name, NULL);
 145        if (!p) {
 146                pr_debug("rtas_msi: No %s on %s\n", prop_name, dn->full_name);
 147                return -ENOENT;
 148        }
 149
 150        req_msi = be32_to_cpup(p);
 151        if (req_msi < nvec) {
 152                pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec);
 153
 154                if (req_msi == 0) /* Be paranoid */
 155                        return -ENOSPC;
 156
 157                return req_msi;
 158        }
 159
 160        return 0;
 161}
 162
 163static int check_req_msi(struct pci_dev *pdev, int nvec)
 164{
 165        return check_req(pdev, nvec, "ibm,req#msi");
 166}
 167
 168static int check_req_msix(struct pci_dev *pdev, int nvec)
 169{
 170        return check_req(pdev, nvec, "ibm,req#msi-x");
 171}
 172
 173/* Quota calculation */
 174
 175static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 176{
 177        struct device_node *dn;
 178        const __be32 *p;
 179
 180        dn = of_node_get(pci_device_to_OF_node(dev));
 181        while (dn) {
 182                p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
 183                if (p) {
 184                        pr_debug("rtas_msi: found prop on dn %s\n",
 185                                dn->full_name);
 186                        *total = be32_to_cpup(p);
 187                        return dn;
 188                }
 189
 190                dn = of_get_next_parent(dn);
 191        }
 192
 193        return NULL;
 194}
 195
 196static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 197{
 198        struct device_node *dn;
 199        struct pci_dn *pdn;
 200        struct eeh_dev *edev;
 201
 202        /* Found our PE and assume 8 at that point. */
 203
 204        dn = pci_device_to_OF_node(dev);
 205        if (!dn)
 206                return NULL;
 207
 208        /* Get the top level device in the PE */
 209        edev = pdn_to_eeh_dev(PCI_DN(dn));
 210        if (edev->pe)
 211                edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
 212        pdn = eeh_dev_to_pdn(edev);
 213        dn = pdn ? pdn->node : NULL;
 214        if (!dn)
 215                return NULL;
 216
 217        /* We actually want the parent */
 218        dn = of_get_parent(dn);
 219        if (!dn)
 220                return NULL;
 221
 222        /* Hardcode of 8 for old firmwares */
 223        *total = 8;
 224        pr_debug("rtas_msi: using PE dn %s\n", dn->full_name);
 225
 226        return dn;
 227}
 228
/*
 * Working state for msi_quota_for_device(), filled in by the
 * count_non_bridge_devices() and count_spare_msis() callbacks.
 */
struct msi_counts {
        /* OF node of the device whose request is being evaluated */
        struct device_node *requestor;
        /* number of non-bridge devices found under the PE */
        int num_devices;
        /* number of MSIs the requestor is asking for */
        int request;
        /* per-device share: total / num_devices, later raised by spare */
        int quota;
        /* MSIs left unused by devices wanting less than their quota */
        int spare;
        /* number of devices wanting more than their quota */
        int over_quota;
};
 237
 238static void *count_non_bridge_devices(struct device_node *dn, void *data)
 239{
 240        struct msi_counts *counts = data;
 241        const __be32 *p;
 242        u32 class;
 243
 244        pr_debug("rtas_msi: counting %s\n", dn->full_name);
 245
 246        p = of_get_property(dn, "class-code", NULL);
 247        class = p ? be32_to_cpup(p) : 0;
 248
 249        if ((class >> 8) != PCI_CLASS_BRIDGE_PCI)
 250                counts->num_devices++;
 251
 252        return NULL;
 253}
 254
 255static void *count_spare_msis(struct device_node *dn, void *data)
 256{
 257        struct msi_counts *counts = data;
 258        const __be32 *p;
 259        int req;
 260
 261        if (dn == counts->requestor)
 262                req = counts->request;
 263        else {
 264                /* We don't know if a driver will try to use MSI or MSI-X,
 265                 * so we just have to punt and use the larger of the two. */
 266                req = 0;
 267                p = of_get_property(dn, "ibm,req#msi", NULL);
 268                if (p)
 269                        req = be32_to_cpup(p);
 270
 271                p = of_get_property(dn, "ibm,req#msi-x", NULL);
 272                if (p)
 273                        req = max(req, (int)be32_to_cpup(p));
 274        }
 275
 276        if (req < counts->quota)
 277                counts->spare += counts->quota - req;
 278        else if (req > counts->quota)
 279                counts->over_quota++;
 280
 281        return NULL;
 282}
 283
 284static int msi_quota_for_device(struct pci_dev *dev, int request)
 285{
 286        struct device_node *pe_dn;
 287        struct msi_counts counts;
 288        int total;
 289
 290        pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev),
 291                  request);
 292
 293        pe_dn = find_pe_total_msi(dev, &total);
 294        if (!pe_dn)
 295                pe_dn = find_pe_dn(dev, &total);
 296
 297        if (!pe_dn) {
 298                pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev));
 299                goto out;
 300        }
 301
 302        pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name);
 303
 304        memset(&counts, 0, sizeof(struct msi_counts));
 305
 306        /* Work out how many devices we have below this PE */
 307        traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts);
 308
 309        if (counts.num_devices == 0) {
 310                pr_err("rtas_msi: found 0 devices under PE for %s\n",
 311                        pci_name(dev));
 312                goto out;
 313        }
 314
 315        counts.quota = total / counts.num_devices;
 316        if (request <= counts.quota)
 317                goto out;
 318
 319        /* else, we have some more calculating to do */
 320        counts.requestor = pci_device_to_OF_node(dev);
 321        counts.request = request;
 322        traverse_pci_devices(pe_dn, count_spare_msis, &counts);
 323
 324        /* If the quota isn't an integer multiple of the total, we can
 325         * use the remainder as spare MSIs for anyone that wants them. */
 326        counts.spare += total % counts.num_devices;
 327
 328        /* Divide any spare by the number of over-quota requestors */
 329        if (counts.over_quota)
 330                counts.quota += counts.spare / counts.over_quota;
 331
 332        /* And finally clamp the request to the possibly adjusted quota */
 333        request = min(counts.quota, request);
 334
 335        pr_debug("rtas_msi: request clamped to quota %d\n", request);
 336out:
 337        of_node_put(pe_dn);
 338
 339        return request;
 340}
 341
 342static int check_msix_entries(struct pci_dev *pdev)
 343{
 344        struct msi_desc *entry;
 345        int expected;
 346
 347        /* There's no way for us to express to firmware that we want
 348         * a discontiguous, or non-zero based, range of MSI-X entries.
 349         * So we must reject such requests. */
 350
 351        expected = 0;
 352        list_for_each_entry(entry, &pdev->msi_list, list) {
 353                if (entry->msi_attrib.entry_nr != expected) {
 354                        pr_debug("rtas_msi: bad MSI-X entries.\n");
 355                        return -EINVAL;
 356                }
 357                expected++;
 358        }
 359
 360        return 0;
 361}
 362
 363static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
 364{
 365        u32 addr_hi, addr_lo;
 366
 367        /*
 368         * We should only get in here for IODA1 configs. This is based on the
 369         * fact that we using RTAS for MSIs, we don't have the 32 bit MSI RTAS
 370         * support, and we are in a PCIe Gen2 slot.
 371         */
 372        dev_info(&pdev->dev,
 373                 "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n");
 374        pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi);
 375        addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4);
 376        pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo);
 377        pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0);
 378}
 379
 380static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
 381{
 382        struct pci_dn *pdn;
 383        int hwirq, virq, i, quota, rc;
 384        struct msi_desc *entry;
 385        struct msi_msg msg;
 386        int nvec = nvec_in;
 387        int use_32bit_msi_hack = 0;
 388
 389        if (type == PCI_CAP_ID_MSIX)
 390                rc = check_req_msix(pdev, nvec);
 391        else
 392                rc = check_req_msi(pdev, nvec);
 393
 394        if (rc)
 395                return rc;
 396
 397        quota = msi_quota_for_device(pdev, nvec);
 398
 399        if (quota && quota < nvec)
 400                return quota;
 401
 402        if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev))
 403                return -EINVAL;
 404
 405        /*
 406         * Firmware currently refuse any non power of two allocation
 407         * so we round up if the quota will allow it.
 408         */
 409        if (type == PCI_CAP_ID_MSIX) {
 410                int m = roundup_pow_of_two(nvec);
 411                quota = msi_quota_for_device(pdev, m);
 412
 413                if (quota >= m)
 414                        nvec = m;
 415        }
 416
 417        pdn = pci_get_pdn(pdev);
 418
 419        /*
 420         * Try the new more explicit firmware interface, if that fails fall
 421         * back to the old interface. The old interface is known to never
 422         * return MSI-Xs.
 423         */
 424again:
 425        if (type == PCI_CAP_ID_MSI) {
 426                if (pdev->no_64bit_msi) {
 427                        rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
 428                        if (rc < 0) {
 429                                /*
 430                                 * We only want to run the 32 bit MSI hack below if
 431                                 * the max bus speed is Gen2 speed
 432                                 */
 433                                if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT)
 434                                        return rc;
 435
 436                                use_32bit_msi_hack = 1;
 437                        }
 438                } else
 439                        rc = -1;
 440
 441                if (rc < 0)
 442                        rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
 443
 444                if (rc < 0) {
 445                        pr_debug("rtas_msi: trying the old firmware call.\n");
 446                        rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec);
 447                }
 448
 449                if (use_32bit_msi_hack && rc > 0)
 450                        rtas_hack_32bit_msi_gen2(pdev);
 451        } else
 452                rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
 453
 454        if (rc != nvec) {
 455                if (nvec != nvec_in) {
 456                        nvec = nvec_in;
 457                        goto again;
 458                }
 459                pr_debug("rtas_msi: rtas_change_msi() failed\n");
 460                return rc;
 461        }
 462
 463        i = 0;
 464        list_for_each_entry(entry, &pdev->msi_list, list) {
 465                hwirq = rtas_query_irq_number(pdn, i++);
 466                if (hwirq < 0) {
 467                        pr_debug("rtas_msi: error (%d) getting hwirq\n", rc);
 468                        return hwirq;
 469                }
 470
 471                virq = irq_create_mapping(NULL, hwirq);
 472
 473                if (virq == NO_IRQ) {
 474                        pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
 475                        return -ENOSPC;
 476                }
 477
 478                dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq);
 479                irq_set_msi_desc(virq, entry);
 480
 481                /* Read config space back so we can restore after reset */
 482                __read_msi_msg(entry, &msg);
 483                entry->msg = msg;
 484        }
 485
 486        return 0;
 487}
 488
 489static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
 490{
 491        /* No LSI -> leave MSIs (if any) configured */
 492        if (pdev->irq == NO_IRQ) {
 493                dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n");
 494                return;
 495        }
 496
 497        /* No MSI -> MSIs can't have been assigned by fw, leave LSI */
 498        if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) {
 499                dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n");
 500                return;
 501        }
 502
 503        dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n");
 504        rtas_disable_msi(pdev);
 505}
 506
 507static int rtas_msi_init(void)
 508{
 509        struct pci_controller *phb;
 510
 511        query_token  = rtas_token("ibm,query-interrupt-source-number");
 512        change_token = rtas_token("ibm,change-msi");
 513
 514        if ((query_token == RTAS_UNKNOWN_SERVICE) ||
 515                        (change_token == RTAS_UNKNOWN_SERVICE)) {
 516                pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n");
 517                return -1;
 518        }
 519
 520        pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
 521
 522        WARN_ON(pseries_pci_controller_ops.setup_msi_irqs);
 523        pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
 524        pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
 525
 526        list_for_each_entry(phb, &hose_list, list_node) {
 527                WARN_ON(phb->controller_ops.setup_msi_irqs);
 528                phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
 529                phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
 530        }
 531
 532        WARN_ON(ppc_md.pci_irq_fixup);
 533        ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
 534
 535        return 0;
 536}
 537arch_initcall(rtas_msi_init);
 538
 539