linux/drivers/pci/pcie/aer/aer_inject.c
// SPDX-License-Identifier: GPL-2.0
/*
 * PCIe AER software error injection support.
 *
 * Debugging PCIe AER code is quite difficult because it is hard to
 * trigger various real hardware errors. Software-based error
 * injection can fake almost all kinds of errors with the help of a
 * user space helper tool, aer-inject, which is available at:
 *   http://www.kernel.org/pub/linux/utils/pci/aer-inject/
 *
 * Copyright 2009 Intel Corporation.
 *     Huang Ying <ying.huang@intel.com>
 */
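
/*
 * A minimal usage sketch (not part of this module, for illustration only):
 * a user space helper fills a record with the same layout as
 * struct aer_error_inj below and writes it to the /dev/aer_inject misc
 * device registered by this module.  The target address and status bits
 * here are made-up examples:
 *
 *	// needs CAP_SYS_ADMIN; PCI_ERR_COR_RCVR comes from <linux/pci_regs.h>
 *	struct aer_error_inj einj = {
 *		.domain = 0, .bus = 1, .dev = 0, .fn = 0,
 *		.cor_status = PCI_ERR_COR_RCVR,	// inject a Receiver Error
 *	};
 *	int fd = open("/dev/aer_inject", O_WRONLY);
 *
 *	if (fd >= 0)
 *		write(fd, &einj, sizeof(einj));	// fails with errno set if injection fails
 *	close(fd);
 */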

#include <linux/module.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/stddef.h>
#include <linux/device.h>
#include "aerdrv.h"

/* Override the existing corrected and uncorrected error masks */
static bool aer_mask_override;
module_param(aer_mask_override, bool, 0);

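/*
 * Error injection record written by the user space helper.
 * aer_inject_write() also accepts records that stop short of the
 * domain field, in which case domain defaults to 0.
 */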
struct aer_error_inj {
        u8 bus;
        u8 dev;
        u8 fn;
        u32 uncor_status;
        u32 cor_status;
        u32 header_log0;
        u32 header_log1;
        u32 header_log2;
        u32 header_log3;
        u32 domain;
};

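/*
 * Simulated AER register state for one device, looked up by PCI
 * domain/bus/devfn.  aer_inj_read_config()/aer_inj_write_config()
 * serve these values instead of the real config space registers.
 */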
struct aer_error {
        struct list_head list;
        u32 domain;
        unsigned int bus;
        unsigned int devfn;
        int pos_cap_err;

        u32 uncor_status;
        u32 cor_status;
        u32 header_log0;
        u32 header_log1;
        u32 header_log2;
        u32 header_log3;
        u32 root_status;
        u32 source_id;
};

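/*
 * Original pci_ops saved for a bus whose ops have been replaced with
 * aer_inj_pci_ops, so real config accesses can still be forwarded and
 * the ops can be restored on module exit.
 */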
struct pci_bus_ops {
        struct list_head list;
        struct pci_bus *bus;
        struct pci_ops *ops;
};

static LIST_HEAD(einjected);

static LIST_HEAD(pci_bus_ops_list);

/* Protect einjected and pci_bus_ops_list */
static DEFINE_SPINLOCK(inject_lock);

static void aer_error_init(struct aer_error *err, u32 domain,
                           unsigned int bus, unsigned int devfn,
                           int pos_cap_err)
{
        INIT_LIST_HEAD(&err->list);
        err->domain = domain;
        err->bus = bus;
        err->devfn = devfn;
        err->pos_cap_err = pos_cap_err;
}

/* inject_lock must be held before calling */
static struct aer_error *__find_aer_error(u32 domain, unsigned int bus,
                                          unsigned int devfn)
{
        struct aer_error *err;

        list_for_each_entry(err, &einjected, list) {
                if (domain == err->domain &&
                    bus == err->bus &&
                    devfn == err->devfn)
                        return err;
        }
        return NULL;
}

/* inject_lock must be held before calling */
static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev)
{
        int domain = pci_domain_nr(dev->bus);

        if (domain < 0)
                return NULL;
        return __find_aer_error(domain, dev->bus->number, dev->devfn);
}

/* inject_lock must be held before calling */
static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus)
{
        struct pci_bus_ops *bus_ops;

        list_for_each_entry(bus_ops, &pci_bus_ops_list, list) {
                if (bus_ops->bus == bus)
                        return bus_ops->ops;
        }
        return NULL;
}

static struct pci_bus_ops *pci_bus_ops_pop(void)
{
        unsigned long flags;
        struct pci_bus_ops *bus_ops;

        spin_lock_irqsave(&inject_lock, flags);
        bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
                                           struct pci_bus_ops, list);
        if (bus_ops)
                list_del(&bus_ops->list);
        spin_unlock_irqrestore(&inject_lock, flags);
        return bus_ops;
}

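/*
 * Map a config space offset within the device's AER capability to the
 * corresponding simulated register, if any.  *prw1cs is set for the
 * "write 1 to clear, sticky" status registers so the write path can
 * emulate their semantics.
 */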
static u32 *find_pci_config_dword(struct aer_error *err, int where,
                                  int *prw1cs)
{
        int rw1cs = 0;
        u32 *target = NULL;

        if (err->pos_cap_err == -1)
                return NULL;

        switch (where - err->pos_cap_err) {
        case PCI_ERR_UNCOR_STATUS:
                target = &err->uncor_status;
                rw1cs = 1;
                break;
        case PCI_ERR_COR_STATUS:
                target = &err->cor_status;
                rw1cs = 1;
                break;
        case PCI_ERR_HEADER_LOG:
                target = &err->header_log0;
                break;
        case PCI_ERR_HEADER_LOG + 4:
                target = &err->header_log1;
                break;
        case PCI_ERR_HEADER_LOG + 8:
                target = &err->header_log2;
                break;
        case PCI_ERR_HEADER_LOG + 12:
                target = &err->header_log3;
                break;
        case PCI_ERR_ROOT_STATUS:
                target = &err->root_status;
                rw1cs = 1;
                break;
        case PCI_ERR_ROOT_ERR_SRC:
                target = &err->source_id;
                break;
        }
        if (prw1cs)
                *prw1cs = rw1cs;
        return target;
}

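/*
 * Fake config accessors installed via pci_bus_set_aer_ops(): dword
 * accesses that hit a simulated AER register are answered from the
 * matching struct aer_error; everything else is forwarded to the
 * bus's original pci_ops.
 */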
static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
                               int where, int size, u32 *val)
{
        u32 *sim;
        struct aer_error *err;
        unsigned long flags;
        struct pci_ops *ops;
        struct pci_ops *my_ops;
        int domain;
        int rv;

        spin_lock_irqsave(&inject_lock, flags);
        if (size != sizeof(u32))
                goto out;
        domain = pci_domain_nr(bus);
        if (domain < 0)
                goto out;
        err = __find_aer_error(domain, bus->number, devfn);
        if (!err)
                goto out;

        sim = find_pci_config_dword(err, where, NULL);
        if (sim) {
                *val = *sim;
                spin_unlock_irqrestore(&inject_lock, flags);
                return 0;
        }
out:
        ops = __find_pci_bus_ops(bus);
        /*
         * pci_lock must already be held, so we can directly
         * manipulate bus->ops.  Many config access functions,
         * including pci_generic_config_read() require the original
         * bus->ops be installed to function, so temporarily put them
         * back.
         */
        my_ops = bus->ops;
        bus->ops = ops;
        rv = ops->read(bus, devfn, where, size, val);
        bus->ops = my_ops;
        spin_unlock_irqrestore(&inject_lock, flags);
        return rv;
}

static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
                                int where, int size, u32 val)
{
        u32 *sim;
        struct aer_error *err;
        unsigned long flags;
        int rw1cs;
        struct pci_ops *ops;
        struct pci_ops *my_ops;
        int domain;
        int rv;

        spin_lock_irqsave(&inject_lock, flags);
        if (size != sizeof(u32))
                goto out;
        domain = pci_domain_nr(bus);
        if (domain < 0)
                goto out;
        err = __find_aer_error(domain, bus->number, devfn);
        if (!err)
                goto out;

        sim = find_pci_config_dword(err, where, &rw1cs);
        if (sim) {
                if (rw1cs)
                        *sim &= ~val;	/* write 1 to clear, sticky */
                else
                        *sim = val;
                spin_unlock_irqrestore(&inject_lock, flags);
                return 0;
        }
out:
        ops = __find_pci_bus_ops(bus);
        /*
         * pci_lock must already be held, so we can directly
         * manipulate bus->ops.  Many config access functions,
         * including pci_generic_config_write() require the original
         * bus->ops be installed to function, so temporarily put them
         * back.
         */
        my_ops = bus->ops;
        bus->ops = ops;
        rv = ops->write(bus, devfn, where, size, val);
        bus->ops = my_ops;
        spin_unlock_irqrestore(&inject_lock, flags);
        return rv;
}

static struct pci_ops aer_inj_pci_ops = {
        .read = aer_inj_read_config,
        .write = aer_inj_write_config,
};

static void pci_bus_ops_init(struct pci_bus_ops *bus_ops,
                             struct pci_bus *bus,
                             struct pci_ops *ops)
{
        INIT_LIST_HEAD(&bus_ops->list);
        bus_ops->bus = bus;
        bus_ops->ops = ops;
}

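/*
 * Replace the bus's config accessors with aer_inj_pci_ops and remember
 * the original ops (once per bus) so they can be forwarded to and
 * restored at module exit.
 */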
static int pci_bus_set_aer_ops(struct pci_bus *bus)
{
        struct pci_ops *ops;
        struct pci_bus_ops *bus_ops;
        unsigned long flags;

        bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL);
        if (!bus_ops)
                return -ENOMEM;
        ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
        spin_lock_irqsave(&inject_lock, flags);
        if (ops == &aer_inj_pci_ops)
                goto out;
        pci_bus_ops_init(bus_ops, bus, ops);
        list_add(&bus_ops->list, &pci_bus_ops_list);
        bus_ops = NULL;
out:
        spin_unlock_irqrestore(&inject_lock, flags);
        kfree(bus_ops);
        return 0;
}

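/*
 * Walk the root port's child devices looking for the pcie_device bound
 * to the AER port service; finding it tells us the AER driver is in
 * charge of this port and gives us the device to pass to aer_irq().
 */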
static int find_aer_device_iter(struct device *device, void *data)
{
        struct pcie_device **result = data;
        struct pcie_device *pcie_dev;

        if (device->bus == &pcie_port_bus_type) {
                pcie_dev = to_pcie_device(device);
                if (pcie_dev->service & PCIE_PORT_SERVICE_AER) {
                        *result = pcie_dev;
                        return 1;
                }
        }
        return 0;
}

static int find_aer_device(struct pci_dev *dev, struct pcie_device **result)
{
        return device_for_each_child(&dev->dev, result, find_aer_device_iter);
}

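/*
 * Core of the injector: look up the target device and its root port,
 * record the requested error bits in simulated AER registers for both,
 * update the root port's simulated Root Error Status / Error Source ID,
 * hook the config accessors of the affected buses, and finally call
 * aer_irq() so the AER service driver processes the "error" as if the
 * root port had signalled an interrupt.
 */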
static int aer_inject(struct aer_error_inj *einj)
{
        struct aer_error *err, *rperr;
        struct aer_error *err_alloc = NULL, *rperr_alloc = NULL;
        struct pci_dev *dev, *rpdev;
        struct pcie_device *edev;
        unsigned long flags;
        unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
        int pos_cap_err, rp_pos_cap_err;
        u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
        int ret = 0;

        dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn);
        if (!dev)
                return -ENODEV;
        rpdev = pcie_find_root_port(dev);
        if (!rpdev) {
                pci_err(dev, "aer_inject: Root port not found\n");
                ret = -ENODEV;
                goto out_put;
        }

        pos_cap_err = dev->aer_cap;
        if (!pos_cap_err) {
                pci_err(dev, "aer_inject: Device doesn't support AER\n");
                ret = -EPROTONOSUPPORT;
                goto out_put;
        }
        pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever);
        pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask);
        pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
                              &uncor_mask);

        rp_pos_cap_err = rpdev->aer_cap;
        if (!rp_pos_cap_err) {
                pci_err(rpdev, "aer_inject: Root port doesn't support AER\n");
                ret = -EPROTONOSUPPORT;
                goto out_put;
        }

        err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL);
        if (!err_alloc) {
                ret = -ENOMEM;
                goto out_put;
        }
        rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL);
        if (!rperr_alloc) {
                ret = -ENOMEM;
                goto out_put;
        }

        if (aer_mask_override) {
                /*
                 * Clear only the mask bits of the errors being injected
                 * so the device does not filter them out.
                 */
                cor_mask_orig = cor_mask;
                cor_mask &= ~einj->cor_status;
                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
                                       cor_mask);

                uncor_mask_orig = uncor_mask;
                uncor_mask &= ~einj->uncor_status;
                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
                                       uncor_mask);
        }

        spin_lock_irqsave(&inject_lock, flags);

        err = __find_aer_error_by_dev(dev);
        if (!err) {
                err = err_alloc;
                err_alloc = NULL;
                aer_error_init(err, einj->domain, einj->bus, devfn,
                               pos_cap_err);
                list_add(&err->list, &einjected);
        }
        err->uncor_status |= einj->uncor_status;
        err->cor_status |= einj->cor_status;
        err->header_log0 = einj->header_log0;
        err->header_log1 = einj->header_log1;
        err->header_log2 = einj->header_log2;
        err->header_log3 = einj->header_log3;

        if (!aer_mask_override && einj->cor_status &&
            !(einj->cor_status & ~cor_mask)) {
                ret = -EINVAL;
                pci_warn(dev, "aer_inject: The correctable error(s) are masked by the device\n");
                spin_unlock_irqrestore(&inject_lock, flags);
                goto out_put;
        }
        if (!aer_mask_override && einj->uncor_status &&
            !(einj->uncor_status & ~uncor_mask)) {
                ret = -EINVAL;
                pci_warn(dev, "aer_inject: The uncorrectable error(s) are masked by the device\n");
                spin_unlock_irqrestore(&inject_lock, flags);
                goto out_put;
        }

        rperr = __find_aer_error_by_dev(rpdev);
        if (!rperr) {
                rperr = rperr_alloc;
                rperr_alloc = NULL;
                aer_error_init(rperr, pci_domain_nr(rpdev->bus),
                               rpdev->bus->number, rpdev->devfn,
                               rp_pos_cap_err);
                list_add(&rperr->list, &einjected);
        }
        if (einj->cor_status) {
                if (rperr->root_status & PCI_ERR_ROOT_COR_RCV)
                        rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
                else
                        rperr->root_status |= PCI_ERR_ROOT_COR_RCV;
                rperr->source_id &= 0xffff0000;
                rperr->source_id |= (einj->bus << 8) | devfn;
        }
        if (einj->uncor_status) {
                if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)
                        rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
                if (sever & einj->uncor_status) {
                        rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV;
                        if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV))
                                rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL;
                } else {
                        rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
                }
                rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV;
                rperr->source_id &= 0x0000ffff;
                rperr->source_id |= ((einj->bus << 8) | devfn) << 16;
        }
        spin_unlock_irqrestore(&inject_lock, flags);

        if (aer_mask_override) {
                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
                                       cor_mask_orig);
                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
                                       uncor_mask_orig);
        }

        ret = pci_bus_set_aer_ops(dev->bus);
        if (ret)
                goto out_put;
        ret = pci_bus_set_aer_ops(rpdev->bus);
        if (ret)
                goto out_put;

        if (find_aer_device(rpdev, &edev)) {
                if (!get_service_data(edev)) {
                        dev_warn(&edev->device,
                                 "aer_inject: AER service is not initialized\n");
                        ret = -EPROTONOSUPPORT;
                        goto out_put;
                }
                dev_info(&edev->device,
                         "aer_inject: Injecting errors %08x/%08x into device %s\n",
                         einj->cor_status, einj->uncor_status, pci_name(dev));
                aer_irq(-1, edev);
        } else {
                pci_err(rpdev, "aer_inject: AER device not found\n");
                ret = -ENODEV;
        }
out_put:
        kfree(err_alloc);
        kfree(rperr_alloc);
        pci_dev_put(dev);
        return ret;
}

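/*
 * write() handler for /dev/aer_inject.  Records shorter than the full
 * struct are accepted as long as they include everything up to the
 * domain field, which then defaults to 0; this keeps aer-inject
 * binaries built without the domain field working.
 */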
static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf,
                                size_t usize, loff_t *off)
{
        struct aer_error_inj einj;
        int ret;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (usize < offsetof(struct aer_error_inj, domain) ||
            usize > sizeof(einj))
                return -EINVAL;

        memset(&einj, 0, sizeof(einj));
        if (copy_from_user(&einj, ubuf, usize))
                return -EFAULT;

        ret = aer_inject(&einj);
        return ret ? ret : usize;
}

static const struct file_operations aer_inject_fops = {
        .write = aer_inject_write,
        .owner = THIS_MODULE,
        .llseek = noop_llseek,
};

static struct miscdevice aer_inject_device = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "aer_inject",
        .fops = &aer_inject_fops,
};

static int __init aer_inject_init(void)
{
        return misc_register(&aer_inject_device);
}

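/*
 * Undo everything on module unload: restore the original pci_ops of
 * every hooked bus and free all injected error records.
 */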
static void __exit aer_inject_exit(void)
{
        struct aer_error *err, *err_next;
        unsigned long flags;
        struct pci_bus_ops *bus_ops;

        misc_deregister(&aer_inject_device);

        while ((bus_ops = pci_bus_ops_pop())) {
                pci_bus_set_ops(bus_ops->bus, bus_ops->ops);
                kfree(bus_ops);
        }

        spin_lock_irqsave(&inject_lock, flags);
        list_for_each_entry_safe(err, err_next, &einjected, list) {
                list_del(&err->list);
                kfree(err);
        }
        spin_unlock_irqrestore(&inject_lock, flags);
}

module_init(aer_inject_init);
module_exit(aer_inject_exit);

MODULE_DESCRIPTION("PCIe AER software error injector");
MODULE_LICENSE("GPL");