linux/drivers/pci/pcie/aer_inject.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * PCIe AER software error injection support.
   4 *
   5 * Debugging PCIe AER code is quite difficult because it is hard to
   6 * trigger various real hardware errors. Software based error
   7 * injection can fake almost all kinds of errors with the help of a
   8 * user space helper tool aer-inject, which can be gotten from:
   9 *   http://www.kernel.org/pub/linux/utils/pci/aer-inject/
  10 *
  11 * Copyright 2009 Intel Corporation.
  12 *     Huang Ying <ying.huang@intel.com>
  13 */
  14
  15#include <linux/module.h>
  16#include <linux/init.h>
  17#include <linux/irq.h>
  18#include <linux/miscdevice.h>
  19#include <linux/pci.h>
  20#include <linux/slab.h>
  21#include <linux/fs.h>
  22#include <linux/uaccess.h>
  23#include <linux/stddef.h>
  24#include <linux/device.h>
  25
  26#include "portdrv.h"
  27
/*
 * Override the existing corrected and uncorrected error masks.
 *
 * When set, aer_inject() rewrites the target device's PCI_ERR_COR_MASK and
 * PCI_ERR_UNCOR_MASK registers around the injection and skips the
 * "error is masked by device" checks.  It is a load-time module parameter
 * (permission bits 0).
 */
static bool aer_mask_override;
module_param(aer_mask_override, bool, 0);
  31
/*
 * Injection request as written by user space to the aer_inject misc device
 * (see aer_inject_write()).  The record is copied in verbatim, so the field
 * order and sizes are part of the user-space interface.  @domain is the
 * trailing field and may be omitted by the writer, in which case it stays 0.
 */
struct aer_error_inj {
	u8 bus;			/* bus number of the target device */
	u8 dev;			/* device (slot) number, combined with fn via PCI_DEVFN() */
	u8 fn;			/* function number */
	u32 uncor_status;	/* uncorrectable status bits to inject */
	u32 cor_status;		/* correctable status bits to inject */
	u32 header_log0;	/* header log dwords 0..3 to report */
	u32 header_log1;
	u32 header_log2;
	u32 header_log3;
	u32 domain;		/* PCI domain of the target device */
};
  44
/*
 * Simulated AER register state for one PCI function.  Records live on the
 * einjected list and are looked up by (domain, bus, devfn).  The config
 * accessors installed by this module serve reads and writes of the AER
 * registers below from this record instead of from the hardware.
 */
struct aer_error {
	struct list_head list;	/* link in einjected */
	u32 domain;
	unsigned int bus;
	unsigned int devfn;
	int pos_cap_err;	/* config space offset of the AER capability */

	/* Simulated register values, indexed relative to pos_cap_err */
	u32 uncor_status;
	u32 cor_status;
	u32 header_log0;
	u32 header_log1;
	u32 header_log2;
	u32 header_log3;
	u32 root_status;	/* only updated by aer_inject() on the root port's record */
	u32 source_id;		/* only updated by aer_inject() on the root port's record */
};
  61
/*
 * Remembers the pci_ops a bus was using before aer_inj_pci_ops was installed
 * on it, so that accesses can be forwarded and the ops restored on unload.
 */
struct pci_bus_ops {
	struct list_head list;	/* link in pci_bus_ops_list */
	struct pci_bus *bus;	/* bus whose ops were replaced */
	struct pci_ops *ops;	/* the ops that were in place before */
};
  67
/* All simulated error records (struct aer_error), one per PCI function */
static LIST_HEAD(einjected);

/* Saved original ops (struct pci_bus_ops) of every bus that was hooked */
static LIST_HEAD(pci_bus_ops_list);

/* Protect einjected and pci_bus_ops_list */
static DEFINE_SPINLOCK(inject_lock);
  74
  75static void aer_error_init(struct aer_error *err, u32 domain,
  76                           unsigned int bus, unsigned int devfn,
  77                           int pos_cap_err)
  78{
  79        INIT_LIST_HEAD(&err->list);
  80        err->domain = domain;
  81        err->bus = bus;
  82        err->devfn = devfn;
  83        err->pos_cap_err = pos_cap_err;
  84}
  85
  86/* inject_lock must be held before calling */
  87static struct aer_error *__find_aer_error(u32 domain, unsigned int bus,
  88                                          unsigned int devfn)
  89{
  90        struct aer_error *err;
  91
  92        list_for_each_entry(err, &einjected, list) {
  93                if (domain == err->domain &&
  94                    bus == err->bus &&
  95                    devfn == err->devfn)
  96                        return err;
  97        }
  98        return NULL;
  99}
 100
 101/* inject_lock must be held before calling */
 102static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev)
 103{
 104        int domain = pci_domain_nr(dev->bus);
 105        if (domain < 0)
 106                return NULL;
 107        return __find_aer_error(domain, dev->bus->number, dev->devfn);
 108}
 109
 110/* inject_lock must be held before calling */
 111static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus)
 112{
 113        struct pci_bus_ops *bus_ops;
 114
 115        list_for_each_entry(bus_ops, &pci_bus_ops_list, list) {
 116                if (bus_ops->bus == bus)
 117                        return bus_ops->ops;
 118        }
 119        return NULL;
 120}
 121
 122static struct pci_bus_ops *pci_bus_ops_pop(void)
 123{
 124        unsigned long flags;
 125        struct pci_bus_ops *bus_ops;
 126
 127        spin_lock_irqsave(&inject_lock, flags);
 128        bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
 129                                           struct pci_bus_ops, list);
 130        if (bus_ops)
 131                list_del(&bus_ops->list);
 132        spin_unlock_irqrestore(&inject_lock, flags);
 133        return bus_ops;
 134}
 135
 136static u32 *find_pci_config_dword(struct aer_error *err, int where,
 137                                  int *prw1cs)
 138{
 139        int rw1cs = 0;
 140        u32 *target = NULL;
 141
 142        if (err->pos_cap_err == -1)
 143                return NULL;
 144
 145        switch (where - err->pos_cap_err) {
 146        case PCI_ERR_UNCOR_STATUS:
 147                target = &err->uncor_status;
 148                rw1cs = 1;
 149                break;
 150        case PCI_ERR_COR_STATUS:
 151                target = &err->cor_status;
 152                rw1cs = 1;
 153                break;
 154        case PCI_ERR_HEADER_LOG:
 155                target = &err->header_log0;
 156                break;
 157        case PCI_ERR_HEADER_LOG+4:
 158                target = &err->header_log1;
 159                break;
 160        case PCI_ERR_HEADER_LOG+8:
 161                target = &err->header_log2;
 162                break;
 163        case PCI_ERR_HEADER_LOG+12:
 164                target = &err->header_log3;
 165                break;
 166        case PCI_ERR_ROOT_STATUS:
 167                target = &err->root_status;
 168                rw1cs = 1;
 169                break;
 170        case PCI_ERR_ROOT_ERR_SRC:
 171                target = &err->source_id;
 172                break;
 173        }
 174        if (prw1cs)
 175                *prw1cs = rw1cs;
 176        return target;
 177}
 178
 179static int aer_inj_read(struct pci_bus *bus, unsigned int devfn, int where,
 180                        int size, u32 *val)
 181{
 182        struct pci_ops *ops, *my_ops;
 183        int rv;
 184
 185        ops = __find_pci_bus_ops(bus);
 186        if (!ops)
 187                return -1;
 188
 189        my_ops = bus->ops;
 190        bus->ops = ops;
 191        rv = ops->read(bus, devfn, where, size, val);
 192        bus->ops = my_ops;
 193
 194        return rv;
 195}
 196
 197static int aer_inj_write(struct pci_bus *bus, unsigned int devfn, int where,
 198                         int size, u32 val)
 199{
 200        struct pci_ops *ops, *my_ops;
 201        int rv;
 202
 203        ops = __find_pci_bus_ops(bus);
 204        if (!ops)
 205                return -1;
 206
 207        my_ops = bus->ops;
 208        bus->ops = ops;
 209        rv = ops->write(bus, devfn, where, size, val);
 210        bus->ops = my_ops;
 211
 212        return rv;
 213}
 214
 215static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
 216                               int where, int size, u32 *val)
 217{
 218        u32 *sim;
 219        struct aer_error *err;
 220        unsigned long flags;
 221        int domain;
 222        int rv;
 223
 224        spin_lock_irqsave(&inject_lock, flags);
 225        if (size != sizeof(u32))
 226                goto out;
 227        domain = pci_domain_nr(bus);
 228        if (domain < 0)
 229                goto out;
 230        err = __find_aer_error(domain, bus->number, devfn);
 231        if (!err)
 232                goto out;
 233
 234        sim = find_pci_config_dword(err, where, NULL);
 235        if (sim) {
 236                *val = *sim;
 237                spin_unlock_irqrestore(&inject_lock, flags);
 238                return 0;
 239        }
 240out:
 241        rv = aer_inj_read(bus, devfn, where, size, val);
 242        spin_unlock_irqrestore(&inject_lock, flags);
 243        return rv;
 244}
 245
 246static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
 247                                int where, int size, u32 val)
 248{
 249        u32 *sim;
 250        struct aer_error *err;
 251        unsigned long flags;
 252        int rw1cs;
 253        int domain;
 254        int rv;
 255
 256        spin_lock_irqsave(&inject_lock, flags);
 257        if (size != sizeof(u32))
 258                goto out;
 259        domain = pci_domain_nr(bus);
 260        if (domain < 0)
 261                goto out;
 262        err = __find_aer_error(domain, bus->number, devfn);
 263        if (!err)
 264                goto out;
 265
 266        sim = find_pci_config_dword(err, where, &rw1cs);
 267        if (sim) {
 268                if (rw1cs)
 269                        *sim ^= val;
 270                else
 271                        *sim = val;
 272                spin_unlock_irqrestore(&inject_lock, flags);
 273                return 0;
 274        }
 275out:
 276        rv = aer_inj_write(bus, devfn, where, size, val);
 277        spin_unlock_irqrestore(&inject_lock, flags);
 278        return rv;
 279}
 280
/*
 * Config accessors installed on hooked buses (see pci_bus_set_aer_ops()).
 * They serve the simulated AER registers and forward everything else to the
 * ops that were saved for the bus.
 */
static struct pci_ops aer_inj_pci_ops = {
	.read = aer_inj_read_config,
	.write = aer_inj_write_config,
};
 285
 286static void pci_bus_ops_init(struct pci_bus_ops *bus_ops,
 287                             struct pci_bus *bus,
 288                             struct pci_ops *ops)
 289{
 290        INIT_LIST_HEAD(&bus_ops->list);
 291        bus_ops->bus = bus;
 292        bus_ops->ops = ops;
 293}
 294
 295static int pci_bus_set_aer_ops(struct pci_bus *bus)
 296{
 297        struct pci_ops *ops;
 298        struct pci_bus_ops *bus_ops;
 299        unsigned long flags;
 300
 301        bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL);
 302        if (!bus_ops)
 303                return -ENOMEM;
 304        ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
 305        spin_lock_irqsave(&inject_lock, flags);
 306        if (ops == &aer_inj_pci_ops)
 307                goto out;
 308        pci_bus_ops_init(bus_ops, bus, ops);
 309        list_add(&bus_ops->list, &pci_bus_ops_list);
 310        bus_ops = NULL;
 311out:
 312        spin_unlock_irqrestore(&inject_lock, flags);
 313        kfree(bus_ops);
 314        return 0;
 315}
 316
 317static int aer_inject(struct aer_error_inj *einj)
 318{
 319        struct aer_error *err, *rperr;
 320        struct aer_error *err_alloc = NULL, *rperr_alloc = NULL;
 321        struct pci_dev *dev, *rpdev;
 322        struct pcie_device *edev;
 323        struct device *device;
 324        unsigned long flags;
 325        unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn);
 326        int pos_cap_err, rp_pos_cap_err;
 327        u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0;
 328        int ret = 0;
 329
 330        dev = pci_get_domain_bus_and_slot(einj->domain, einj->bus, devfn);
 331        if (!dev)
 332                return -ENODEV;
 333        rpdev = pcie_find_root_port(dev);
 334        if (!rpdev) {
 335                pci_err(dev, "aer_inject: Root port not found\n");
 336                ret = -ENODEV;
 337                goto out_put;
 338        }
 339
 340        pos_cap_err = dev->aer_cap;
 341        if (!pos_cap_err) {
 342                pci_err(dev, "aer_inject: Device doesn't support AER\n");
 343                ret = -EPROTONOSUPPORT;
 344                goto out_put;
 345        }
 346        pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever);
 347        pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask);
 348        pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
 349                              &uncor_mask);
 350
 351        rp_pos_cap_err = rpdev->aer_cap;
 352        if (!rp_pos_cap_err) {
 353                pci_err(rpdev, "aer_inject: Root port doesn't support AER\n");
 354                ret = -EPROTONOSUPPORT;
 355                goto out_put;
 356        }
 357
 358        err_alloc =  kzalloc(sizeof(struct aer_error), GFP_KERNEL);
 359        if (!err_alloc) {
 360                ret = -ENOMEM;
 361                goto out_put;
 362        }
 363        rperr_alloc =  kzalloc(sizeof(struct aer_error), GFP_KERNEL);
 364        if (!rperr_alloc) {
 365                ret = -ENOMEM;
 366                goto out_put;
 367        }
 368
 369        if (aer_mask_override) {
 370                cor_mask_orig = cor_mask;
 371                cor_mask &= !(einj->cor_status);
 372                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
 373                                       cor_mask);
 374
 375                uncor_mask_orig = uncor_mask;
 376                uncor_mask &= !(einj->uncor_status);
 377                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
 378                                       uncor_mask);
 379        }
 380
 381        spin_lock_irqsave(&inject_lock, flags);
 382
 383        err = __find_aer_error_by_dev(dev);
 384        if (!err) {
 385                err = err_alloc;
 386                err_alloc = NULL;
 387                aer_error_init(err, einj->domain, einj->bus, devfn,
 388                               pos_cap_err);
 389                list_add(&err->list, &einjected);
 390        }
 391        err->uncor_status |= einj->uncor_status;
 392        err->cor_status |= einj->cor_status;
 393        err->header_log0 = einj->header_log0;
 394        err->header_log1 = einj->header_log1;
 395        err->header_log2 = einj->header_log2;
 396        err->header_log3 = einj->header_log3;
 397
 398        if (!aer_mask_override && einj->cor_status &&
 399            !(einj->cor_status & ~cor_mask)) {
 400                ret = -EINVAL;
 401                pci_warn(dev, "aer_inject: The correctable error(s) is masked by device\n");
 402                spin_unlock_irqrestore(&inject_lock, flags);
 403                goto out_put;
 404        }
 405        if (!aer_mask_override && einj->uncor_status &&
 406            !(einj->uncor_status & ~uncor_mask)) {
 407                ret = -EINVAL;
 408                pci_warn(dev, "aer_inject: The uncorrectable error(s) is masked by device\n");
 409                spin_unlock_irqrestore(&inject_lock, flags);
 410                goto out_put;
 411        }
 412
 413        rperr = __find_aer_error_by_dev(rpdev);
 414        if (!rperr) {
 415                rperr = rperr_alloc;
 416                rperr_alloc = NULL;
 417                aer_error_init(rperr, pci_domain_nr(rpdev->bus),
 418                               rpdev->bus->number, rpdev->devfn,
 419                               rp_pos_cap_err);
 420                list_add(&rperr->list, &einjected);
 421        }
 422        if (einj->cor_status) {
 423                if (rperr->root_status & PCI_ERR_ROOT_COR_RCV)
 424                        rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
 425                else
 426                        rperr->root_status |= PCI_ERR_ROOT_COR_RCV;
 427                rperr->source_id &= 0xffff0000;
 428                rperr->source_id |= (einj->bus << 8) | devfn;
 429        }
 430        if (einj->uncor_status) {
 431                if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)
 432                        rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
 433                if (sever & einj->uncor_status) {
 434                        rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV;
 435                        if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV))
 436                                rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL;
 437                } else
 438                        rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
 439                rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV;
 440                rperr->source_id &= 0x0000ffff;
 441                rperr->source_id |= ((einj->bus << 8) | devfn) << 16;
 442        }
 443        spin_unlock_irqrestore(&inject_lock, flags);
 444
 445        if (aer_mask_override) {
 446                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
 447                                       cor_mask_orig);
 448                pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK,
 449                                       uncor_mask_orig);
 450        }
 451
 452        ret = pci_bus_set_aer_ops(dev->bus);
 453        if (ret)
 454                goto out_put;
 455        ret = pci_bus_set_aer_ops(rpdev->bus);
 456        if (ret)
 457                goto out_put;
 458
 459        device = pcie_port_find_device(rpdev, PCIE_PORT_SERVICE_AER);
 460        if (device) {
 461                edev = to_pcie_device(device);
 462                if (!get_service_data(edev)) {
 463                        dev_warn(&edev->device,
 464                                 "aer_inject: AER service is not initialized\n");
 465                        ret = -EPROTONOSUPPORT;
 466                        goto out_put;
 467                }
 468                dev_info(&edev->device,
 469                         "aer_inject: Injecting errors %08x/%08x into device %s\n",
 470                         einj->cor_status, einj->uncor_status, pci_name(dev));
 471                local_irq_disable();
 472                generic_handle_irq(edev->irq);
 473                local_irq_enable();
 474        } else {
 475                pci_err(rpdev, "aer_inject: AER device not found\n");
 476                ret = -ENODEV;
 477        }
 478out_put:
 479        kfree(err_alloc);
 480        kfree(rperr_alloc);
 481        pci_dev_put(dev);
 482        return ret;
 483}
 484
 485static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf,
 486                                size_t usize, loff_t *off)
 487{
 488        struct aer_error_inj einj;
 489        int ret;
 490
 491        if (!capable(CAP_SYS_ADMIN))
 492                return -EPERM;
 493        if (usize < offsetof(struct aer_error_inj, domain) ||
 494            usize > sizeof(einj))
 495                return -EINVAL;
 496
 497        memset(&einj, 0, sizeof(einj));
 498        if (copy_from_user(&einj, ubuf, usize))
 499                return -EFAULT;
 500
 501        ret = aer_inject(&einj);
 502        return ret ? ret : usize;
 503}
 504
/* File operations of the aer_inject misc device: write-only injection */
static const struct file_operations aer_inject_fops = {
	.write = aer_inject_write,
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
};
 510
/* Misc device named "aer_inject" with a dynamically assigned minor number */
static struct miscdevice aer_inject_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "aer_inject",
	.fops = &aer_inject_fops,
};
 516
 517static int __init aer_inject_init(void)
 518{
 519        return misc_register(&aer_inject_device);
 520}
 521
 522static void __exit aer_inject_exit(void)
 523{
 524        struct aer_error *err, *err_next;
 525        unsigned long flags;
 526        struct pci_bus_ops *bus_ops;
 527
 528        misc_deregister(&aer_inject_device);
 529
 530        while ((bus_ops = pci_bus_ops_pop())) {
 531                pci_bus_set_ops(bus_ops->bus, bus_ops->ops);
 532                kfree(bus_ops);
 533        }
 534
 535        spin_lock_irqsave(&inject_lock, flags);
 536        list_for_each_entry_safe(err, err_next, &einjected, list) {
 537                list_del(&err->list);
 538                kfree(err);
 539        }
 540        spin_unlock_irqrestore(&inject_lock, flags);
 541}
 542
/* Module entry/exit hooks and metadata */
module_init(aer_inject_init);
module_exit(aer_inject_exit);

MODULE_DESCRIPTION("PCIe AER software error injector");
MODULE_LICENSE("GPL");
 548