qemu/hw/pci/pcie_aer.c
<<
>>
Prefs
   1/*
   2 * pcie_aer.c
   3 *
   4 * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
   5 *                    VA Linux Systems Japan K.K.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License along
  18 * with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "sysemu/sysemu.h"
  23#include "qapi/qmp/types.h"
  24#include "qapi/qmp/qjson.h"
  25#include "monitor/monitor.h"
  26#include "hw/pci/pci_bridge.h"
  27#include "hw/pci/pcie.h"
  28#include "hw/pci/msix.h"
  29#include "hw/pci/msi.h"
  30#include "hw/pci/pci_bus.h"
  31#include "hw/pci/pcie_regs.h"
  32
  33//#define DEBUG_PCIE
  34#ifdef DEBUG_PCIE
  35# define PCIE_DPRINTF(fmt, ...)                                         \
  36    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
  37#else
  38# define PCIE_DPRINTF(fmt, ...) do {} while (0)
  39#endif
  40#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
  41    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
  42
  43#define PCI_ERR_SRC_COR_OFFS    0
  44#define PCI_ERR_SRC_UNCOR_OFFS  2
  45
  46/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
  47static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
  48{
  49    switch (status) {
  50    case PCI_ERR_UNC_INTN:
  51    case PCI_ERR_UNC_DLP:
  52    case PCI_ERR_UNC_SDN:
  53    case PCI_ERR_UNC_RX_OVER:
  54    case PCI_ERR_UNC_FCP:
  55    case PCI_ERR_UNC_MALF_TLP:
  56        return PCI_ERR_ROOT_CMD_FATAL_EN;
  57    case PCI_ERR_UNC_POISON_TLP:
  58    case PCI_ERR_UNC_ECRC:
  59    case PCI_ERR_UNC_UNSUP:
  60    case PCI_ERR_UNC_COMP_TIME:
  61    case PCI_ERR_UNC_COMP_ABORT:
  62    case PCI_ERR_UNC_UNX_COMP:
  63    case PCI_ERR_UNC_ACSV:
  64    case PCI_ERR_UNC_MCBTLP:
  65    case PCI_ERR_UNC_ATOP_EBLOCKED:
  66    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
  67        return PCI_ERR_ROOT_CMD_NONFATAL_EN;
  68    default:
  69        abort();
  70        break;
  71    }
  72    return PCI_ERR_ROOT_CMD_FATAL_EN;
  73}
  74
  75static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
  76{
  77    if (aer_log->log_num == aer_log->log_max) {
  78        return -1;
  79    }
  80    memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
  81    aer_log->log_num++;
  82    return 0;
  83}
  84
  85static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
  86{
  87    assert(aer_log->log_num);
  88    *err = aer_log->log[0];
  89    aer_log->log_num--;
  90    memmove(&aer_log->log[0], &aer_log->log[1],
  91            aer_log->log_num * sizeof *err);
  92}
  93
  94static void aer_log_clear_all_err(PCIEAERLog *aer_log)
  95{
  96    aer_log->log_num = 0;
  97}
  98
  99int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
 100{
 101    PCIExpressDevice *exp;
 102
 103    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
 104                        offset, size);
 105    exp = &dev->exp;
 106    exp->aer_cap = offset;
 107
 108    /* log_max is property */
 109    if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
 110        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
 111    }
 112    /* clip down the value to avoid unreasobale memory usage */
 113    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
 114        return -EINVAL;
 115    }
 116    dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
 117                                        dev->exp.aer_log.log_max);
 118
 119    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
 120                 PCI_ERR_UNC_SUPPORTED);
 121
 122    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
 123                 PCI_ERR_UNC_SEVERITY_DEFAULT);
 124    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
 125                 PCI_ERR_UNC_SUPPORTED);
 126
 127    pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
 128                               PCI_ERR_COR_SUPPORTED);
 129
 130    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
 131                 PCI_ERR_COR_MASK_DEFAULT);
 132    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
 133                 PCI_ERR_COR_SUPPORTED);
 134
 135    /* capabilities and control. multiple header logging is supported */
 136    if (dev->exp.aer_log.log_max > 0) {
 137        pci_set_long(dev->config + offset + PCI_ERR_CAP,
 138                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
 139                     PCI_ERR_CAP_MHRC);
 140        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
 141                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
 142                     PCI_ERR_CAP_MHRE);
 143    } else {
 144        pci_set_long(dev->config + offset + PCI_ERR_CAP,
 145                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
 146        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
 147                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
 148    }
 149
 150    switch (pcie_cap_get_type(dev)) {
 151    case PCI_EXP_TYPE_ROOT_PORT:
 152        /* this case will be set by pcie_aer_root_init() */
 153        /* fallthrough */
 154    case PCI_EXP_TYPE_DOWNSTREAM:
 155    case PCI_EXP_TYPE_UPSTREAM:
 156        pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
 157                                   PCI_BRIDGE_CTL_SERR);
 158        pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
 159                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
 160        break;
 161    default:
 162        /* nothing */
 163        break;
 164    }
 165    return 0;
 166}
 167
 168void pcie_aer_exit(PCIDevice *dev)
 169{
 170    g_free(dev->exp.aer_log.log);
 171}
 172
 173static void pcie_aer_update_uncor_status(PCIDevice *dev)
 174{
 175    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 176    PCIEAERLog *aer_log = &dev->exp.aer_log;
 177
 178    uint16_t i;
 179    for (i = 0; i < aer_log->log_num; i++) {
 180        pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
 181                                   dev->exp.aer_log.log[i].status);
 182    }
 183}
 184
 185/*
 186 * return value:
 187 * true: error message needs to be sent up
 188 * false: error message is masked
 189 *
 190 * 6.2.6 Error Message Control
 191 * Figure 6-3
 192 * all pci express devices part
 193 */
 194static bool
 195pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
 196{
 197    if (!(pcie_aer_msg_is_uncor(msg) &&
 198          (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
 199        return false;
 200    }
 201
 202    /* Signaled System Error
 203     *
 204     * 7.5.1.1 Command register
 205     * Bit 8 SERR# Enable
 206     *
 207     * When Set, this bit enables reporting of Non-fatal and Fatal
 208     * errors detected by the Function to the Root Complex. Note that
 209     * errors are reported if enabled either through this bit or through
 210     * the PCI Express specific bits in the Device Control register (see
 211     * Section 7.8.4).
 212     */
 213    pci_word_test_and_set_mask(dev->config + PCI_STATUS,
 214                               PCI_STATUS_SIG_SYSTEM_ERROR);
 215
 216    if (!(msg->severity &
 217          pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
 218        return false;
 219    }
 220
 221    /* send up error message */
 222    return true;
 223}
 224
 225/*
 226 * return value:
 227 * true: error message is sent up
 228 * false: error message is masked
 229 *
 230 * 6.2.6 Error Message Control
 231 * Figure 6-3
 232 * virtual pci bridge part
 233 */
 234static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
 235{
 236    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
 237
 238    if (pcie_aer_msg_is_uncor(msg)) {
 239        /* Received System Error */
 240        pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
 241                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
 242    }
 243
 244    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
 245        return false;
 246    }
 247    return true;
 248}
 249
 250void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
 251{
 252    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 253    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
 254    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
 255                                 PCI_ERR_ROOT_IRQ);
 256    pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
 257                               vector << PCI_ERR_ROOT_IRQ_SHIFT);
 258}
 259
 260static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
 261{
 262    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 263    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 264    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
 265}
 266
 267/* Given a status register, get corresponding bits in the command register */
 268static uint32_t pcie_aer_status_to_cmd(uint32_t status)
 269{
 270    uint32_t cmd = 0;
 271    if (status & PCI_ERR_ROOT_COR_RCV) {
 272        cmd |= PCI_ERR_ROOT_CMD_COR_EN;
 273    }
 274    if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
 275        cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
 276    }
 277    if (status & PCI_ERR_ROOT_FATAL_RCV) {
 278        cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
 279    }
 280    return cmd;
 281}
 282
 283static void pcie_aer_root_notify(PCIDevice *dev)
 284{
 285    if (msix_enabled(dev)) {
 286        msix_notify(dev, pcie_aer_root_get_vector(dev));
 287    } else if (msi_enabled(dev)) {
 288        msi_notify(dev, pcie_aer_root_get_vector(dev));
 289    } else {
 290        pci_irq_assert(dev);
 291    }
 292}
 293
 294/*
 295 * 6.2.6 Error Message Control
 296 * Figure 6-3
 297 * root port part
 298 */
 299static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
 300{
 301    uint16_t cmd;
 302    uint8_t *aer_cap;
 303    uint32_t root_cmd;
 304    uint32_t root_status, prev_status;
 305
 306    cmd = pci_get_word(dev->config + PCI_COMMAND);
 307    aer_cap = dev->config + dev->exp.aer_cap;
 308    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
 309    prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 310
 311    if (cmd & PCI_COMMAND_SERR) {
 312        /* System Error.
 313         *
 314         * The way to report System Error is platform specific and
 315         * it isn't implemented in qemu right now.
 316         * So just discard the error for now.
 317         * OS which cares of aer would receive errors via
 318         * native aer mechanims, so this wouldn't matter.
 319         */
 320    }
 321
 322    /* Errro Message Received: Root Error Status register */
 323    switch (msg->severity) {
 324    case PCI_ERR_ROOT_CMD_COR_EN:
 325        if (root_status & PCI_ERR_ROOT_COR_RCV) {
 326            root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
 327        } else {
 328            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
 329                         msg->source_id);
 330        }
 331        root_status |= PCI_ERR_ROOT_COR_RCV;
 332        break;
 333    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
 334        root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
 335        break;
 336    case PCI_ERR_ROOT_CMD_FATAL_EN:
 337        if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
 338            root_status |= PCI_ERR_ROOT_FIRST_FATAL;
 339        }
 340        root_status |= PCI_ERR_ROOT_FATAL_RCV;
 341        break;
 342    default:
 343        abort();
 344        break;
 345    }
 346    if (pcie_aer_msg_is_uncor(msg)) {
 347        if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
 348            root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
 349        } else {
 350            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
 351                         PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
 352        }
 353        root_status |= PCI_ERR_ROOT_UNCOR_RCV;
 354    }
 355    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
 356
 357    /* 6.2.4.1.2 Interrupt Generation */
 358    /* All the above did was set some bits in the status register.
 359     * Specifically these that match message severity.
 360     * The below code relies on this fact. */
 361    if (!(root_cmd & msg->severity) ||
 362        (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
 363        /* Condition is not being set or was already true so nothing to do. */
 364        return;
 365    }
 366
 367    pcie_aer_root_notify(dev);
 368}
 369
 370/*
 371 * 6.2.6 Error Message Control Figure 6-3
 372 *
 373 * Walk up the bus tree from the device, propagate the error message.
 374 */
 375void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 376{
 377    uint8_t type;
 378
 379    while (dev) {
 380        if (!pci_is_express(dev)) {
 381            /* just ignore it */
 382            /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
 383             * Consider e.g. a PCI bridge above a PCI Express device. */
 384            return;
 385        }
 386
 387        type = pcie_cap_get_type(dev);
 388        if ((type == PCI_EXP_TYPE_ROOT_PORT ||
 389            type == PCI_EXP_TYPE_UPSTREAM ||
 390            type == PCI_EXP_TYPE_DOWNSTREAM) &&
 391            !pcie_aer_msg_vbridge(dev, msg)) {
 392                return;
 393        }
 394        if (!pcie_aer_msg_alldev(dev, msg)) {
 395            return;
 396        }
 397        if (type == PCI_EXP_TYPE_ROOT_PORT) {
 398            pcie_aer_msg_root_port(dev, msg);
 399            /* Root port can notify system itself,
 400               or send the error message to root complex event collector. */
 401            /*
 402             * if root port is associated with an event collector,
 403             * return the root complex event collector here.
 404             * For now root complex event collector isn't supported.
 405             */
 406            return;
 407        }
 408        dev = pci_bridge_get_device(dev->bus);
 409    }
 410}
 411
 412static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
 413{
 414    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 415    uint8_t first_bit = ctz32(err->status);
 416    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 417    int i;
 418
 419    assert(err->status);
 420    assert(!(err->status & (err->status - 1)));
 421
 422    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
 423    errcap |= PCI_ERR_CAP_FEP(first_bit);
 424
 425    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
 426        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
 427            /* 7.10.8 Header Log Register */
 428            uint8_t *header_log =
 429                aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
 430            stl_be_p(header_log, err->header[i]);
 431        }
 432    } else {
 433        assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
 434        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
 435    }
 436
 437    if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
 438        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
 439         PCI_EXP_DEVCAP2_EETLPP)) {
 440        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
 441            /* 7.10.12 tlp prefix log register */
 442            uint8_t *prefix_log =
 443                aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
 444            stl_be_p(prefix_log, err->prefix[i]);
 445        }
 446        errcap |= PCI_ERR_CAP_TLP;
 447    } else {
 448        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
 449               PCI_ERR_TLP_PREFIX_LOG_SIZE);
 450    }
 451    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
 452}
 453
 454static void pcie_aer_clear_log(PCIDevice *dev)
 455{
 456    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 457
 458    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
 459                                 PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
 460
 461    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
 462    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
 463}
 464
 465static void pcie_aer_clear_error(PCIDevice *dev)
 466{
 467    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 468    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 469    PCIEAERLog *aer_log = &dev->exp.aer_log;
 470    PCIEAERErr err;
 471
 472    if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
 473        pcie_aer_clear_log(dev);
 474        return;
 475    }
 476
 477    /*
 478     * If more errors are queued, set corresponding bits in uncorrectable
 479     * error status.
 480     * We emulate uncorrectable error status register as W1CS.
 481     * So set bit in uncorrectable error status here again for multiple
 482     * error recording support.
 483     *
 484     * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
 485     */
 486    pcie_aer_update_uncor_status(dev);
 487
 488    aer_log_del_err(aer_log, &err);
 489    pcie_aer_update_log(dev, &err);
 490}
 491
 492static int pcie_aer_record_error(PCIDevice *dev,
 493                                 const PCIEAERErr *err)
 494{
 495    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 496    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 497    int fep = PCI_ERR_CAP_FEP(errcap);
 498
 499    assert(err->status);
 500    assert(!(err->status & (err->status - 1)));
 501
 502    if (errcap & PCI_ERR_CAP_MHRE &&
 503        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
 504        /*  Not first error. queue error */
 505        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
 506            /* overflow */
 507            return -1;
 508        }
 509        return 0;
 510    }
 511
 512    pcie_aer_update_log(dev, err);
 513    return 0;
 514}
 515
 516typedef struct PCIEAERInject {
 517    PCIDevice *dev;
 518    uint8_t *aer_cap;
 519    const PCIEAERErr *err;
 520    uint16_t devctl;
 521    uint16_t devsta;
 522    uint32_t error_status;
 523    bool unsupported_request;
 524    bool log_overflow;
 525    PCIEAERMsg msg;
 526} PCIEAERInject;
 527
 528static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
 529                                      uint32_t uncor_status,
 530                                      bool is_advisory_nonfatal)
 531{
 532    PCIDevice *dev = inj->dev;
 533
 534    inj->devsta |= PCI_EXP_DEVSTA_CED;
 535    if (inj->unsupported_request) {
 536        inj->devsta |= PCI_EXP_DEVSTA_URD;
 537    }
 538    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
 539
 540    if (inj->aer_cap) {
 541        uint32_t mask;
 542        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
 543                                   inj->error_status);
 544        mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
 545        if (mask & inj->error_status) {
 546            return false;
 547        }
 548        if (is_advisory_nonfatal) {
 549            uint32_t uncor_mask =
 550                pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
 551            if (!(uncor_mask & uncor_status)) {
 552                inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
 553            }
 554            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 555                                       uncor_status);
 556        }
 557    }
 558
 559    if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
 560        return false;
 561    }
 562    if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
 563        return false;
 564    }
 565
 566    inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
 567    return true;
 568}
 569
 570static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
 571{
 572    PCIDevice *dev = inj->dev;
 573    uint16_t cmd;
 574
 575    if (is_fatal) {
 576        inj->devsta |= PCI_EXP_DEVSTA_FED;
 577    } else {
 578        inj->devsta |= PCI_EXP_DEVSTA_NFED;
 579    }
 580    if (inj->unsupported_request) {
 581        inj->devsta |= PCI_EXP_DEVSTA_URD;
 582    }
 583    pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
 584
 585    if (inj->aer_cap) {
 586        uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
 587        if (mask & inj->error_status) {
 588            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 589                                       inj->error_status);
 590            return false;
 591        }
 592
 593        inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
 594        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 595                                   inj->error_status);
 596    }
 597
 598    cmd = pci_get_word(dev->config + PCI_COMMAND);
 599    if (inj->unsupported_request &&
 600        !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
 601        return false;
 602    }
 603    if (is_fatal) {
 604        if (!((cmd & PCI_COMMAND_SERR) ||
 605              (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
 606            return false;
 607        }
 608        inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
 609    } else {
 610        if (!((cmd & PCI_COMMAND_SERR) ||
 611              (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
 612            return false;
 613        }
 614        inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
 615    }
 616    return true;
 617}
 618
 619/*
 620 * non-Function specific error must be recorded in all functions.
 621 * It is the responsibility of the caller of this function.
 622 * It is also caller's responsibility to determine which function should
 623 * report the error.
 624 *
 625 * 6.2.4 Error Logging
 626 * 6.2.5 Sequence of Device Error Signaling and Logging Operations
 627 * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
 628 *             Operations
 629 */
 630int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
 631{
 632    uint8_t *aer_cap = NULL;
 633    uint16_t devctl = 0;
 634    uint16_t devsta = 0;
 635    uint32_t error_status = err->status;
 636    PCIEAERInject inj;
 637
 638    if (!pci_is_express(dev)) {
 639        return -ENOSYS;
 640    }
 641
 642    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
 643        error_status &= PCI_ERR_COR_SUPPORTED;
 644    } else {
 645        error_status &= PCI_ERR_UNC_SUPPORTED;
 646    }
 647
 648    /* invalid status bit. one and only one bit must be set */
 649    if (!error_status || (error_status & (error_status - 1))) {
 650        return -EINVAL;
 651    }
 652
 653    if (dev->exp.aer_cap) {
 654        uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
 655        aer_cap = dev->config + dev->exp.aer_cap;
 656        devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
 657        devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
 658    }
 659
 660    inj.dev = dev;
 661    inj.aer_cap = aer_cap;
 662    inj.err = err;
 663    inj.devctl = devctl;
 664    inj.devsta = devsta;
 665    inj.error_status = error_status;
 666    inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
 667        err->status == PCI_ERR_UNC_UNSUP;
 668    inj.log_overflow = false;
 669
 670    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
 671        if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
 672            return 0;
 673        }
 674    } else {
 675        bool is_fatal =
 676            pcie_aer_uncor_default_severity(error_status) ==
 677            PCI_ERR_ROOT_CMD_FATAL_EN;
 678        if (aer_cap) {
 679            is_fatal =
 680                error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
 681        }
 682        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
 683            inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
 684            if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
 685                return 0;
 686            }
 687        } else {
 688            if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
 689                return 0;
 690            }
 691        }
 692    }
 693
 694    /* send up error message */
 695    inj.msg.source_id = err->source_id;
 696    pcie_aer_msg(dev, &inj.msg);
 697
 698    if (inj.log_overflow) {
 699        PCIEAERErr header_log_overflow = {
 700            .status = PCI_ERR_COR_HL_OVERFLOW,
 701            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
 702        };
 703        int ret = pcie_aer_inject_error(dev, &header_log_overflow);
 704        assert(!ret);
 705    }
 706    return 0;
 707}
 708
 709void pcie_aer_write_config(PCIDevice *dev,
 710                           uint32_t addr, uint32_t val, int len)
 711{
 712    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 713    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 714    uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
 715    uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
 716
 717    /* uncorrectable error */
 718    if (!(uncorsta & first_error)) {
 719        /* the bit that corresponds to the first error is cleared */
 720        pcie_aer_clear_error(dev);
 721    } else if (errcap & PCI_ERR_CAP_MHRE) {
 722        /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
 723         * nothing should happen. So we have to revert the modification to
 724         * the register.
 725         */
 726        pcie_aer_update_uncor_status(dev);
 727    } else {
 728        /* capability & control
 729         * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
 730         */
 731        aer_log_clear_all_err(&dev->exp.aer_log);
 732    }
 733}
 734
 735void pcie_aer_root_init(PCIDevice *dev)
 736{
 737    uint16_t pos = dev->exp.aer_cap;
 738
 739    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
 740                 PCI_ERR_ROOT_CMD_EN_MASK);
 741    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
 742                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
 743    /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
 744     * device-specific method.
 745     */
 746    pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
 747                 ~PCI_ERR_ROOT_IRQ);
 748}
 749
 750void pcie_aer_root_reset(PCIDevice *dev)
 751{
 752    uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
 753
 754    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
 755
 756    /*
 757     * Advanced Error Interrupt Message Number in Root Error Status Register
 758     * must be updated by chip dependent code because it's chip dependent
 759     * which number is used.
 760     */
 761}
 762
 763void pcie_aer_root_write_config(PCIDevice *dev,
 764                                uint32_t addr, uint32_t val, int len,
 765                                uint32_t root_cmd_prev)
 766{
 767    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 768    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 769    uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
 770    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
 771    /* 6.2.4.1.2 Interrupt Generation */
 772    if (!msix_enabled(dev) && !msi_enabled(dev)) {
 773        pci_set_irq(dev, !!(root_cmd & enabled_cmd));
 774        return;
 775    }
 776
 777    if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
 778        /* Send MSI on transition from false to true. */
 779        return;
 780    }
 781
 782    pcie_aer_root_notify(dev);
 783}
 784
 785static const VMStateDescription vmstate_pcie_aer_err = {
 786    .name = "PCIE_AER_ERROR",
 787    .version_id = 1,
 788    .minimum_version_id = 1,
 789    .fields = (VMStateField[]) {
 790        VMSTATE_UINT32(status, PCIEAERErr),
 791        VMSTATE_UINT16(source_id, PCIEAERErr),
 792        VMSTATE_UINT16(flags, PCIEAERErr),
 793        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
 794        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
 795        VMSTATE_END_OF_LIST()
 796    }
 797};
 798
 799static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
 800{
 801    PCIEAERLog *s = opaque;
 802
 803    return s->log_num <= s->log_max;
 804}
 805
 806const VMStateDescription vmstate_pcie_aer_log = {
 807    .name = "PCIE_AER_ERROR_LOG",
 808    .version_id = 1,
 809    .minimum_version_id = 1,
 810    .fields = (VMStateField[]) {
 811        VMSTATE_UINT16(log_num, PCIEAERLog),
 812        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
 813        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
 814        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
 815                              vmstate_pcie_aer_err, PCIEAERErr),
 816        VMSTATE_END_OF_LIST()
 817    }
 818};
 819
 820typedef struct PCIEAERErrorName {
 821    const char *name;
 822    uint32_t val;
 823    bool correctable;
 824} PCIEAERErrorName;
 825
 826/*
 827 * AER error name -> value conversion table
 828 * This naming scheme is same to linux aer-injection tool.
 829 */
 830static const struct PCIEAERErrorName pcie_aer_error_list[] = {
 831    {
 832        .name = "DLP",
 833        .val = PCI_ERR_UNC_DLP,
 834        .correctable = false,
 835    }, {
 836        .name = "SDN",
 837        .val = PCI_ERR_UNC_SDN,
 838        .correctable = false,
 839    }, {
 840        .name = "POISON_TLP",
 841        .val = PCI_ERR_UNC_POISON_TLP,
 842        .correctable = false,
 843    }, {
 844        .name = "FCP",
 845        .val = PCI_ERR_UNC_FCP,
 846        .correctable = false,
 847    }, {
 848        .name = "COMP_TIME",
 849        .val = PCI_ERR_UNC_COMP_TIME,
 850        .correctable = false,
 851    }, {
 852        .name = "COMP_ABORT",
 853        .val = PCI_ERR_UNC_COMP_ABORT,
 854        .correctable = false,
 855    }, {
 856        .name = "UNX_COMP",
 857        .val = PCI_ERR_UNC_UNX_COMP,
 858        .correctable = false,
 859    }, {
 860        .name = "RX_OVER",
 861        .val = PCI_ERR_UNC_RX_OVER,
 862        .correctable = false,
 863    }, {
 864        .name = "MALF_TLP",
 865        .val = PCI_ERR_UNC_MALF_TLP,
 866        .correctable = false,
 867    }, {
 868        .name = "ECRC",
 869        .val = PCI_ERR_UNC_ECRC,
 870        .correctable = false,
 871    }, {
 872        .name = "UNSUP",
 873        .val = PCI_ERR_UNC_UNSUP,
 874        .correctable = false,
 875    }, {
 876        .name = "ACSV",
 877        .val = PCI_ERR_UNC_ACSV,
 878        .correctable = false,
 879    }, {
 880        .name = "INTN",
 881        .val = PCI_ERR_UNC_INTN,
 882        .correctable = false,
 883    }, {
 884        .name = "MCBTLP",
 885        .val = PCI_ERR_UNC_MCBTLP,
 886        .correctable = false,
 887    }, {
 888        .name = "ATOP_EBLOCKED",
 889        .val = PCI_ERR_UNC_ATOP_EBLOCKED,
 890        .correctable = false,
 891    }, {
 892        .name = "TLP_PRF_BLOCKED",
 893        .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
 894        .correctable = false,
 895    }, {
 896        .name = "RCVR",
 897        .val = PCI_ERR_COR_RCVR,
 898        .correctable = true,
 899    }, {
 900        .name = "BAD_TLP",
 901        .val = PCI_ERR_COR_BAD_TLP,
 902        .correctable = true,
 903    }, {
 904        .name = "BAD_DLLP",
 905        .val = PCI_ERR_COR_BAD_DLLP,
 906        .correctable = true,
 907    }, {
 908        .name = "REP_ROLL",
 909        .val = PCI_ERR_COR_REP_ROLL,
 910        .correctable = true,
 911    }, {
 912        .name = "REP_TIMER",
 913        .val = PCI_ERR_COR_REP_TIMER,
 914        .correctable = true,
 915    }, {
 916        .name = "ADV_NONFATAL",
 917        .val = PCI_ERR_COR_ADV_NONFATAL,
 918        .correctable = true,
 919    }, {
 920        .name = "INTERNAL",
 921        .val = PCI_ERR_COR_INTERNAL,
 922        .correctable = true,
 923    }, {
 924        .name = "HL_OVERFLOW",
 925        .val = PCI_ERR_COR_HL_OVERFLOW,
 926        .correctable = true,
 927    },
 928};
 929
 930static int pcie_aer_parse_error_string(const char *error_name,
 931                                       uint32_t *status, bool *correctable)
 932{
 933    int i;
 934
 935    for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
 936        const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
 937        if (strcmp(error_name, e->name)) {
 938            continue;
 939        }
 940
 941        *status = e->val;
 942        *correctable = e->correctable;
 943        return 0;
 944    }
 945    return -EINVAL;
 946}
 947
 948static int do_pcie_aer_inject_error(Monitor *mon,
 949                                    const QDict *qdict, QObject **ret_data)
 950{
 951    const char *id = qdict_get_str(qdict, "id");
 952    const char *error_name;
 953    uint32_t error_status;
 954    bool correctable;
 955    PCIDevice *dev;
 956    PCIEAERErr err;
 957    int ret;
 958
 959    ret = pci_qdev_find_device(id, &dev);
 960    if (ret < 0) {
 961        monitor_printf(mon,
 962                       "id or pci device path is invalid or device not "
 963                       "found. %s\n", id);
 964        return ret;
 965    }
 966    if (!pci_is_express(dev)) {
 967        monitor_printf(mon, "the device doesn't support pci express. %s\n",
 968                       id);
 969        return -ENOSYS;
 970    }
 971
 972    error_name = qdict_get_str(qdict, "error_status");
 973    if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
 974        char *e = NULL;
 975        error_status = strtoul(error_name, &e, 0);
 976        correctable = qdict_get_try_bool(qdict, "correctable", false);
 977        if (!e || *e != '\0') {
 978            monitor_printf(mon, "invalid error status value. \"%s\"",
 979                           error_name);
 980            return -EINVAL;
 981        }
 982    }
 983    err.status = error_status;
 984    err.source_id = pci_requester_id(dev);
 985
 986    err.flags = 0;
 987    if (correctable) {
 988        err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
 989    }
 990    if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
 991        err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
 992    }
 993    if (qdict_haskey(qdict, "header0")) {
 994        err.flags |= PCIE_AER_ERR_HEADER_VALID;
 995    }
 996    if (qdict_haskey(qdict, "prefix0")) {
 997        err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
 998    }
 999
1000    err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1001    err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1002    err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1003    err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1004
1005    err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1006    err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1007    err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1008    err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1009
1010    ret = pcie_aer_inject_error(dev, &err);
1011    *ret_data = qobject_from_jsonf("{'id': %s, "
1012                                   "'root_bus': %s, 'bus': %d, 'devfn': %d, "
1013                                   "'ret': %d}",
1014                                   id, pci_root_bus_path(dev),
1015                                   pci_bus_num(dev->bus), dev->devfn,
1016                                   ret);
1017    assert(*ret_data);
1018
1019    return 0;
1020}
1021
1022void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1023{
1024    QObject *data;
1025    int devfn;
1026
1027    if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1028        return;
1029    }
1030
1031    assert(qobject_type(data) == QTYPE_QDICT);
1032    qdict = qobject_to_qdict(data);
1033
1034    devfn = (int)qdict_get_int(qdict, "devfn");
1035    monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1036                   qdict_get_str(qdict, "id"),
1037                   qdict_get_str(qdict, "root_bus"),
1038                   (int) qdict_get_int(qdict, "bus"),
1039                   PCI_SLOT(devfn), PCI_FUNC(devfn));
1040}
1041