qemu/hw/pci/pcie_aer.c
<<
>>
Prefs
   1/*
   2 * pcie_aer.c
   3 *
   4 * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
   5 *                    VA Linux Systems Japan K.K.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License along
  18 * with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "sysemu/sysemu.h"
  23#include "qapi/qmp/qdict.h"
  24#include "migration/vmstate.h"
  25#include "monitor/monitor.h"
  26#include "hw/pci/pci_bridge.h"
  27#include "hw/pci/pcie.h"
  28#include "hw/pci/msix.h"
  29#include "hw/pci/msi.h"
  30#include "hw/pci/pci_bus.h"
  31#include "hw/pci/pcie_regs.h"
  32#include "qapi/error.h"
  33
  34//#define DEBUG_PCIE
  35#ifdef DEBUG_PCIE
  36# define PCIE_DPRINTF(fmt, ...)                                         \
  37    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
  38#else
  39# define PCIE_DPRINTF(fmt, ...) do {} while (0)
  40#endif
  41#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
  42    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
  43
  44#define PCI_ERR_SRC_COR_OFFS    0
  45#define PCI_ERR_SRC_UNCOR_OFFS  2
  46
  47typedef struct PCIEErrorDetails {
  48    const char *id;
  49    const char *root_bus;
  50    int bus;
  51    int devfn;
  52} PCIEErrorDetails;
  53
  54/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
  55static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
  56{
  57    switch (status) {
  58    case PCI_ERR_UNC_INTN:
  59    case PCI_ERR_UNC_DLP:
  60    case PCI_ERR_UNC_SDN:
  61    case PCI_ERR_UNC_RX_OVER:
  62    case PCI_ERR_UNC_FCP:
  63    case PCI_ERR_UNC_MALF_TLP:
  64        return PCI_ERR_ROOT_CMD_FATAL_EN;
  65    case PCI_ERR_UNC_POISON_TLP:
  66    case PCI_ERR_UNC_ECRC:
  67    case PCI_ERR_UNC_UNSUP:
  68    case PCI_ERR_UNC_COMP_TIME:
  69    case PCI_ERR_UNC_COMP_ABORT:
  70    case PCI_ERR_UNC_UNX_COMP:
  71    case PCI_ERR_UNC_ACSV:
  72    case PCI_ERR_UNC_MCBTLP:
  73    case PCI_ERR_UNC_ATOP_EBLOCKED:
  74    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
  75        return PCI_ERR_ROOT_CMD_NONFATAL_EN;
  76    default:
  77        abort();
  78        break;
  79    }
  80    return PCI_ERR_ROOT_CMD_FATAL_EN;
  81}
  82
  83static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
  84{
  85    if (aer_log->log_num == aer_log->log_max) {
  86        return -1;
  87    }
  88    memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
  89    aer_log->log_num++;
  90    return 0;
  91}
  92
  93static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
  94{
  95    assert(aer_log->log_num);
  96    *err = aer_log->log[0];
  97    aer_log->log_num--;
  98    memmove(&aer_log->log[0], &aer_log->log[1],
  99            aer_log->log_num * sizeof *err);
 100}
 101
 102static void aer_log_clear_all_err(PCIEAERLog *aer_log)
 103{
 104    aer_log->log_num = 0;
 105}
 106
 107int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
 108                  uint16_t size, Error **errp)
 109{
 110    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
 111                        offset, size);
 112    dev->exp.aer_cap = offset;
 113
 114    /* clip down the value to avoid unreasonable memory usage */
 115    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
 116        error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
 117                "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
 118        return -EINVAL;
 119    }
 120    dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
 121                                        dev->exp.aer_log.log_max);
 122
 123    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
 124                 PCI_ERR_UNC_SUPPORTED);
 125
 126    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
 127                 PCI_ERR_UNC_SEVERITY_DEFAULT);
 128    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
 129                 PCI_ERR_UNC_SUPPORTED);
 130
 131    pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
 132                               PCI_ERR_COR_SUPPORTED);
 133
 134    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
 135                 PCI_ERR_COR_MASK_DEFAULT);
 136    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
 137                 PCI_ERR_COR_SUPPORTED);
 138
 139    /* capabilities and control. multiple header logging is supported */
 140    if (dev->exp.aer_log.log_max > 0) {
 141        pci_set_long(dev->config + offset + PCI_ERR_CAP,
 142                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
 143                     PCI_ERR_CAP_MHRC);
 144        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
 145                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
 146                     PCI_ERR_CAP_MHRE);
 147    } else {
 148        pci_set_long(dev->config + offset + PCI_ERR_CAP,
 149                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
 150        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
 151                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
 152    }
 153
 154    switch (pcie_cap_get_type(dev)) {
 155    case PCI_EXP_TYPE_ROOT_PORT:
 156        /* this case will be set by pcie_aer_root_init() */
 157        /* fallthrough */
 158    case PCI_EXP_TYPE_DOWNSTREAM:
 159    case PCI_EXP_TYPE_UPSTREAM:
 160        pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
 161                                   PCI_BRIDGE_CTL_SERR);
 162        pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
 163                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
 164        break;
 165    default:
 166        /* nothing */
 167        break;
 168    }
 169    return 0;
 170}
 171
 172void pcie_aer_exit(PCIDevice *dev)
 173{
 174    g_free(dev->exp.aer_log.log);
 175}
 176
 177static void pcie_aer_update_uncor_status(PCIDevice *dev)
 178{
 179    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 180    PCIEAERLog *aer_log = &dev->exp.aer_log;
 181
 182    uint16_t i;
 183    for (i = 0; i < aer_log->log_num; i++) {
 184        pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
 185                                   dev->exp.aer_log.log[i].status);
 186    }
 187}
 188
 189/*
 190 * return value:
 191 * true: error message needs to be sent up
 192 * false: error message is masked
 193 *
 194 * 6.2.6 Error Message Control
 195 * Figure 6-3
 196 * all pci express devices part
 197 */
 198static bool
 199pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
 200{
 201    if (!(pcie_aer_msg_is_uncor(msg) &&
 202          (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
 203        return false;
 204    }
 205
 206    /* Signaled System Error
 207     *
 208     * 7.5.1.1 Command register
 209     * Bit 8 SERR# Enable
 210     *
 211     * When Set, this bit enables reporting of Non-fatal and Fatal
 212     * errors detected by the Function to the Root Complex. Note that
 213     * errors are reported if enabled either through this bit or through
 214     * the PCI Express specific bits in the Device Control register (see
 215     * Section 7.8.4).
 216     */
 217    pci_word_test_and_set_mask(dev->config + PCI_STATUS,
 218                               PCI_STATUS_SIG_SYSTEM_ERROR);
 219
 220    if (!(msg->severity &
 221          pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
 222        return false;
 223    }
 224
 225    /* send up error message */
 226    return true;
 227}
 228
 229/*
 230 * return value:
 231 * true: error message is sent up
 232 * false: error message is masked
 233 *
 234 * 6.2.6 Error Message Control
 235 * Figure 6-3
 236 * virtual pci bridge part
 237 */
 238static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
 239{
 240    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
 241
 242    if (pcie_aer_msg_is_uncor(msg)) {
 243        /* Received System Error */
 244        pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
 245                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
 246    }
 247
 248    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
 249        return false;
 250    }
 251    return true;
 252}
 253
 254void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
 255{
 256    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 257    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
 258    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
 259                                 PCI_ERR_ROOT_IRQ);
 260    pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
 261                               vector << PCI_ERR_ROOT_IRQ_SHIFT);
 262}
 263
 264static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
 265{
 266    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 267    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 268    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
 269}
 270
 271/* Given a status register, get corresponding bits in the command register */
 272static uint32_t pcie_aer_status_to_cmd(uint32_t status)
 273{
 274    uint32_t cmd = 0;
 275    if (status & PCI_ERR_ROOT_COR_RCV) {
 276        cmd |= PCI_ERR_ROOT_CMD_COR_EN;
 277    }
 278    if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
 279        cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
 280    }
 281    if (status & PCI_ERR_ROOT_FATAL_RCV) {
 282        cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
 283    }
 284    return cmd;
 285}
 286
 287static void pcie_aer_root_notify(PCIDevice *dev)
 288{
 289    if (msix_enabled(dev)) {
 290        msix_notify(dev, pcie_aer_root_get_vector(dev));
 291    } else if (msi_enabled(dev)) {
 292        msi_notify(dev, pcie_aer_root_get_vector(dev));
 293    } else {
 294        pci_irq_assert(dev);
 295    }
 296}
 297
 298/*
 299 * 6.2.6 Error Message Control
 300 * Figure 6-3
 301 * root port part
 302 */
 303static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
 304{
 305    uint16_t cmd;
 306    uint8_t *aer_cap;
 307    uint32_t root_cmd;
 308    uint32_t root_status, prev_status;
 309
 310    cmd = pci_get_word(dev->config + PCI_COMMAND);
 311    aer_cap = dev->config + dev->exp.aer_cap;
 312    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
 313    prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 314
 315    if (cmd & PCI_COMMAND_SERR) {
 316        /* System Error.
 317         *
 318         * The way to report System Error is platform specific and
 319         * it isn't implemented in qemu right now.
 320         * So just discard the error for now.
 321         * OS which cares of aer would receive errors via
 322         * native aer mechanims, so this wouldn't matter.
 323         */
 324    }
 325
 326    /* Errro Message Received: Root Error Status register */
 327    switch (msg->severity) {
 328    case PCI_ERR_ROOT_CMD_COR_EN:
 329        if (root_status & PCI_ERR_ROOT_COR_RCV) {
 330            root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
 331        } else {
 332            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
 333                         msg->source_id);
 334        }
 335        root_status |= PCI_ERR_ROOT_COR_RCV;
 336        break;
 337    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
 338        root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
 339        break;
 340    case PCI_ERR_ROOT_CMD_FATAL_EN:
 341        if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
 342            root_status |= PCI_ERR_ROOT_FIRST_FATAL;
 343        }
 344        root_status |= PCI_ERR_ROOT_FATAL_RCV;
 345        break;
 346    default:
 347        abort();
 348        break;
 349    }
 350    if (pcie_aer_msg_is_uncor(msg)) {
 351        if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
 352            root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
 353        } else {
 354            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
 355                         PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
 356        }
 357        root_status |= PCI_ERR_ROOT_UNCOR_RCV;
 358    }
 359    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
 360
 361    /* 6.2.4.1.2 Interrupt Generation */
 362    /* All the above did was set some bits in the status register.
 363     * Specifically these that match message severity.
 364     * The below code relies on this fact. */
 365    if (!(root_cmd & msg->severity) ||
 366        (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
 367        /* Condition is not being set or was already true so nothing to do. */
 368        return;
 369    }
 370
 371    pcie_aer_root_notify(dev);
 372}
 373
 374/*
 375 * 6.2.6 Error Message Control Figure 6-3
 376 *
 377 * Walk up the bus tree from the device, propagate the error message.
 378 */
 379static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 380{
 381    uint8_t type;
 382
 383    while (dev) {
 384        if (!pci_is_express(dev)) {
 385            /* just ignore it */
 386            /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
 387             * Consider e.g. a PCI bridge above a PCI Express device. */
 388            return;
 389        }
 390
 391        type = pcie_cap_get_type(dev);
 392        if ((type == PCI_EXP_TYPE_ROOT_PORT ||
 393            type == PCI_EXP_TYPE_UPSTREAM ||
 394            type == PCI_EXP_TYPE_DOWNSTREAM) &&
 395            !pcie_aer_msg_vbridge(dev, msg)) {
 396                return;
 397        }
 398        if (!pcie_aer_msg_alldev(dev, msg)) {
 399            return;
 400        }
 401        if (type == PCI_EXP_TYPE_ROOT_PORT) {
 402            pcie_aer_msg_root_port(dev, msg);
 403            /* Root port can notify system itself,
 404               or send the error message to root complex event collector. */
 405            /*
 406             * if root port is associated with an event collector,
 407             * return the root complex event collector here.
 408             * For now root complex event collector isn't supported.
 409             */
 410            return;
 411        }
 412        dev = pci_bridge_get_device(pci_get_bus(dev));
 413    }
 414}
 415
 416static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
 417{
 418    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 419    uint8_t first_bit = ctz32(err->status);
 420    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 421    int i;
 422
 423    assert(err->status);
 424    assert(!(err->status & (err->status - 1)));
 425
 426    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
 427    errcap |= PCI_ERR_CAP_FEP(first_bit);
 428
 429    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
 430        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
 431            /* 7.10.8 Header Log Register */
 432            uint8_t *header_log =
 433                aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
 434            stl_be_p(header_log, err->header[i]);
 435        }
 436    } else {
 437        assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
 438        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
 439    }
 440
 441    if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
 442        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
 443         PCI_EXP_DEVCAP2_EETLPP)) {
 444        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
 445            /* 7.10.12 tlp prefix log register */
 446            uint8_t *prefix_log =
 447                aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
 448            stl_be_p(prefix_log, err->prefix[i]);
 449        }
 450        errcap |= PCI_ERR_CAP_TLP;
 451    } else {
 452        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
 453               PCI_ERR_TLP_PREFIX_LOG_SIZE);
 454    }
 455    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
 456}
 457
 458static void pcie_aer_clear_log(PCIDevice *dev)
 459{
 460    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 461
 462    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
 463                                 PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
 464
 465    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
 466    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
 467}
 468
 469static void pcie_aer_clear_error(PCIDevice *dev)
 470{
 471    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 472    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 473    PCIEAERLog *aer_log = &dev->exp.aer_log;
 474    PCIEAERErr err;
 475
 476    if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
 477        pcie_aer_clear_log(dev);
 478        return;
 479    }
 480
 481    /*
 482     * If more errors are queued, set corresponding bits in uncorrectable
 483     * error status.
 484     * We emulate uncorrectable error status register as W1CS.
 485     * So set bit in uncorrectable error status here again for multiple
 486     * error recording support.
 487     *
 488     * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
 489     */
 490    pcie_aer_update_uncor_status(dev);
 491
 492    aer_log_del_err(aer_log, &err);
 493    pcie_aer_update_log(dev, &err);
 494}
 495
 496static int pcie_aer_record_error(PCIDevice *dev,
 497                                 const PCIEAERErr *err)
 498{
 499    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 500    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 501    int fep = PCI_ERR_CAP_FEP(errcap);
 502
 503    assert(err->status);
 504    assert(!(err->status & (err->status - 1)));
 505
 506    if (errcap & PCI_ERR_CAP_MHRE &&
 507        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
 508        /*  Not first error. queue error */
 509        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
 510            /* overflow */
 511            return -1;
 512        }
 513        return 0;
 514    }
 515
 516    pcie_aer_update_log(dev, err);
 517    return 0;
 518}
 519
 520typedef struct PCIEAERInject {
 521    PCIDevice *dev;
 522    uint8_t *aer_cap;
 523    const PCIEAERErr *err;
 524    uint16_t devctl;
 525    uint16_t devsta;
 526    uint32_t error_status;
 527    bool unsupported_request;
 528    bool log_overflow;
 529    PCIEAERMsg msg;
 530} PCIEAERInject;
 531
 532static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
 533                                      uint32_t uncor_status,
 534                                      bool is_advisory_nonfatal)
 535{
 536    PCIDevice *dev = inj->dev;
 537
 538    inj->devsta |= PCI_EXP_DEVSTA_CED;
 539    if (inj->unsupported_request) {
 540        inj->devsta |= PCI_EXP_DEVSTA_URD;
 541    }
 542    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
 543
 544    if (inj->aer_cap) {
 545        uint32_t mask;
 546        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
 547                                   inj->error_status);
 548        mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
 549        if (mask & inj->error_status) {
 550            return false;
 551        }
 552        if (is_advisory_nonfatal) {
 553            uint32_t uncor_mask =
 554                pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
 555            if (!(uncor_mask & uncor_status)) {
 556                inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
 557            }
 558            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 559                                       uncor_status);
 560        }
 561    }
 562
 563    if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
 564        return false;
 565    }
 566    if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
 567        return false;
 568    }
 569
 570    inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
 571    return true;
 572}
 573
 574static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
 575{
 576    PCIDevice *dev = inj->dev;
 577    uint16_t cmd;
 578
 579    if (is_fatal) {
 580        inj->devsta |= PCI_EXP_DEVSTA_FED;
 581    } else {
 582        inj->devsta |= PCI_EXP_DEVSTA_NFED;
 583    }
 584    if (inj->unsupported_request) {
 585        inj->devsta |= PCI_EXP_DEVSTA_URD;
 586    }
 587    pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
 588
 589    if (inj->aer_cap) {
 590        uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
 591        if (mask & inj->error_status) {
 592            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 593                                       inj->error_status);
 594            return false;
 595        }
 596
 597        inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
 598        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 599                                   inj->error_status);
 600    }
 601
 602    cmd = pci_get_word(dev->config + PCI_COMMAND);
 603    if (inj->unsupported_request &&
 604        !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
 605        return false;
 606    }
 607    if (is_fatal) {
 608        if (!((cmd & PCI_COMMAND_SERR) ||
 609              (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
 610            return false;
 611        }
 612        inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
 613    } else {
 614        if (!((cmd & PCI_COMMAND_SERR) ||
 615              (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
 616            return false;
 617        }
 618        inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
 619    }
 620    return true;
 621}
 622
 623/*
 624 * non-Function specific error must be recorded in all functions.
 625 * It is the responsibility of the caller of this function.
 626 * It is also caller's responsibility to determine which function should
 627 * report the error.
 628 *
 629 * 6.2.4 Error Logging
 630 * 6.2.5 Sequence of Device Error Signaling and Logging Operations
 631 * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
 632 *             Operations
 633 */
 634static int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
 635{
 636    uint8_t *aer_cap = NULL;
 637    uint16_t devctl = 0;
 638    uint16_t devsta = 0;
 639    uint32_t error_status = err->status;
 640    PCIEAERInject inj;
 641
 642    if (!pci_is_express(dev)) {
 643        return -ENOSYS;
 644    }
 645
 646    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
 647        error_status &= PCI_ERR_COR_SUPPORTED;
 648    } else {
 649        error_status &= PCI_ERR_UNC_SUPPORTED;
 650    }
 651
 652    /* invalid status bit. one and only one bit must be set */
 653    if (!error_status || (error_status & (error_status - 1))) {
 654        return -EINVAL;
 655    }
 656
 657    if (dev->exp.aer_cap) {
 658        uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
 659        aer_cap = dev->config + dev->exp.aer_cap;
 660        devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
 661        devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
 662    }
 663
 664    inj.dev = dev;
 665    inj.aer_cap = aer_cap;
 666    inj.err = err;
 667    inj.devctl = devctl;
 668    inj.devsta = devsta;
 669    inj.error_status = error_status;
 670    inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
 671        err->status == PCI_ERR_UNC_UNSUP;
 672    inj.log_overflow = false;
 673
 674    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
 675        if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
 676            return 0;
 677        }
 678    } else {
 679        bool is_fatal =
 680            pcie_aer_uncor_default_severity(error_status) ==
 681            PCI_ERR_ROOT_CMD_FATAL_EN;
 682        if (aer_cap) {
 683            is_fatal =
 684                error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
 685        }
 686        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
 687            inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
 688            if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
 689                return 0;
 690            }
 691        } else {
 692            if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
 693                return 0;
 694            }
 695        }
 696    }
 697
 698    /* send up error message */
 699    inj.msg.source_id = err->source_id;
 700    pcie_aer_msg(dev, &inj.msg);
 701
 702    if (inj.log_overflow) {
 703        PCIEAERErr header_log_overflow = {
 704            .status = PCI_ERR_COR_HL_OVERFLOW,
 705            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
 706        };
 707        int ret = pcie_aer_inject_error(dev, &header_log_overflow);
 708        assert(!ret);
 709    }
 710    return 0;
 711}
 712
 713void pcie_aer_write_config(PCIDevice *dev,
 714                           uint32_t addr, uint32_t val, int len)
 715{
 716    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 717    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 718    uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
 719    uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
 720
 721    /* uncorrectable error */
 722    if (!(uncorsta & first_error)) {
 723        /* the bit that corresponds to the first error is cleared */
 724        pcie_aer_clear_error(dev);
 725    } else if (errcap & PCI_ERR_CAP_MHRE) {
 726        /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
 727         * nothing should happen. So we have to revert the modification to
 728         * the register.
 729         */
 730        pcie_aer_update_uncor_status(dev);
 731    } else {
 732        /* capability & control
 733         * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
 734         */
 735        aer_log_clear_all_err(&dev->exp.aer_log);
 736    }
 737}
 738
 739void pcie_aer_root_init(PCIDevice *dev)
 740{
 741    uint16_t pos = dev->exp.aer_cap;
 742
 743    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
 744                 PCI_ERR_ROOT_CMD_EN_MASK);
 745    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
 746                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
 747    /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
 748     * device-specific method.
 749     */
 750    pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
 751                 ~PCI_ERR_ROOT_IRQ);
 752}
 753
 754void pcie_aer_root_reset(PCIDevice *dev)
 755{
 756    uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
 757
 758    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
 759
 760    /*
 761     * Advanced Error Interrupt Message Number in Root Error Status Register
 762     * must be updated by chip dependent code because it's chip dependent
 763     * which number is used.
 764     */
 765}
 766
 767void pcie_aer_root_write_config(PCIDevice *dev,
 768                                uint32_t addr, uint32_t val, int len,
 769                                uint32_t root_cmd_prev)
 770{
 771    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 772    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 773    uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
 774    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
 775    /* 6.2.4.1.2 Interrupt Generation */
 776    if (!msix_enabled(dev) && !msi_enabled(dev)) {
 777        pci_set_irq(dev, !!(root_cmd & enabled_cmd));
 778        return;
 779    }
 780
 781    if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
 782        /* Send MSI on transition from false to true. */
 783        return;
 784    }
 785
 786    pcie_aer_root_notify(dev);
 787}
 788
 789static const VMStateDescription vmstate_pcie_aer_err = {
 790    .name = "PCIE_AER_ERROR",
 791    .version_id = 1,
 792    .minimum_version_id = 1,
 793    .fields = (VMStateField[]) {
 794        VMSTATE_UINT32(status, PCIEAERErr),
 795        VMSTATE_UINT16(source_id, PCIEAERErr),
 796        VMSTATE_UINT16(flags, PCIEAERErr),
 797        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
 798        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
 799        VMSTATE_END_OF_LIST()
 800    }
 801};
 802
 803static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
 804{
 805    PCIEAERLog *s = opaque;
 806
 807    return s->log_num <= s->log_max;
 808}
 809
 810const VMStateDescription vmstate_pcie_aer_log = {
 811    .name = "PCIE_AER_ERROR_LOG",
 812    .version_id = 1,
 813    .minimum_version_id = 1,
 814    .fields = (VMStateField[]) {
 815        VMSTATE_UINT16(log_num, PCIEAERLog),
 816        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
 817        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
 818        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
 819                              vmstate_pcie_aer_err, PCIEAERErr),
 820        VMSTATE_END_OF_LIST()
 821    }
 822};
 823
 824typedef struct PCIEAERErrorName {
 825    const char *name;
 826    uint32_t val;
 827    bool correctable;
 828} PCIEAERErrorName;
 829
 830/*
 831 * AER error name -> value conversion table
 832 * This naming scheme is same to linux aer-injection tool.
 833 */
 834static const struct PCIEAERErrorName pcie_aer_error_list[] = {
 835    {
 836        .name = "DLP",
 837        .val = PCI_ERR_UNC_DLP,
 838        .correctable = false,
 839    }, {
 840        .name = "SDN",
 841        .val = PCI_ERR_UNC_SDN,
 842        .correctable = false,
 843    }, {
 844        .name = "POISON_TLP",
 845        .val = PCI_ERR_UNC_POISON_TLP,
 846        .correctable = false,
 847    }, {
 848        .name = "FCP",
 849        .val = PCI_ERR_UNC_FCP,
 850        .correctable = false,
 851    }, {
 852        .name = "COMP_TIME",
 853        .val = PCI_ERR_UNC_COMP_TIME,
 854        .correctable = false,
 855    }, {
 856        .name = "COMP_ABORT",
 857        .val = PCI_ERR_UNC_COMP_ABORT,
 858        .correctable = false,
 859    }, {
 860        .name = "UNX_COMP",
 861        .val = PCI_ERR_UNC_UNX_COMP,
 862        .correctable = false,
 863    }, {
 864        .name = "RX_OVER",
 865        .val = PCI_ERR_UNC_RX_OVER,
 866        .correctable = false,
 867    }, {
 868        .name = "MALF_TLP",
 869        .val = PCI_ERR_UNC_MALF_TLP,
 870        .correctable = false,
 871    }, {
 872        .name = "ECRC",
 873        .val = PCI_ERR_UNC_ECRC,
 874        .correctable = false,
 875    }, {
 876        .name = "UNSUP",
 877        .val = PCI_ERR_UNC_UNSUP,
 878        .correctable = false,
 879    }, {
 880        .name = "ACSV",
 881        .val = PCI_ERR_UNC_ACSV,
 882        .correctable = false,
 883    }, {
 884        .name = "INTN",
 885        .val = PCI_ERR_UNC_INTN,
 886        .correctable = false,
 887    }, {
 888        .name = "MCBTLP",
 889        .val = PCI_ERR_UNC_MCBTLP,
 890        .correctable = false,
 891    }, {
 892        .name = "ATOP_EBLOCKED",
 893        .val = PCI_ERR_UNC_ATOP_EBLOCKED,
 894        .correctable = false,
 895    }, {
 896        .name = "TLP_PRF_BLOCKED",
 897        .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
 898        .correctable = false,
 899    }, {
 900        .name = "RCVR",
 901        .val = PCI_ERR_COR_RCVR,
 902        .correctable = true,
 903    }, {
 904        .name = "BAD_TLP",
 905        .val = PCI_ERR_COR_BAD_TLP,
 906        .correctable = true,
 907    }, {
 908        .name = "BAD_DLLP",
 909        .val = PCI_ERR_COR_BAD_DLLP,
 910        .correctable = true,
 911    }, {
 912        .name = "REP_ROLL",
 913        .val = PCI_ERR_COR_REP_ROLL,
 914        .correctable = true,
 915    }, {
 916        .name = "REP_TIMER",
 917        .val = PCI_ERR_COR_REP_TIMER,
 918        .correctable = true,
 919    }, {
 920        .name = "ADV_NONFATAL",
 921        .val = PCI_ERR_COR_ADV_NONFATAL,
 922        .correctable = true,
 923    }, {
 924        .name = "INTERNAL",
 925        .val = PCI_ERR_COR_INTERNAL,
 926        .correctable = true,
 927    }, {
 928        .name = "HL_OVERFLOW",
 929        .val = PCI_ERR_COR_HL_OVERFLOW,
 930        .correctable = true,
 931    },
 932};
 933
 934static int pcie_aer_parse_error_string(const char *error_name,
 935                                       uint32_t *status, bool *correctable)
 936{
 937    int i;
 938
 939    for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
 940        const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
 941        if (strcmp(error_name, e->name)) {
 942            continue;
 943        }
 944
 945        *status = e->val;
 946        *correctable = e->correctable;
 947        return 0;
 948    }
 949    return -EINVAL;
 950}
 951
 952/*
 953 * Inject an error described by @qdict.
 954 * On success, set @details to show where error was sent.
 955 * Return negative errno if injection failed and a message was emitted.
 956 */
 957static int do_pcie_aer_inject_error(Monitor *mon,
 958                                    const QDict *qdict,
 959                                    PCIEErrorDetails *details)
 960{
 961    const char *id = qdict_get_str(qdict, "id");
 962    const char *error_name;
 963    uint32_t error_status;
 964    bool correctable;
 965    PCIDevice *dev;
 966    PCIEAERErr err;
 967    int ret;
 968
 969    ret = pci_qdev_find_device(id, &dev);
 970    if (ret < 0) {
 971        monitor_printf(mon,
 972                       "id or pci device path is invalid or device not "
 973                       "found. %s\n", id);
 974        return ret;
 975    }
 976    if (!pci_is_express(dev)) {
 977        monitor_printf(mon, "the device doesn't support pci express. %s\n",
 978                       id);
 979        return -ENOSYS;
 980    }
 981
 982    error_name = qdict_get_str(qdict, "error_status");
 983    if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
 984        char *e = NULL;
 985        error_status = strtoul(error_name, &e, 0);
 986        correctable = qdict_get_try_bool(qdict, "correctable", false);
 987        if (!e || *e != '\0') {
 988            monitor_printf(mon, "invalid error status value. \"%s\"",
 989                           error_name);
 990            return -EINVAL;
 991        }
 992    }
 993    err.status = error_status;
 994    err.source_id = pci_requester_id(dev);
 995
 996    err.flags = 0;
 997    if (correctable) {
 998        err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
 999    }
1000    if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
1001        err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
1002    }
1003    if (qdict_haskey(qdict, "header0")) {
1004        err.flags |= PCIE_AER_ERR_HEADER_VALID;
1005    }
1006    if (qdict_haskey(qdict, "prefix0")) {
1007        err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
1008    }
1009
1010    err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1011    err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1012    err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1013    err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1014
1015    err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1016    err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1017    err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1018    err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1019
1020    ret = pcie_aer_inject_error(dev, &err);
1021    if (ret < 0) {
1022        monitor_printf(mon, "failed to inject error: %s\n",
1023                       strerror(-ret));
1024        return ret;
1025    }
1026    details->id = id;
1027    details->root_bus = pci_root_bus_path(dev);
1028    details->bus = pci_dev_bus_num(dev);
1029    details->devfn = dev->devfn;
1030
1031    return 0;
1032}
1033
1034void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1035{
1036    PCIEErrorDetails data;
1037
1038    if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1039        return;
1040    }
1041
1042    monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1043                   data.id, data.root_bus, data.bus,
1044                   PCI_SLOT(data.devfn), PCI_FUNC(data.devfn));
1045}
1046