qemu/hw/pci/pcie_aer.c
<<
>>
Prefs
   1/*
   2 * pcie_aer.c
   3 *
   4 * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
   5 *                    VA Linux Systems Japan K.K.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License along
  18 * with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "sysemu/sysemu.h"
  22#include "qapi/qmp/types.h"
  23#include "monitor/monitor.h"
  24#include "hw/pci/pci_bridge.h"
  25#include "hw/pci/pcie.h"
  26#include "hw/pci/msix.h"
  27#include "hw/pci/msi.h"
  28#include "hw/pci/pci_bus.h"
  29#include "hw/pci/pcie_regs.h"
  30
/* Uncomment the following line to get debug output on stderr. */
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
/* Print a debug message prefixed with the calling function name and line. */
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
/* Debugging disabled: expands to an empty statement, arguments are dropped. */
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#endif
/* Like PCIE_DPRINTF, additionally prefixed with the device name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets added to PCI_ERR_ROOT_ERR_SRC when storing the 16-bit source
 * id of a correctable / uncorrectable error message.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
  43
  44/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
  45static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
  46{
  47    switch (status) {
  48    case PCI_ERR_UNC_INTN:
  49    case PCI_ERR_UNC_DLP:
  50    case PCI_ERR_UNC_SDN:
  51    case PCI_ERR_UNC_RX_OVER:
  52    case PCI_ERR_UNC_FCP:
  53    case PCI_ERR_UNC_MALF_TLP:
  54        return PCI_ERR_ROOT_CMD_FATAL_EN;
  55    case PCI_ERR_UNC_POISON_TLP:
  56    case PCI_ERR_UNC_ECRC:
  57    case PCI_ERR_UNC_UNSUP:
  58    case PCI_ERR_UNC_COMP_TIME:
  59    case PCI_ERR_UNC_COMP_ABORT:
  60    case PCI_ERR_UNC_UNX_COMP:
  61    case PCI_ERR_UNC_ACSV:
  62    case PCI_ERR_UNC_MCBTLP:
  63    case PCI_ERR_UNC_ATOP_EBLOCKED:
  64    case PCI_ERR_UNC_TLP_PRF_BLOCKED:
  65        return PCI_ERR_ROOT_CMD_NONFATAL_EN;
  66    default:
  67        abort();
  68        break;
  69    }
  70    return PCI_ERR_ROOT_CMD_FATAL_EN;
  71}
  72
  73static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
  74{
  75    if (aer_log->log_num == aer_log->log_max) {
  76        return -1;
  77    }
  78    memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
  79    aer_log->log_num++;
  80    return 0;
  81}
  82
  83static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
  84{
  85    assert(aer_log->log_num);
  86    *err = aer_log->log[0];
  87    aer_log->log_num--;
  88    memmove(&aer_log->log[0], &aer_log->log[1],
  89            aer_log->log_num * sizeof *err);
  90}
  91
  92static void aer_log_clear_all_err(PCIEAERLog *aer_log)
  93{
  94    aer_log->log_num = 0;
  95}
  96
  97int pcie_aer_init(PCIDevice *dev, uint16_t offset)
  98{
  99    PCIExpressDevice *exp;
 100
 101    pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
 102                        offset, PCI_ERR_SIZEOF);
 103    exp = &dev->exp;
 104    exp->aer_cap = offset;
 105
 106    /* log_max is property */
 107    if (dev->exp.aer_log.log_max == PCIE_AER_LOG_MAX_UNSET) {
 108        dev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
 109    }
 110    /* clip down the value to avoid unreasobale memory usage */
 111    if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
 112        return -EINVAL;
 113    }
 114    dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
 115                                        dev->exp.aer_log.log_max);
 116
 117    pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
 118                 PCI_ERR_UNC_SUPPORTED);
 119
 120    pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
 121                 PCI_ERR_UNC_SEVERITY_DEFAULT);
 122    pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
 123                 PCI_ERR_UNC_SUPPORTED);
 124
 125    pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
 126                               PCI_ERR_COR_SUPPORTED);
 127
 128    pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
 129                 PCI_ERR_COR_MASK_DEFAULT);
 130    pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
 131                 PCI_ERR_COR_SUPPORTED);
 132
 133    /* capabilities and control. multiple header logging is supported */
 134    if (dev->exp.aer_log.log_max > 0) {
 135        pci_set_long(dev->config + offset + PCI_ERR_CAP,
 136                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
 137                     PCI_ERR_CAP_MHRC);
 138        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
 139                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
 140                     PCI_ERR_CAP_MHRE);
 141    } else {
 142        pci_set_long(dev->config + offset + PCI_ERR_CAP,
 143                     PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
 144        pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
 145                     PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
 146    }
 147
 148    switch (pcie_cap_get_type(dev)) {
 149    case PCI_EXP_TYPE_ROOT_PORT:
 150        /* this case will be set by pcie_aer_root_init() */
 151        /* fallthrough */
 152    case PCI_EXP_TYPE_DOWNSTREAM:
 153    case PCI_EXP_TYPE_UPSTREAM:
 154        pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
 155                                   PCI_BRIDGE_CTL_SERR);
 156        pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
 157                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
 158        break;
 159    default:
 160        /* nothing */
 161        break;
 162    }
 163    return 0;
 164}
 165
 166void pcie_aer_exit(PCIDevice *dev)
 167{
 168    g_free(dev->exp.aer_log.log);
 169}
 170
 171static void pcie_aer_update_uncor_status(PCIDevice *dev)
 172{
 173    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 174    PCIEAERLog *aer_log = &dev->exp.aer_log;
 175
 176    uint16_t i;
 177    for (i = 0; i < aer_log->log_num; i++) {
 178        pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
 179                                   dev->exp.aer_log.log[i].status);
 180    }
 181}
 182
 183/*
 184 * return value:
 185 * true: error message needs to be sent up
 186 * false: error message is masked
 187 *
 188 * 6.2.6 Error Message Control
 189 * Figure 6-3
 190 * all pci express devices part
 191 */
 192static bool
 193pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
 194{
 195    if (!(pcie_aer_msg_is_uncor(msg) &&
 196          (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
 197        return false;
 198    }
 199
 200    /* Signaled System Error
 201     *
 202     * 7.5.1.1 Command register
 203     * Bit 8 SERR# Enable
 204     *
 205     * When Set, this bit enables reporting of Non-fatal and Fatal
 206     * errors detected by the Function to the Root Complex. Note that
 207     * errors are reported if enabled either through this bit or through
 208     * the PCI Express specific bits in the Device Control register (see
 209     * Section 7.8.4).
 210     */
 211    pci_word_test_and_set_mask(dev->config + PCI_STATUS,
 212                               PCI_STATUS_SIG_SYSTEM_ERROR);
 213
 214    if (!(msg->severity &
 215          pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
 216        return false;
 217    }
 218
 219    /* send up error message */
 220    return true;
 221}
 222
 223/*
 224 * return value:
 225 * true: error message is sent up
 226 * false: error message is masked
 227 *
 228 * 6.2.6 Error Message Control
 229 * Figure 6-3
 230 * virtual pci bridge part
 231 */
 232static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
 233{
 234    uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
 235
 236    if (pcie_aer_msg_is_uncor(msg)) {
 237        /* Received System Error */
 238        pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
 239                                   PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
 240    }
 241
 242    if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
 243        return false;
 244    }
 245    return true;
 246}
 247
 248void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
 249{
 250    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 251    assert(vector < PCI_ERR_ROOT_IRQ_MAX);
 252    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
 253                                 PCI_ERR_ROOT_IRQ);
 254    pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
 255                               vector << PCI_ERR_ROOT_IRQ_SHIFT);
 256}
 257
 258static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
 259{
 260    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 261    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 262    return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
 263}
 264
 265/* Given a status register, get corresponding bits in the command register */
 266static uint32_t pcie_aer_status_to_cmd(uint32_t status)
 267{
 268    uint32_t cmd = 0;
 269    if (status & PCI_ERR_ROOT_COR_RCV) {
 270        cmd |= PCI_ERR_ROOT_CMD_COR_EN;
 271    }
 272    if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
 273        cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
 274    }
 275    if (status & PCI_ERR_ROOT_FATAL_RCV) {
 276        cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
 277    }
 278    return cmd;
 279}
 280
 281static void pcie_aer_root_notify(PCIDevice *dev)
 282{
 283    if (msix_enabled(dev)) {
 284        msix_notify(dev, pcie_aer_root_get_vector(dev));
 285    } else if (msi_enabled(dev)) {
 286        msi_notify(dev, pcie_aer_root_get_vector(dev));
 287    } else {
 288        pci_irq_assert(dev);
 289    }
 290}
 291
 292/*
 293 * 6.2.6 Error Message Control
 294 * Figure 6-3
 295 * root port part
 296 */
 297static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
 298{
 299    uint16_t cmd;
 300    uint8_t *aer_cap;
 301    uint32_t root_cmd;
 302    uint32_t root_status, prev_status;
 303
 304    cmd = pci_get_word(dev->config + PCI_COMMAND);
 305    aer_cap = dev->config + dev->exp.aer_cap;
 306    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
 307    prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 308
 309    if (cmd & PCI_COMMAND_SERR) {
 310        /* System Error.
 311         *
 312         * The way to report System Error is platform specific and
 313         * it isn't implemented in qemu right now.
 314         * So just discard the error for now.
 315         * OS which cares of aer would receive errors via
 316         * native aer mechanims, so this wouldn't matter.
 317         */
 318    }
 319
 320    /* Errro Message Received: Root Error Status register */
 321    switch (msg->severity) {
 322    case PCI_ERR_ROOT_CMD_COR_EN:
 323        if (root_status & PCI_ERR_ROOT_COR_RCV) {
 324            root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
 325        } else {
 326            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
 327                         msg->source_id);
 328        }
 329        root_status |= PCI_ERR_ROOT_COR_RCV;
 330        break;
 331    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
 332        root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
 333        break;
 334    case PCI_ERR_ROOT_CMD_FATAL_EN:
 335        if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
 336            root_status |= PCI_ERR_ROOT_FIRST_FATAL;
 337        }
 338        root_status |= PCI_ERR_ROOT_FATAL_RCV;
 339        break;
 340    default:
 341        abort();
 342        break;
 343    }
 344    if (pcie_aer_msg_is_uncor(msg)) {
 345        if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
 346            root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
 347        } else {
 348            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
 349                         PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
 350        }
 351        root_status |= PCI_ERR_ROOT_UNCOR_RCV;
 352    }
 353    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
 354
 355    /* 6.2.4.1.2 Interrupt Generation */
 356    /* All the above did was set some bits in the status register.
 357     * Specifically these that match message severity.
 358     * The below code relies on this fact. */
 359    if (!(root_cmd & msg->severity) ||
 360        (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
 361        /* Condition is not being set or was already true so nothing to do. */
 362        return;
 363    }
 364
 365    pcie_aer_root_notify(dev);
 366}
 367
 368/*
 369 * 6.2.6 Error Message Control Figure 6-3
 370 *
 371 * Walk up the bus tree from the device, propagate the error message.
 372 */
 373static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 374{
 375    uint8_t type;
 376
 377    while (dev) {
 378        if (!pci_is_express(dev)) {
 379            /* just ignore it */
 380            /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
 381             * Consider e.g. a PCI bridge above a PCI Express device. */
 382            return;
 383        }
 384
 385        type = pcie_cap_get_type(dev);
 386        if ((type == PCI_EXP_TYPE_ROOT_PORT ||
 387            type == PCI_EXP_TYPE_UPSTREAM ||
 388            type == PCI_EXP_TYPE_DOWNSTREAM) &&
 389            !pcie_aer_msg_vbridge(dev, msg)) {
 390                return;
 391        }
 392        if (!pcie_aer_msg_alldev(dev, msg)) {
 393            return;
 394        }
 395        if (type == PCI_EXP_TYPE_ROOT_PORT) {
 396            pcie_aer_msg_root_port(dev, msg);
 397            /* Root port can notify system itself,
 398               or send the error message to root complex event collector. */
 399            /*
 400             * if root port is associated with an event collector,
 401             * return the root complex event collector here.
 402             * For now root complex event collector isn't supported.
 403             */
 404            return;
 405        }
 406        dev = pci_bridge_get_device(dev->bus);
 407    }
 408}
 409
 410static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
 411{
 412    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 413    uint8_t first_bit = ctz32(err->status);
 414    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 415    int i;
 416
 417    assert(err->status);
 418    assert(!(err->status & (err->status - 1)));
 419
 420    errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
 421    errcap |= PCI_ERR_CAP_FEP(first_bit);
 422
 423    if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
 424        for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
 425            /* 7.10.8 Header Log Register */
 426            uint8_t *header_log =
 427                aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
 428            stl_be_p(header_log, err->header[i]);
 429        }
 430    } else {
 431        assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
 432        memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
 433    }
 434
 435    if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
 436        (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
 437         PCI_EXP_DEVCAP2_EETLPP)) {
 438        for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
 439            /* 7.10.12 tlp prefix log register */
 440            uint8_t *prefix_log =
 441                aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
 442            stl_be_p(prefix_log, err->prefix[i]);
 443        }
 444        errcap |= PCI_ERR_CAP_TLP;
 445    } else {
 446        memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
 447               PCI_ERR_TLP_PREFIX_LOG_SIZE);
 448    }
 449    pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
 450}
 451
 452static void pcie_aer_clear_log(PCIDevice *dev)
 453{
 454    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 455
 456    pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
 457                                 PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
 458
 459    memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
 460    memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
 461}
 462
 463static void pcie_aer_clear_error(PCIDevice *dev)
 464{
 465    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 466    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 467    PCIEAERLog *aer_log = &dev->exp.aer_log;
 468    PCIEAERErr err;
 469
 470    if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
 471        pcie_aer_clear_log(dev);
 472        return;
 473    }
 474
 475    /*
 476     * If more errors are queued, set corresponding bits in uncorrectable
 477     * error status.
 478     * We emulate uncorrectable error status register as W1CS.
 479     * So set bit in uncorrectable error status here again for multiple
 480     * error recording support.
 481     *
 482     * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
 483     */
 484    pcie_aer_update_uncor_status(dev);
 485
 486    aer_log_del_err(aer_log, &err);
 487    pcie_aer_update_log(dev, &err);
 488}
 489
 490static int pcie_aer_record_error(PCIDevice *dev,
 491                                 const PCIEAERErr *err)
 492{
 493    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 494    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 495    int fep = PCI_ERR_CAP_FEP(errcap);
 496
 497    assert(err->status);
 498    assert(!(err->status & (err->status - 1)));
 499
 500    if (errcap & PCI_ERR_CAP_MHRE &&
 501        (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
 502        /*  Not first error. queue error */
 503        if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
 504            /* overflow */
 505            return -1;
 506        }
 507        return 0;
 508    }
 509
 510    pcie_aer_update_log(dev, err);
 511    return 0;
 512}
 513
/* Working state shared by the helpers of pcie_aer_inject_error(). */
typedef struct PCIEAERInject {
    PCIDevice *dev;             /* device the error is injected into */
    uint8_t *aer_cap;           /* AER capability in config space, or NULL
                                 * when the device has none */
    const PCIEAERErr *err;      /* error being injected */
    uint16_t devctl;            /* Device Control value read at injection */
    uint16_t devsta;            /* Device Status value; the helpers OR in
                                 * the detected bits and write it back */
    uint32_t error_status;      /* the single status bit being reported */
    bool unsupported_request;   /* uncorrectable PCI_ERR_UNC_UNSUP error */
    bool log_overflow;          /* set when recording the error failed */
    PCIEAERMsg msg;             /* message to send up; the helpers fill in
                                 * the severity */
} PCIEAERInject;
 525
 526static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
 527                                      uint32_t uncor_status,
 528                                      bool is_advisory_nonfatal)
 529{
 530    PCIDevice *dev = inj->dev;
 531
 532    inj->devsta |= PCI_EXP_DEVSTA_CED;
 533    if (inj->unsupported_request) {
 534        inj->devsta |= PCI_EXP_DEVSTA_URD;
 535    }
 536    pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
 537
 538    if (inj->aer_cap) {
 539        uint32_t mask;
 540        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
 541                                   inj->error_status);
 542        mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
 543        if (mask & inj->error_status) {
 544            return false;
 545        }
 546        if (is_advisory_nonfatal) {
 547            uint32_t uncor_mask =
 548                pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
 549            if (!(uncor_mask & uncor_status)) {
 550                inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
 551            }
 552            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 553                                       uncor_status);
 554        }
 555    }
 556
 557    if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
 558        return false;
 559    }
 560    if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
 561        return false;
 562    }
 563
 564    inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
 565    return true;
 566}
 567
 568static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
 569{
 570    PCIDevice *dev = inj->dev;
 571    uint16_t cmd;
 572
 573    if (is_fatal) {
 574        inj->devsta |= PCI_EXP_DEVSTA_FED;
 575    } else {
 576        inj->devsta |= PCI_EXP_DEVSTA_NFED;
 577    }
 578    if (inj->unsupported_request) {
 579        inj->devsta |= PCI_EXP_DEVSTA_URD;
 580    }
 581    pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
 582
 583    if (inj->aer_cap) {
 584        uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
 585        if (mask & inj->error_status) {
 586            pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 587                                       inj->error_status);
 588            return false;
 589        }
 590
 591        inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
 592        pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
 593                                   inj->error_status);
 594    }
 595
 596    cmd = pci_get_word(dev->config + PCI_COMMAND);
 597    if (inj->unsupported_request &&
 598        !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
 599        return false;
 600    }
 601    if (is_fatal) {
 602        if (!((cmd & PCI_COMMAND_SERR) ||
 603              (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
 604            return false;
 605        }
 606        inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
 607    } else {
 608        if (!((cmd & PCI_COMMAND_SERR) ||
 609              (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
 610            return false;
 611        }
 612        inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
 613    }
 614    return true;
 615}
 616
 617/*
 618 * non-Function specific error must be recorded in all functions.
 619 * It is the responsibility of the caller of this function.
 620 * It is also caller's responsibility to determine which function should
 621 * report the error.
 622 *
 623 * 6.2.4 Error Logging
 624 * 6.2.5 Sequence of Device Error Signaling and Logging Operations
 625 * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
 626 *             Operations
 627 */
 628int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
 629{
 630    uint8_t *aer_cap = NULL;
 631    uint16_t devctl = 0;
 632    uint16_t devsta = 0;
 633    uint32_t error_status = err->status;
 634    PCIEAERInject inj;
 635
 636    if (!pci_is_express(dev)) {
 637        return -ENOSYS;
 638    }
 639
 640    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
 641        error_status &= PCI_ERR_COR_SUPPORTED;
 642    } else {
 643        error_status &= PCI_ERR_UNC_SUPPORTED;
 644    }
 645
 646    /* invalid status bit. one and only one bit must be set */
 647    if (!error_status || (error_status & (error_status - 1))) {
 648        return -EINVAL;
 649    }
 650
 651    if (dev->exp.aer_cap) {
 652        uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
 653        aer_cap = dev->config + dev->exp.aer_cap;
 654        devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
 655        devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
 656    }
 657
 658    inj.dev = dev;
 659    inj.aer_cap = aer_cap;
 660    inj.err = err;
 661    inj.devctl = devctl;
 662    inj.devsta = devsta;
 663    inj.error_status = error_status;
 664    inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
 665        err->status == PCI_ERR_UNC_UNSUP;
 666    inj.log_overflow = false;
 667
 668    if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
 669        if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
 670            return 0;
 671        }
 672    } else {
 673        bool is_fatal =
 674            pcie_aer_uncor_default_severity(error_status) ==
 675            PCI_ERR_ROOT_CMD_FATAL_EN;
 676        if (aer_cap) {
 677            is_fatal =
 678                error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
 679        }
 680        if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
 681            inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
 682            if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
 683                return 0;
 684            }
 685        } else {
 686            if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
 687                return 0;
 688            }
 689        }
 690    }
 691
 692    /* send up error message */
 693    inj.msg.source_id = err->source_id;
 694    pcie_aer_msg(dev, &inj.msg);
 695
 696    if (inj.log_overflow) {
 697        PCIEAERErr header_log_overflow = {
 698            .status = PCI_ERR_COR_HL_OVERFLOW,
 699            .flags = PCIE_AER_ERR_IS_CORRECTABLE,
 700        };
 701        int ret = pcie_aer_inject_error(dev, &header_log_overflow);
 702        assert(!ret);
 703    }
 704    return 0;
 705}
 706
 707void pcie_aer_write_config(PCIDevice *dev,
 708                           uint32_t addr, uint32_t val, int len)
 709{
 710    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 711    uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
 712    uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
 713    uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
 714
 715    /* uncorrectable error */
 716    if (!(uncorsta & first_error)) {
 717        /* the bit that corresponds to the first error is cleared */
 718        pcie_aer_clear_error(dev);
 719    } else if (errcap & PCI_ERR_CAP_MHRE) {
 720        /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
 721         * nothing should happen. So we have to revert the modification to
 722         * the register.
 723         */
 724        pcie_aer_update_uncor_status(dev);
 725    } else {
 726        /* capability & control
 727         * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
 728         */
 729        aer_log_clear_all_err(&dev->exp.aer_log);
 730    }
 731}
 732
 733void pcie_aer_root_init(PCIDevice *dev)
 734{
 735    uint16_t pos = dev->exp.aer_cap;
 736
 737    pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
 738                 PCI_ERR_ROOT_CMD_EN_MASK);
 739    pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
 740                 PCI_ERR_ROOT_STATUS_REPORT_MASK);
 741    /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
 742     * device-specific method.
 743     */
 744    pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
 745                 ~PCI_ERR_ROOT_IRQ);
 746}
 747
 748void pcie_aer_root_reset(PCIDevice *dev)
 749{
 750    uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
 751
 752    pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
 753
 754    /*
 755     * Advanced Error Interrupt Message Number in Root Error Status Register
 756     * must be updated by chip dependent code because it's chip dependent
 757     * which number is used.
 758     */
 759}
 760
 761void pcie_aer_root_write_config(PCIDevice *dev,
 762                                uint32_t addr, uint32_t val, int len,
 763                                uint32_t root_cmd_prev)
 764{
 765    uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
 766    uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
 767    uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
 768    uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
 769    /* 6.2.4.1.2 Interrupt Generation */
 770    if (!msix_enabled(dev) && !msi_enabled(dev)) {
 771        pci_set_irq(dev, !!(root_cmd & enabled_cmd));
 772        return;
 773    }
 774
 775    if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
 776        /* Send MSI on transition from false to true. */
 777        return;
 778    }
 779
 780    pcie_aer_root_notify(dev);
 781}
 782
/* Migration description of a single queued error (PCIEAERErr). */
static const VMStateDescription vmstate_pcie_aer_err = {
    .name = "PCIE_AER_ERROR",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(status, PCIEAERErr),
        VMSTATE_UINT16(source_id, PCIEAERErr),
        VMSTATE_UINT16(flags, PCIEAERErr),
        /* header and prefix are saved as four 32-bit words each */
        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
        VMSTATE_END_OF_LIST()
    }
};
 796
 797static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
 798{
 799    PCIEAERLog *s = opaque;
 800
 801    return s->log_num <= s->log_max;
 802}
 803
/* Migration description of the queue of logged errors (PCIEAERLog). */
const VMStateDescription vmstate_pcie_aer_log = {
    .name = "PCIE_AER_ERROR_LOG",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(log_num, PCIEAERLog),
        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
        /* checked before the log_num entries of the array are processed */
        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
                              vmstate_pcie_aer_err, PCIEAERErr),
        VMSTATE_END_OF_LIST()
    }
};
 817
/* One entry of the AER error name -> value conversion table. */
typedef struct PCIEAERErrorName {
    const char *name;   /* error name as matched by the parser */
    uint32_t val;       /* PCI_ERR_UNC_* or PCI_ERR_COR_* status bit */
    bool correctable;   /* true for PCI_ERR_COR_* entries */
} PCIEAERErrorName;
 823
 824/*
 825 * AER error name -> value conversion table
 826 * This naming scheme is same to linux aer-injection tool.
 827 */
 828static const struct PCIEAERErrorName pcie_aer_error_list[] = {
 829    {
 830        .name = "TRAIN",
 831        .val = PCI_ERR_UNC_TRAIN,
 832        .correctable = false,
 833    }, {
 834        .name = "DLP",
 835        .val = PCI_ERR_UNC_DLP,
 836        .correctable = false,
 837    }, {
 838        .name = "SDN",
 839        .val = PCI_ERR_UNC_SDN,
 840        .correctable = false,
 841    }, {
 842        .name = "POISON_TLP",
 843        .val = PCI_ERR_UNC_POISON_TLP,
 844        .correctable = false,
 845    }, {
 846        .name = "FCP",
 847        .val = PCI_ERR_UNC_FCP,
 848        .correctable = false,
 849    }, {
 850        .name = "COMP_TIME",
 851        .val = PCI_ERR_UNC_COMP_TIME,
 852        .correctable = false,
 853    }, {
 854        .name = "COMP_ABORT",
 855        .val = PCI_ERR_UNC_COMP_ABORT,
 856        .correctable = false,
 857    }, {
 858        .name = "UNX_COMP",
 859        .val = PCI_ERR_UNC_UNX_COMP,
 860        .correctable = false,
 861    }, {
 862        .name = "RX_OVER",
 863        .val = PCI_ERR_UNC_RX_OVER,
 864        .correctable = false,
 865    }, {
 866        .name = "MALF_TLP",
 867        .val = PCI_ERR_UNC_MALF_TLP,
 868        .correctable = false,
 869    }, {
 870        .name = "ECRC",
 871        .val = PCI_ERR_UNC_ECRC,
 872        .correctable = false,
 873    }, {
 874        .name = "UNSUP",
 875        .val = PCI_ERR_UNC_UNSUP,
 876        .correctable = false,
 877    }, {
 878        .name = "ACSV",
 879        .val = PCI_ERR_UNC_ACSV,
 880        .correctable = false,
 881    }, {
 882        .name = "INTN",
 883        .val = PCI_ERR_UNC_INTN,
 884        .correctable = false,
 885    }, {
 886        .name = "MCBTLP",
 887        .val = PCI_ERR_UNC_MCBTLP,
 888        .correctable = false,
 889    }, {
 890        .name = "ATOP_EBLOCKED",
 891        .val = PCI_ERR_UNC_ATOP_EBLOCKED,
 892        .correctable = false,
 893    }, {
 894        .name = "TLP_PRF_BLOCKED",
 895        .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
 896        .correctable = false,
 897    }, {
 898        .name = "RCVR",
 899        .val = PCI_ERR_COR_RCVR,
 900        .correctable = true,
 901    }, {
 902        .name = "BAD_TLP",
 903        .val = PCI_ERR_COR_BAD_TLP,
 904        .correctable = true,
 905    }, {
 906        .name = "BAD_DLLP",
 907        .val = PCI_ERR_COR_BAD_DLLP,
 908        .correctable = true,
 909    }, {
 910        .name = "REP_ROLL",
 911        .val = PCI_ERR_COR_REP_ROLL,
 912        .correctable = true,
 913    }, {
 914        .name = "REP_TIMER",
 915        .val = PCI_ERR_COR_REP_TIMER,
 916        .correctable = true,
 917    }, {
 918        .name = "ADV_NONFATAL",
 919        .val = PCI_ERR_COR_ADV_NONFATAL,
 920        .correctable = true,
 921    }, {
 922        .name = "INTERNAL",
 923        .val = PCI_ERR_COR_INTERNAL,
 924        .correctable = true,
 925    }, {
 926        .name = "HL_OVERFLOW",
 927        .val = PCI_ERR_COR_HL_OVERFLOW,
 928        .correctable = true,
 929    },
 930};
 931
 932static int pcie_aer_parse_error_string(const char *error_name,
 933                                       uint32_t *status, bool *correctable)
 934{
 935    int i;
 936
 937    for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
 938        const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
 939        if (strcmp(error_name, e->name)) {
 940            continue;
 941        }
 942
 943        *status = e->val;
 944        *correctable = e->correctable;
 945        return 0;
 946    }
 947    return -EINVAL;
 948}
 949
 950static int do_pcie_aer_inject_error(Monitor *mon,
 951                                    const QDict *qdict, QObject **ret_data)
 952{
 953    const char *id = qdict_get_str(qdict, "id");
 954    const char *error_name;
 955    uint32_t error_status;
 956    bool correctable;
 957    PCIDevice *dev;
 958    PCIEAERErr err;
 959    int ret;
 960
 961    ret = pci_qdev_find_device(id, &dev);
 962    if (ret < 0) {
 963        monitor_printf(mon,
 964                       "id or pci device path is invalid or device not "
 965                       "found. %s\n", id);
 966        return ret;
 967    }
 968    if (!pci_is_express(dev)) {
 969        monitor_printf(mon, "the device doesn't support pci express. %s\n",
 970                       id);
 971        return -ENOSYS;
 972    }
 973
 974    error_name = qdict_get_str(qdict, "error_status");
 975    if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
 976        char *e = NULL;
 977        error_status = strtoul(error_name, &e, 0);
 978        correctable = qdict_get_try_bool(qdict, "correctable", false);
 979        if (!e || *e != '\0') {
 980            monitor_printf(mon, "invalid error status value. \"%s\"",
 981                           error_name);
 982            return -EINVAL;
 983        }
 984    }
 985    err.status = error_status;
 986    err.source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn;
 987
 988    err.flags = 0;
 989    if (correctable) {
 990        err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
 991    }
 992    if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
 993        err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
 994    }
 995    if (qdict_haskey(qdict, "header0")) {
 996        err.flags |= PCIE_AER_ERR_HEADER_VALID;
 997    }
 998    if (qdict_haskey(qdict, "prefix0")) {
 999        err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
1000    }
1001
1002    err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1003    err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1004    err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1005    err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1006
1007    err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1008    err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1009    err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1010    err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1011
1012    ret = pcie_aer_inject_error(dev, &err);
1013    *ret_data = qobject_from_jsonf("{'id': %s, "
1014                                   "'root_bus': %s, 'bus': %d, 'devfn': %d, "
1015                                   "'ret': %d}",
1016                                   id, pci_root_bus_path(dev),
1017                                   pci_bus_num(dev->bus), dev->devfn,
1018                                   ret);
1019    assert(*ret_data);
1020
1021    return 0;
1022}
1023
1024void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1025{
1026    QObject *data;
1027    int devfn;
1028
1029    if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1030        return;
1031    }
1032
1033    assert(qobject_type(data) == QTYPE_QDICT);
1034    qdict = qobject_to_qdict(data);
1035
1036    devfn = (int)qdict_get_int(qdict, "devfn");
1037    monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1038                   qdict_get_str(qdict, "id"),
1039                   qdict_get_str(qdict, "root_bus"),
1040                   (int) qdict_get_int(qdict, "bus"),
1041                   PCI_SLOT(devfn), PCI_FUNC(devfn));
1042}
1043