/*
 * QEMU emulation of AMD IOMMU (AMD-Vi)
 *
 * Copyright (C) 2011 Eduard - Gabriel Munteanu
 * Copyright (C) 2015, 2016 David Kiarie Kahurani
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.

 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Cache implementation inspired by hw/i386/intel_iommu.c
 */

#include "qemu/osdep.h"
#include "hw/i386/pc.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "migration/vmstate.h"
#include "amd_iommu.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/i386/apic_internal.h"
#include "trace.h"
#include "hw/i386/apic-msidef.h"

/* used AMD-Vi MMIO registers */
const char *amdvi_mmio_low[] = {
    "AMDVI_MMIO_DEVTAB_BASE",
    "AMDVI_MMIO_CMDBUF_BASE",
    "AMDVI_MMIO_EVTLOG_BASE",
    "AMDVI_MMIO_CONTROL",
    "AMDVI_MMIO_EXCL_BASE",
    "AMDVI_MMIO_EXCL_LIMIT",
    "AMDVI_MMIO_EXT_FEATURES",
    "AMDVI_MMIO_PPR_BASE",
    "UNHANDLED"
};
const char *amdvi_mmio_high[] = {
    "AMDVI_MMIO_COMMAND_HEAD",
    "AMDVI_MMIO_COMMAND_TAIL",
    "AMDVI_MMIO_EVTLOG_HEAD",
    "AMDVI_MMIO_EVTLOG_TAIL",
    "AMDVI_MMIO_STATUS",
    "AMDVI_MMIO_PPR_HEAD",
    "AMDVI_MMIO_PPR_TAIL",
    "UNHANDLED"
};

struct AMDVIAddressSpace {
    uint8_t bus_num;            /* bus number                           */
    uint8_t devfn;              /* device function                      */
    AMDVIState *iommu_state;    /* AMDVI - one per machine              */
    MemoryRegion root;          /* AMDVI Root memory map region         */
    IOMMUMemoryRegion iommu;    /* Device's address translation region  */
    MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
    AddressSpace as;            /* device's corresponding address space */
};

/* AMDVI cache entry */
typedef struct AMDVIIOTLBEntry {
    uint16_t domid;             /* assigned domain id  */
    uint16_t devid;             /* device owning entry */
    uint64_t perms;             /* access permissions  */
    uint64_t translated_addr;   /* translated address  */
    uint64_t page_mask;         /* physical page size  */
} AMDVIIOTLBEntry;

/* configure MMIO registers at startup/reset */
static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
                           uint64_t romask, uint64_t w1cmask)
{
    stq_le_p(&s->mmior[addr], val);
    stq_le_p(&s->romask[addr], romask);
    stq_le_p(&s->w1cmask[addr], w1cmask);
}

static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
    return lduw_le_p(&s->mmior[addr]);
}

static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
{
    return ldl_le_p(&s->mmior[addr]);
}

static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
{
    return ldq_le_p(&s->mmior[addr]);
}

/* internal write */
static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->mmior[addr], val);
}

/* external write */
static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
{
    uint16_t romask = lduw_le_p(&s->romask[addr]);
    uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
    uint16_t oldval = lduw_le_p(&s->mmior[addr]);
    stw_le_p(&s->mmior[addr],
            ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
{
    uint32_t romask = ldl_le_p(&s->romask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    uint32_t oldval = ldl_le_p(&s->mmior[addr]);
    stl_le_p(&s->mmior[addr],
            ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t romask = ldq_le_p(&s->romask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    /* must be 64-bit: a 32-bit oldval would truncate the preserved bits */
    uint64_t oldval = ldq_le_p(&s->mmior[addr]);
    stq_le_p(&s->mmior[addr],
            ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}
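
/*
 * Worked example of the masked-write logic above (all values
 * illustrative, not taken from the register spec): with oldval = 0xA5,
 * romask = 0xF0 (high nibble read-only), w1cmask = 0x04 (bit 2 is
 * write-1-to-clear) and a guest write of val = 0x5D:
 *   oldval & romask              = 0xA0  read-only bits keep old value
 *   val & ~romask                = 0x0D  writable bits take the new value
 *   combined                     = 0xAD
 *   & ~(val & w1cmask) = & ~0x04 = 0xA9  bit 2, written as 1, is cleared
 */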

/* test whether any of the bits in @val are set in a 64-bit register */
static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
{
    /* AND, not OR: OR-ing would make this test always true for val != 0 */
    return amdvi_readq(s, addr) & val;
}

/* OR a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
}

/* AND a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
}

static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
    MSIMessage msg = {};
    MemTxAttrs attrs = {
        .requester_id = pci_requester_id(&s->pci.dev)
    };

    if (msi_enabled(&s->pci.dev)) {
        msg = msi_get_message(&s->pci.dev, 0);
        address_space_stl_le(&address_space_memory, msg.address, msg.data,
                             attrs, NULL);
    }
}

static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
    /* event logging not enabled */
    if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
        AMDVI_MMIO_STATUS_EVT_OVF)) {
        return;
    }

    /* event log buffer full */
    if (s->evtlog_tail >= s->evtlog_len) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
        return;
    }

    if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
                         evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
    }

    s->evtlog_tail += AMDVI_EVENT_LEN;
    amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
    amdvi_generate_msi_interrupt(s);
}

static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
                                int length)
{
    int index = start / 64, bitpos = start % 64;
    uint64_t mask = MAKE_64BIT_MASK(start, length);
    buffer[index] &= ~mask;
    buffer[index] |= (value << bitpos) & mask;
}

/*
 * AMDVi event structure
 *    0:15   -> DeviceID
 *    55:63  -> event type + miscellaneous info
 *    63:127 -> related address
 */
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
                               uint16_t info)
{
    amdvi_setevent_bits(evt, devid, 0, 16);
    amdvi_setevent_bits(evt, info, 55, 8);
    amdvi_setevent_bits(evt, addr, 63, 64);
}
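
/*
 * Illustrative packing example for amdvi_encode_event() (hypothetical
 * values): for devid = 0x0010 and info = 0x5, amdvi_setevent_bits()
 * clears each destination field with MAKE_64BIT_MASK() and ORs the
 * value in at its bit position, leaving evt[0] with 0x0010 in bits
 * 0:15 and 0x5 in bits 55:62.
 */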

/*
 * log an error encountered during a page walk
 *
 * @addr: virtual address in translation request
 */
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
                             hwaddr addr, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

/*
 * log a master abort accessing device table
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
                                   hwaddr devtab, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;

    amdvi_encode_event(evt, devid, devtab, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

/*
 * log an event trying to access command buffer
 *   @addr : address that couldn't be accessed
 */
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
    uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;

    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
            PCI_STATUS_SIG_TARGET_ABORT);
}

/*
 * log an illegal command event
 *   @addr : address of illegal command
 */
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
                                       hwaddr addr)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
}

/*
 * log an error accessing device table
 *
 *  @devid : device owning the table entry
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
                                          hwaddr addr, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
}

/*
 * log an error accessing a PTE entry
 * @addr : address that couldn't be accessed
 */
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
                                    hwaddr addr, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
             PCI_STATUS_SIG_TARGET_ABORT);
}

static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
{
    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
}

static guint amdvi_uint64_hash(gconstpointer v)
{
    return (guint)*(const uint64_t *)v;
}

static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
                                           uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    return g_hash_table_lookup(s->iotlb, &key);
}

static void amdvi_iotlb_reset(AMDVIState *s)
{
    assert(s->iotlb);
    trace_amdvi_iotlb_reset();
    g_hash_table_remove_all(s->iotlb);
}

static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t devid = *(uint16_t *)user_data;
    return entry->devid == devid;
}

static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                    uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    g_hash_table_remove(s->iotlb, &key);
}

static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
                               uint64_t gpa, IOMMUTLBEntry to_cache,
                               uint16_t domid)
{
    /* don't cache erroneous translations */
    if (to_cache.perm != IOMMU_NONE) {
        /* allocate only for entries we actually insert, avoiding a leak */
        AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
        uint64_t *key = g_new(uint64_t, 1);
        uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;

        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                PCI_FUNC(devid), gpa, to_cache.translated_addr);

        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
            amdvi_iotlb_reset(s);
        }

        entry->domid = domid;
        entry->perms = to_cache.perm;
        entry->translated_addr = to_cache.translated_addr;
        entry->page_mask = to_cache.addr_mask;
        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
        g_hash_table_replace(s->iotlb, key, entry);
    }
}
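
/*
 * The IOTLB key used by the helpers above packs the guest frame number
 * and the device id into one 64-bit value. For example (illustrative
 * numbers), devid = 0x0008 and gpa = 0x12345000 give
 *     key = (0x12345000 >> AMDVI_PAGE_SHIFT_4K)
 *           | (0x0008ULL << AMDVI_DEVID_SHIFT)
 * so lookup, per-page removal and per-device/per-domain flushes can all
 * operate on the same hash table.
 */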

static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
{
    /* pad the last 3 bits */
    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
    uint64_t data = cpu_to_le64(cmd[1]);

    if (extract64(cmd[0], 52, 8)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    if (extract64(cmd[0], 0, 1)) {
        if (dma_memory_write(&address_space_memory, addr, &data,
                             AMDVI_COMPLETION_DATA_SIZE,
                             MEMTXATTRS_UNSPECIFIED)) {
            trace_amdvi_completion_wait_fail(addr);
        }
    }
    /* set completion interrupt */
    if (extract64(cmd[0], 1, 1)) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
    }
    trace_amdvi_completion_wait(addr, data);
}

/* log error without aborting since Linux seems to be using reserved bits */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));

    /*
     * This command should invalidate internal caches, of which there
     * are none in this emulation, so there is nothing to do here.
     */
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid));
}

static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29) ||
        extract64(cmd[1], 48, 16)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_ppr_exec();
}

static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 0, 60) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    amdvi_iotlb_reset(s);
    trace_amdvi_all_inval();
}

static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t domid = *(uint16_t *)user_data;
    return entry->domid == domid;
}

/* we don't have devid - we can't remove pages by address */
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));

    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
        extract64(cmd[1], 3, 9)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                &domid);
    trace_amdvi_pages_inval(domid);
}

static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 5, 7)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}

static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    trace_amdvi_intr_inval();
}

/*
 * FIXME: Try to work with the specified size instead of all the pages
 * when the S bit is on
 */
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = extract64(cmd[0], 0, 16);

    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 6, 6)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    if (extract64(cmd[1], 0, 1)) {
        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
                                    &devid);
    } else {
        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
                                cpu_to_le16(extract64(cmd[1], 0, 16)));
    }
    trace_amdvi_iotlb_inval();
}

/* not honouring reserved bits is regarded as an illegal command */
static void amdvi_cmdbuf_exec(AMDVIState *s)
{
    uint64_t cmd[2];

    if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
                        cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
        amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
        return;
    }

    switch (extract64(cmd[0], 60, 4)) {
    case AMDVI_CMD_COMPLETION_WAIT:
        amdvi_completion_wait(s, cmd);
        break;
    case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
        amdvi_inval_devtab_entry(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_PAGES:
        amdvi_inval_pages(s, cmd);
        break;
    case AMDVI_CMD_INVAL_IOTLB_PAGES:
        iommu_inval_iotlb(s, cmd);
        break;
    case AMDVI_CMD_INVAL_INTR_TABLE:
        amdvi_inval_inttable(s, cmd);
        break;
    case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
        amdvi_prefetch_pages(s, cmd);
        break;
    case AMDVI_CMD_COMPLETE_PPR_REQUEST:
        amdvi_complete_ppr(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_ALL:
        amdvi_inval_all(s, cmd);
        break;
    default:
        /* the command opcode lives in cmd[0] bits 60:63, not cmd[1] */
        trace_amdvi_unhandled_command(extract64(cmd[0], 60, 4));
        /* log illegal command */
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
}

static void amdvi_cmdbuf_run(AMDVIState *s)
{
    if (!s->cmdbuf_enabled) {
        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
        return;
    }

    /* check if there is work to do. */
    while (s->cmdbuf_head != s->cmdbuf_tail) {
        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
        amdvi_cmdbuf_exec(s);
        s->cmdbuf_head += AMDVI_COMMAND_SIZE;
        amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);

        /* wrap head pointer */
        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
            s->cmdbuf_head = 0;
        }
    }
}
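
/*
 * Example of the ring arithmetic above (size value illustrative): a
 * size field of 8 makes amdvi_handle_cmdbase_write() set cmdbuf_len to
 * 1 << 8 = 256 entries; head and tail then advance in
 * AMDVI_COMMAND_SIZE (16-byte) steps, and the head wraps back to 0
 * once it reaches 256 * 16 = 4096 bytes.
 */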

static void amdvi_mmio_trace(hwaddr addr, unsigned size)
{
    uint8_t index = (addr & ~0x2000) / 8;

    if ((addr & 0x2000)) {
        /* high table */
        index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
    } else {
        index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
    }
}

static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    AMDVIState *s = opaque;

    uint64_t val = -1;
    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
        return (uint64_t)-1;
    }

    if (size == 2) {
        val = amdvi_readw(s, addr);
    } else if (size == 4) {
        val = amdvi_readl(s, addr);
    } else if (size == 8) {
        val = amdvi_readq(s, addr);
    }
    amdvi_mmio_trace(addr, size);

    return val;
}

static void amdvi_handle_control_write(AMDVIState *s)
{
    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);

    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
    s->evtlog_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_EVENTLOGEN);

    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
    s->cmdbuf_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
    s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);

    /* update the flags depending on the control register */
    if (s->cmdbuf_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
    }
    if (s->evtlog_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
    }

    trace_amdvi_control_status(control);
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_devtab_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);

    /*
     * set device table length: the guest programs the size in 4K pages
     * minus one, so the entry count is (size + 1) * (4K / entry size);
     * the original parenthesisation added the unit instead of
     * multiplying by it
     */
    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE);
}

static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
{
    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
{
    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
                & AMDVI_MMIO_CMDBUF_BASE_MASK;
    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
    s->cmdbuf_head = s->cmdbuf_tail = 0;
}

static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
{
    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_excllim_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
                    AMDVI_MMIO_EXCL_LIMIT_LOW;
}

static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);
}

static inline void amdvi_handle_evttail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
}

static inline void amdvi_handle_evthead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
}

static inline void amdvi_handle_pprhead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprtail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
}

/*
 * FIXME: something might go wrong if system software writes this
 * register in chunks of one byte; Linux writes in 4-byte chunks, so
 * this currently works correctly with the Linux driver, but it may
 * well break for software that reads/writes 8 bytes.
 */
static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
                                 hwaddr addr)
{
    if (size == 2) {
        amdvi_writew(s, addr, val);
    } else if (size == 4) {
        amdvi_writel(s, addr, val);
    } else if (size == 8) {
        amdvi_writeq(s, addr, val);
    }
}

static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    AMDVIState *s = opaque;
    unsigned long offset = addr & 0x07;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_write("error: addr outside region: max ",
                (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
        return;
    }

    amdvi_mmio_trace(addr, size);
    switch (addr & ~0x07) {
    case AMDVI_MMIO_CONTROL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_control_write(s);
        break;
    case AMDVI_MMIO_DEVICE_TABLE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /*
         * set device table address; this also suffers from an inability
         * to tell whether software is done writing
         */
        if (offset || (size == 8)) {
            amdvi_handle_devtab_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdhead_write(s);
        break;
    case AMDVI_MMIO_COMMAND_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /*
         * FIXME: make sure System Software has finished writing, in a
         * robust way, in case it writes in chunks of less than 8 bytes.
         * For now, this hack works for the Linux driver.
         */
        if (offset || (size == 8)) {
            amdvi_handle_cmdbase_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdtail_write(s);
        break;
    case AMDVI_MMIO_EVENT_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evtbase_write(s);
        break;
    case AMDVI_MMIO_EVENT_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evthead_write(s);
        break;
    case AMDVI_MMIO_EVENT_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evttail_write(s);
        break;
    case AMDVI_MMIO_EXCL_LIMIT:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_excllim_write(s);
        break;
    /* PPR log base - unused for now */
    case AMDVI_MMIO_PPR_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprbase_write(s);
        break;
    /* PPR log head - also unused for now */
    case AMDVI_MMIO_PPR_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprhead_write(s);
        break;
    /* PPR log tail - unused for now */
    case AMDVI_MMIO_PPR_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprtail_write(s);
        break;
    }
}

static inline uint64_t amdvi_get_perms(uint64_t entry)
{
    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
           AMDVI_DEV_PERM_SHIFT;
}

/* validate that reserved bits are honoured */
static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
                               uint64_t *dte)
{
    if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
        || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
        || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
        amdvi_log_illegaldevtab_error(s, devid,
                                      s->devtab +
                                      devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
        return false;
    }

    return true;
}

/* get a device table entry given the devid */
static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
{
    int i;
    uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;

    if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
                        AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_dte_get_fail(s->devtab, offset);
        /* log error accessing dte */
        amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
        return false;
    }

    /* byte-swap all four quadwords of the entry, not just the first */
    for (i = 0; i < 4; i++) {
        entry[i] = le64_to_cpu(entry[i]);
    }
    if (!amdvi_validate_dte(s, devid, entry)) {
        trace_amdvi_invalid_dte(entry[0]);
        return false;
    }

    return true;
}

/* get pte translation mode */
static inline uint8_t get_pte_translation_mode(uint64_t pte)
{
    return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
}

static inline uint64_t pte_override_page_mask(uint64_t pte)
{
    uint8_t page_mask = 13;
    uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
    /* find the first zero bit */
    while (addr & 1) {
        page_mask++;
        addr = addr >> 1;
    }

    return ~((1ULL << page_mask) - 1);
}

static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
{
    return ~((1UL << ((oldlevel * 9) + 3)) - 1);
}
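
/*
 * Worked examples for the two page-mask helpers above (values
 * illustrative): pte_get_page_mask(1) yields ~((1UL << 12) - 1), a
 * 4 KiB page, and pte_get_page_mask(2) yields ~((1UL << 21) - 1), a
 * 2 MiB page. For pte_override_page_mask(), a PTE whose address field
 * ends in eight 1-bits grows page_mask from 13 to 21, which again
 * describes a 2 MiB page.
 */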

static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
                                           uint16_t devid)
{
    uint64_t pte;

    if (dma_memory_read(&address_space_memory, pte_addr,
                        &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_get_pte_hwerror(pte_addr);
        amdvi_log_pagetab_error(s, devid, pte_addr, 0);
        pte = 0;
        return pte;
    }

    pte = le64_to_cpu(pte);
    return pte;
}

static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                            IOMMUTLBEntry *ret, unsigned perms,
                            hwaddr addr)
{
    unsigned level, present, pte_perms, oldlevel;
    uint64_t pte = dte[0], pte_addr, page_mask;

    /* make sure the DTE has TV = 1 */
    if (pte & AMDVI_DEV_TRANSLATION_VALID) {
        level = get_pte_translation_mode(pte);
        if (level >= 7) {
            trace_amdvi_mode_invalid(level, addr);
            return;
        }
        if (level == 0) {
            goto no_remap;
        }

        /* we are at the leaf page table or page table encodes a huge page */
        do {
            pte_perms = amdvi_get_perms(pte);
            present = pte & 1;
            if (!present || perms != (perms & pte_perms)) {
                amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
                trace_amdvi_page_fault(addr);
                return;
            }

            /* go to the next lower level */
            pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
            /* add offset and load pte */
            pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
            pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
            if (!pte) {
                return;
            }
            oldlevel = level;
            level = get_pte_translation_mode(pte);
        } while (level > 0 && level < 7);

        if (level == 0x7) {
            page_mask = pte_override_page_mask(pte);
        } else {
            page_mask = pte_get_page_mask(oldlevel);
        }

        /* get access permissions from pte */
        ret->iova = addr & page_mask;
        ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
        ret->addr_mask = ~page_mask;
        ret->perm = amdvi_get_perms(pte);
        return;
    }
no_remap:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = amdvi_get_perms(pte);
}
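
/*
 * Index extraction in the walk above, worked through for level 1
 * (illustrative): (addr >> (3 + 9 * 1)) & 0x1FF selects bits 12:20 of
 * the IOVA, i.e. the 512-entry table index for 4 KiB pages, and the
 * "<< 3" turns that index into the byte offset of an 8-byte PTE.
 */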

static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
                               bool is_write, IOMMUTLBEntry *ret)
{
    AMDVIState *s = as->iommu_state;
    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
    AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
    uint64_t entry[4];

    if (iotlb_entry) {
        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
        ret->iova = addr & ~iotlb_entry->page_mask;
        ret->translated_addr = iotlb_entry->translated_addr;
        ret->addr_mask = iotlb_entry->page_mask;
        ret->perm = iotlb_entry->perms;
        return;
    }

    if (!amdvi_get_dte(s, devid, entry)) {
        return;
    }

    /* devices with V = 0 are not translated */
    if (!(entry[0] & AMDVI_DEV_VALID)) {
        goto out;
    }

    amdvi_page_walk(as, entry, ret,
                    is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);

    amdvi_update_iotlb(s, devid, addr, *ret,
                       entry[1] & AMDVI_DEV_DOMID_ID_MASK);
    return;

out:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = IOMMU_RW;
}

static inline bool amdvi_is_interrupt_addr(hwaddr addr)
{
    return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
}

static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                                     IOMMUAccessFlags flag, int iommu_idx)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
    AMDVIState *s = as->iommu_state;
    IOMMUTLBEntry ret = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = 0,
        .addr_mask = ~(hwaddr)0,
        .perm = IOMMU_NONE
    };

    if (!s->enabled) {
        /*
         * AMDVI disabled - corresponds to iommu=off, not a failure to
         * provide any parameter
         */
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_RW;
        return ret;
    } else if (amdvi_is_interrupt_addr(addr)) {
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_WO;
        return ret;
    }

    amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
    trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
            PCI_FUNC(as->devfn), addr, ret.translated_addr);
    return ret;
}

static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                          union irte *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;

    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_val(irte->val);

    return 0;
}
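
/*
 * Note on the offset derivation above: in the legacy format each IRTE
 * is 4 bytes wide, so the table index taken from the MSI data register
 * is shifted left by 2 to form a byte offset.
 */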

static int amdvi_int_remap_legacy(AMDVIState *iommu,
                                  MSIMessage *origin,
                                  MSIMessage *translated,
                                  uint64_t *dte,
                                  X86IOMMUIrq *irq,
                                  uint16_t sid)
{
    int ret;
    union irte irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.fields.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.fields.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.fields.int_type;
    irq->vector = irte.fields.vector;
    irq->dest_mode = irte.fields.dm;
    irq->redir_hint = irte.fields.rq_eoi;
    irq->dest = irte.fields.destination;

    return 0;
}

static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                             struct irte_ga *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte_ga");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
    return 0;
}
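
/*
 * Same derivation as in amdvi_get_irte(), but a guest-APIC (GA) IRTE
 * is 128 bits (16 bytes) wide, hence the index is shifted left by 4.
 */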

static int amdvi_int_remap_ga(AMDVIState *iommu,
                              MSIMessage *origin,
                              MSIMessage *translated,
                              uint64_t *dte,
                              X86IOMMUIrq *irq,
                              uint16_t sid)
{
    int ret;
    struct irte_ga irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.lo.fields_remap.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.lo.fields_remap.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type is set");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.lo.fields_remap.int_type;
    irq->vector = irte.hi.fields.vector;
    irq->dest_mode = irte.lo.fields_remap.dm;
    irq->redir_hint = irte.lo.fields_remap.rq_eoi;
    irq->dest = irte.lo.fields_remap.destination;

    return 0;
}

static int __amdvi_int_remap_msi(AMDVIState *iommu,
                                 MSIMessage *origin,
                                 MSIMessage *translated,
                                 uint64_t *dte,
                                 X86IOMMUIrq *irq,
                                 uint16_t sid)
{
    int ret;
    uint8_t int_ctl;

    int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
    trace_amdvi_ir_intctl(int_ctl);

    switch (int_ctl) {
    case AMDVI_IR_INTCTL_PASS:
        memcpy(translated, origin, sizeof(*origin));
        return 0;
    case AMDVI_IR_INTCTL_REMAP:
        break;
    case AMDVI_IR_INTCTL_ABORT:
        trace_amdvi_ir_target_abort("int_ctl abort");
        return -AMDVI_IR_TARGET_ABORT;
    default:
        trace_amdvi_ir_err("int_ctl reserved");
        return -AMDVI_IR_ERR;
    }

    if (iommu->ga_enabled) {
        ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
    } else {
        ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
    }

    return ret;
}

/* Interrupt remapping for MSI/MSI-X entry */
static int amdvi_int_remap_msi(AMDVIState *iommu,
                               MSIMessage *origin,
                               MSIMessage *translated,
                               uint16_t sid)
{
    int ret = 0;
    uint64_t pass = 0;
    uint64_t dte[4] = { 0 };
    X86IOMMUIrq irq = { 0 };
    uint8_t dest_mode, delivery_mode;

    assert(origin && translated);

    /*
     * When IOMMU is enabled, interrupt remap request will come either from
     * IO-APIC or PCI device. If interrupt is from PCI device then it will
     * have a valid requester id but if the interrupt is from IO-APIC
     * then requester id will be invalid.
     */
    if (sid == X86_IOMMU_SID_INVALID) {
        sid = AMDVI_IOAPIC_SB_DEVID;
    }

    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);

    /* check if device table entry is set before we go further. */
    if (!iommu || !iommu->devtab_len) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (!amdvi_get_dte(iommu, sid, dte)) {
        return -AMDVI_IR_ERR;
    }

    /* Check if IR is enabled in DTE */
    if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    /* validate that we are configured with intremap=on */
    if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
        trace_amdvi_err("Interrupt remapping is enabled in the guest but "
                        "not in the host. Use intremap=on to enable interrupt "
                        "remapping in amd-iommu.");
        return -AMDVI_IR_ERR;
    }

    if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
        trace_amdvi_err("MSI address high 32 bits non-zero when "
                        "Interrupt Remapping enabled.");
        return -AMDVI_IR_ERR;
    }

    if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
        trace_amdvi_err("MSI is not from IOAPIC.");
        return -AMDVI_IR_ERR;
    }

    /*
     * Bits [10:8] of the MSI data register are used to get the upstream
     * interrupt type.
     *
     * See MSI/MSI-X format:
     * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
     * (page 5)
     */
    delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;

    switch (delivery_mode) {
    case AMDVI_IOAPIC_INT_TYPE_FIXED:
    case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
        trace_amdvi_ir_delivery_mode("fixed/arbitrated");
        ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
        if (ret < 0) {
            goto remap_fail;
        } else {
            /* Translate IRQ to MSI messages */
            x86_iommu_irq_to_msi_message(&irq, translated);
            goto out;
        }
        break;
    case AMDVI_IOAPIC_INT_TYPE_SMI:
        error_report("SMI is not supported!");
        ret = -AMDVI_IR_ERR;
        break;
    case AMDVI_IOAPIC_INT_TYPE_NMI:
        pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
        trace_amdvi_ir_delivery_mode("nmi");
        break;
    case AMDVI_IOAPIC_INT_TYPE_INIT:
        pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("init");
        break;
    case AMDVI_IOAPIC_INT_TYPE_EINT:
        pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("eint");
        break;
    default:
        trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
        ret = -AMDVI_IR_ERR;
        break;
    }

    if (ret < 0) {
        goto remap_fail;
    }

    /*
     * The MSI address register bit[2] is used to get the destination
     * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
     * only.
     */
    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
    if (dest_mode) {
        trace_amdvi_ir_err("invalid dest_mode");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

    if (pass) {
        memcpy(translated, origin, sizeof(*origin));
    } else {
        trace_amdvi_ir_err("passthrough is not enabled");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

out:
    trace_amdvi_ir_remap_msi(origin->address, origin->data,
                             translated->address, translated->data);
    return 0;

remap_fail:
    return ret;
}

static int amdvi_int_remap(X86IOMMUState *iommu,
                           MSIMessage *origin,
                           MSIMessage *translated,
                           uint16_t sid)
{
    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
                               translated, sid);
}

static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
                                      uint64_t value, unsigned size,
                                      MemTxAttrs attrs)
{
    int ret;
    MSIMessage from = { 0, 0 }, to = { 0, 0 };
    uint16_t sid = AMDVI_IOAPIC_SB_DEVID;

    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
    from.data = (uint32_t) value;

    trace_amdvi_mem_ir_write_req(addr, value, size);

    if (!attrs.unspecified) {
        /* We have explicit Source ID */
        sid = attrs.requester_id;
    }

    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
    if (ret < 0) {
        /* TODO: log the event using IOMMU log event interface */
        error_report_once("failed to remap interrupt from devid 0x%x", sid);
        return MEMTX_ERROR;
    }

    apic_get_class()->send_msi(&to);

    trace_amdvi_mem_ir_write(to.address, to.data);
    return MEMTX_OK;
}

static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
                                     uint64_t *data, unsigned size,
                                     MemTxAttrs attrs)
{
    return MEMTX_OK;
}

static const MemoryRegionOps amdvi_ir_ops = {
    .read_with_attrs = amdvi_mem_ir_read,
    .write_with_attrs = amdvi_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    }
};

static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    char name[128];
    AMDVIState *s = opaque;
    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
    int bus_num = pci_bus_num(bus);

    iommu_as = s->address_spaces[bus_num];

    /* allocate memory during the first run */
    if (!iommu_as) {
        iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
        s->address_spaces[bus_num] = iommu_as;
    }

    /* set up AMD-Vi region */
    if (!iommu_as[devfn]) {
        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);

        iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
        iommu_as[devfn]->devfn = (uint8_t)devfn;
        iommu_as[devfn]->iommu_state = s;

        amdvi_dev_as = iommu_as[devfn];

        /*
         * Memory region relationships look like this (the address range
         * shows only the lower 32 bits to keep it short):
         *
         * |-----------------+-------------------+----------|
         * | Name            | Address range     | Priority |
         * |-----------------+-------------------+----------|
         * | amdvi_root      | 00000000-ffffffff |        0 |
         * |  amdvi_iommu    | 00000000-ffffffff |        1 |
         * |  amdvi_iommu_ir | fee00000-feefffff |       64 |
         * |-----------------+-------------------+----------|
         */
        memory_region_init_iommu(&amdvi_dev_as->iommu,
                                 sizeof(amdvi_dev_as->iommu),
                                 TYPE_AMD_IOMMU_MEMORY_REGION,
                                 OBJECT(s),
                                 "amd_iommu", UINT64_MAX);
        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
                           "amdvi_root", UINT64_MAX);
        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
        memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
                              &amdvi_ir_ops, s, "amd_iommu_ir",
                              AMDVI_INT_ADDR_SIZE);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root,
                                            AMDVI_INT_ADDR_FIRST,
                                            &amdvi_dev_as->iommu_ir,
                                            64);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            MEMORY_REGION(&amdvi_dev_as->iommu),
                                            1);
    }
    return &iommu_as[devfn]->as;
}

static const MemoryRegionOps mmio_mem_ops = {
    .read = amdvi_mmio_read,
    .write = amdvi_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    }
};

static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                           IOMMUNotifierFlag old,
                                           IOMMUNotifierFlag new,
                                           Error **errp)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);

    if (new & IOMMU_NOTIFIER_MAP) {
        error_setg(errp,
                   "device %02x.%02x.%x requires iommu notifier which is not "
                   "currently supported", as->bus_num, PCI_SLOT(as->devfn),
                   PCI_FUNC(as->devfn));
        return -EINVAL;
    }
    return 0;
}

static void amdvi_init(AMDVIState *s)
{
    amdvi_iotlb_reset(s);

    s->devtab_len = 0;
    s->cmdbuf_len = 0;
    s->cmdbuf_head = 0;
    s->cmdbuf_tail = 0;
    s->evtlog_head = 0;
    s->evtlog_tail = 0;
    s->excl_enabled = false;
    s->excl_allow = false;
    s->mmio_enabled = false;
    s->enabled = false;
    s->ats_enabled = false;
    s->cmdbuf_enabled = false;

    /* reset MMIO */
    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
            0xffffffffffffffef, 0);
    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);

    /* reset device ident */
    pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
    pci_config_set_prog_interface(s->pci.dev.config, 0);
    pci_config_set_device_id(s->pci.dev.config, s->devid);
    pci_config_set_class(s->pci.dev.config, 0x0806);

    /* reset AMDVI specific capabilities, all r/o */
    pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
                 s->mmio.addr & ~(0xffff0000));
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
                (s->mmio.addr & ~(0xffff)) >> 16);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
                 0xff000000);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
            AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
}

static void amdvi_sysbus_reset(DeviceState *dev)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);

    msi_reset(&s->pci.dev);
    amdvi_init(s);
}

static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
{
    int ret = 0;
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
    MachineState *ms = MACHINE(qdev_get_machine());
    PCMachineState *pcms = PC_MACHINE(ms);
    X86MachineState *x86ms = X86_MACHINE(ms);
    PCIBus *bus = pcms->bus;

    s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
                                     amdvi_uint64_equal, g_free, g_free);

    /* This device should take care of IOMMU PCI properties */
    if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
        return;
    }
    ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
                             AMDVI_CAPAB_SIZE, errp);
    if (ret < 0) {
        return;
    }
    s->capab_offset = ret;

    ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
                             AMDVI_CAPAB_REG_SIZE, errp);
    if (ret < 0) {
        return;
    }
    ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
                             AMDVI_CAPAB_REG_SIZE, errp);
    if (ret < 0) {
        return;
    }

    /* Pseudo address space under root PCI bus. */
    x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);

    /* set up MMIO */
    memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                          AMDVI_MMIO_SIZE);

    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
    sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
    s->devid = object_property_get_int(OBJECT(&s->pci), "addr", &error_abort);
    msi_init(&s->pci.dev, 0, 1, true, false, errp);
    amdvi_init(s);
}

static const VMStateDescription vmstate_amdvi_sysbus = {
    .name = "amd-iommu",
    .unmigratable = 1
};

static void amdvi_sysbus_instance_init(Object *klass)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(klass);

    object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
}

static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);

    dc->reset = amdvi_sysbus_reset;
    dc->vmsd = &vmstate_amdvi_sysbus;
    dc->hotpluggable = false;
    dc_class->realize = amdvi_sysbus_realize;
    dc_class->int_remap = amdvi_int_remap;
    /* Supported by the pc-q35-* machine types */
    dc->user_creatable = true;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}

static const TypeInfo amdvi_sysbus = {
    .name = TYPE_AMD_IOMMU_DEVICE,
    .parent = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(AMDVIState),
    .instance_init = amdvi_sysbus_instance_init,
    .class_init = amdvi_sysbus_class_init
};

static void amdvi_pci_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}

static const TypeInfo amdvi_pci = {
    .name = TYPE_AMD_IOMMU_PCI,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(AMDVIPCIState),
    .class_init = amdvi_pci_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = amdvi_translate;
    imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
}

static const TypeInfo amdvi_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_AMD_IOMMU_MEMORY_REGION,
    .class_init = amdvi_iommu_memory_region_class_init,
};

static void amdvi_register_types(void)
{
    type_register_static(&amdvi_pci);
    type_register_static(&amdvi_sysbus);
    type_register_static(&amdvi_iommu_memory_region_info);
}

type_init(amdvi_register_types);