qemu/softmmu/dma-helpers.c
/*
 * DMA helper functions
 *
 * Copyright (c) 2009,2020 Red Hat
 *
 * This work is licensed under the terms of the GNU General Public License
 * (GNU GPL), version 2 or later.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"

/* #define DEBUG_IOMMU */

MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
                           uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

    return address_space_set(as, addr, c, len, attrs);
}

void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
    qsg->as = as;
    qsg->dev = dev;
    object_ref(OBJECT(dev));
}

void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}

void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}

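/*
 * Usage sketch for qemu_sglist_init()/qemu_sglist_add()/qemu_sglist_destroy()
 * above (hypothetical device code, not part of this file).  A device model
 * would typically build a QEMUSGList from guest-programmed descriptors,
 * assuming "s" is the device state, "s->dma_as" its DMA address space and
 * "desc"/"nb_desc" come from a guest-visible descriptor table:
 *
 *     QEMUSGList qsg;
 *     int i;
 *
 *     qemu_sglist_init(&qsg, DEVICE(s), nb_desc, &s->dma_as);
 *     for (i = 0; i < nb_desc; i++) {
 *         qemu_sglist_add(&qsg, desc[i].addr, desc[i].len);
 *     }
 *     ... submit I/O against &qsg, e.g. via dma_blk_read() below ...
 *     qemu_sglist_destroy(&qsg);
 */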
typedef struct {
    BlockAIOCB common;
    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint32_t align;
    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;

static void dma_blk_cb(void *opaque, int ret);

static void reschedule_dma(void *opaque)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;

    assert(!dbs->acb && dbs->bh);
    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
    dma_blk_cb(dbs, 0);
}

static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
    qemu_iovec_reset(&dbs->iov);
}

static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->acb && !dbs->bh);
    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}

static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
                             MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode.  Windows sometimes issues
         * disk read requests with overlapping SGs, which leads to
         * non-determinism because the resulting buffer contents may be mixed
         * from several sectors.  This code splits the SGs into groups such
         * that the SGs within each group do not overlap.
         */
        if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
            int i;
            for (i = 0; i < dbs->iov.niov; ++i) {
                if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
                                   dbs->iov.iov[i].iov_len, (intptr_t)mem,
                                   cur_len)) {
                    dma_memory_unmap(dbs->sg->as, mem, cur_len,
                                     dbs->dir, cur_len);
                    mem = NULL;
                    break;
                }
            }
        }
        if (!mem) {
            break;
        }
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        return;
    }

    if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
        qemu_iovec_discard_back(&dbs->iov,
                                QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
    }

    aio_context_acquire(dbs->ctx);
    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                            dma_blk_cb, dbs, dbs->io_func_opaque);
    aio_context_release(dbs->ctx);
    assert(dbs->acb);
}

static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    assert(!(dbs->acb && dbs->bh));
    if (dbs->acb) {
        /* This will invoke dma_blk_cb.  */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh) {
        cpu_unregister_map_client(dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}

static AioContext *dma_get_aio_context(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    return dbs->ctx;
}

static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size         = sizeof(DMAAIOCB),
    .cancel_async       = dma_aio_cancel,
    .get_aio_context    = dma_get_aio_context,
};

BlockAIOCB *dma_blk_io(AioContext *ctx,
    QEMUSGList *sg, uint64_t offset, uint32_t align,
    DMAIOFunc *io_func, void *io_func_opaque,
    BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
{
    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
    dbs->sg = sg;
    dbs->ctx = ctx;
    dbs->offset = offset;
    dbs->align = align;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
    return &dbs->common;
}


static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
                                 BlockCompletionFunc *cb, void *cb_opaque,
                                 void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_read(BlockBackend *blk,
                         QEMUSGList *sg, uint64_t offset, uint32_t align,
                         void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
}

static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
                                  BlockCompletionFunc *cb, void *cb_opaque,
                                  void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_write(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
}

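/*
 * Usage sketch for dma_blk_read()/dma_blk_write() above (hypothetical device
 * code, not part of this file).  An emulated storage controller would start
 * a DMA disk read into the guest buffers described by "qsg" roughly like
 * this, where "s" is the device state, "lba" the requested sector and
 * "my_dma_complete" a hypothetical completion callback:
 *
 *     s->aiocb = dma_blk_read(s->blk, &qsg, lba << BDRV_SECTOR_BITS,
 *                             BDRV_SECTOR_SIZE, my_dma_complete, s);
 *
 * my_dma_complete(s, ret) runs once the whole scatter/gather list has been
 * processed, with ret < 0 on error.
 */
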
static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
                              QEMUSGList *sg, DMADirection dir,
                              MemTxAttrs attrs)
{
    uint8_t *ptr = buf;
    dma_addr_t xresidual;
    int sg_cur_index;
    MemTxResult res = MEMTX_OK;

    xresidual = sg->size;
    sg_cur_index = 0;
    len = MIN(len, xresidual);
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        dma_addr_t xfer = MIN(len, entry.len);
        res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
        ptr += xfer;
        len -= xfer;
        xresidual -= xfer;
    }

    if (residual) {
        *residual = xresidual;
    }
    return res;
}

MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
                         QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
}

MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
                          QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
}

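/*
 * Usage sketch for dma_buf_read()/dma_buf_write() above (hypothetical device
 * code, not part of this file).  These helpers bounce a whole transfer
 * through a local buffer instead of mapping the scatter/gather list:
 * dma_buf_read() copies the buffer into the guest memory described by "qsg"
 * (device-to-memory, e.g. completing a read command), dma_buf_write() copies
 * the other way round.  For example, returning a 512-byte identify/inquiry
 * block to the guest, where fill_id_data() is a hypothetical helper:
 *
 *     uint8_t data[512];
 *     dma_addr_t residual;
 *
 *     fill_id_data(data);
 *     if (dma_buf_read(data, sizeof(data), &residual, &qsg,
 *                      MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
 *         ... report a DMA error to the guest ...
 *     }
 *
 * "residual" is set to sg->size minus the number of bytes actually
 * transferred, i.e. the part of the scatter/gather list left untouched.
 */
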
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}

uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    if (max_addr_bits != 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            return UINT64_MAX;
        }
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}
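
/*
 * Worked example for dma_aligned_pow2_mask() above (illustrative): with
 * start = 0x3000, end = 0x7fff and max_addr_bits = 64:
 *
 *     addr_mask      = end - start              = 0x4fff
 *     alignment_mask = (start & -start) - 1     = 0x0fff  (4 KiB alignment)
 *     size_mask      = MIN(addr_mask, max_mask) = 0x4fff
 *
 * alignment_mask <= size_mask, so the function returns 0x0fff: the largest
 * naturally aligned power-of-two region starting at 0x3000 that still fits
 * within [start, end] is 4 KiB (0x3000..0x3fff).
 */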