qemu/softmmu/dma-helpers.c
/*
 * DMA helper functions
 *
 * Copyright (c) 2009,2020 Red Hat
 *
 * This work is licensed under the terms of the GNU General Public License
 * (GNU GPL), version 2 or later.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"

/* #define DEBUG_IOMMU */

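/*
 * Fill @len bytes of guest memory at @addr in address space @as with the
 * byte @c, issuing a DMA barrier first.  Returns the accumulated MemTxResult
 * of the underlying address_space_write() calls.
 */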
MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
                           uint8_t c, dma_addr_t len)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

#define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
    int l;
    MemTxResult error = MEMTX_OK;

    memset(fillbuf, c, FILLBUF_SIZE);
    while (len > 0) {
        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
        error |= address_space_write(as, addr, MEMTXATTRS_UNSPECIFIED,
                                     fillbuf, l);
        len -= l;
        addr += l;
    }

    return error;
}

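/*
 * Initialise a scatter/gather list.  @alloc_hint pre-sizes the entry array,
 * which grows on demand in qemu_sglist_add().  A reference to @dev is taken
 * so the owning device cannot disappear while the list is in use.
 */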
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
    qsg->as = as;
    qsg->dev = dev;
    object_ref(OBJECT(dev));
}

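/* Append a (base, len) entry, doubling the backing array when it is full. */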
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}

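/* Release the entry array and the device reference taken at init time. */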
void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}

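/*
 * State for one in-flight scatter/gather block I/O request.  dma_blk_cb()
 * walks the QEMUSGList, mapping as many entries as possible into @iov, and
 * issues the actual transfer through @io_func.
 */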
typedef struct {
    BlockAIOCB common;
    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint32_t align;
    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;

static void dma_blk_cb(void *opaque, int ret);

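/* Bottom half: retry the transfer once bounce-buffer space becomes available. */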
static void reschedule_dma(void *opaque)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;

    assert(!dbs->acb && dbs->bh);
    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
    dma_blk_cb(dbs, 0);
}

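/* Unmap every buffer currently held in dbs->iov and reset the vector. */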
static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
    qemu_iovec_reset(&dbs->iov);
}

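/* Finish the request: unmap, run the caller's completion callback, free. */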
static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->acb && !dbs->bh);
    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}

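/*
 * Main state machine, used both to start a request and as the completion
 * callback for each partial transfer.  It maps as much of the remaining
 * scatter/gather list as possible, then submits one I/O through io_func.
 * If nothing could be mapped (bounce buffer exhausted), it registers a
 * map-client bottom half and waits; if the list is exhausted or an error
 * was reported, it completes the request.
 */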
static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
        /*
         * Make reads deterministic in icount mode.  Windows sometimes issues
         * disk read requests with overlapping SGs, which leads to
         * non-determinism because the resulting buffer contents may be mixed
         * from several sectors.  This code splits all SGs into several
         * groups, so that the SGs within each group do not overlap.
         */
        if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
            int i;
            for (i = 0; i < dbs->iov.niov; ++i) {
                if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
                                   dbs->iov.iov[i].iov_len, (intptr_t)mem,
                                   cur_len)) {
                    dma_memory_unmap(dbs->sg->as, mem, cur_len,
                                     dbs->dir, cur_len);
                    mem = NULL;
                    break;
                }
            }
        }
        if (!mem) {
            break;
        }
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        return;
    }

    if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
        qemu_iovec_discard_back(&dbs->iov,
                                QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
    }

    aio_context_acquire(dbs->ctx);
    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                            dma_blk_cb, dbs, dbs->io_func_opaque);
    aio_context_release(dbs->ctx);
    assert(dbs->acb);
}

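/*
 * Cancel an in-flight request.  If an I/O is outstanding, request async
 * cancellation and let dma_blk_cb() finish the teardown; if we were only
 * waiting for bounce-buffer space, drop the bottom half and complete with
 * -ECANCELED directly.
 */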
static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    assert(!(dbs->acb && dbs->bh));
    if (dbs->acb) {
        /* This will invoke dma_blk_cb.  */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh) {
        cpu_unregister_map_client(dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}

static AioContext *dma_get_aio_context(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    return dbs->ctx;
}

static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size         = sizeof(DMAAIOCB),
    .cancel_async       = dma_aio_cancel,
    .get_aio_context    = dma_get_aio_context,
};

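/*
 * Start a scatter/gather block I/O request.  @io_func performs the actual
 * transfer and is called once per mapped chunk; @cb runs when the whole list
 * has been transferred or an error occurred.  Each submission is trimmed
 * down to a multiple of @align.
 */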
BlockAIOCB *dma_blk_io(AioContext *ctx,
    QEMUSGList *sg, uint64_t offset, uint32_t align,
    DMAIOFunc *io_func, void *io_func_opaque,
    BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
{
    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
    dbs->sg = sg;
    dbs->ctx = ctx;
    dbs->offset = offset;
    dbs->align = align;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
    return &dbs->common;
}

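/*
 * Adapters that give blk_aio_preadv()/blk_aio_pwritev() the DMAIOFunc
 * signature expected by dma_blk_io(), plus the dma_blk_read()/dma_blk_write()
 * convenience wrappers built on top of them.
 */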
static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
                                 BlockCompletionFunc *cb, void *cb_opaque,
                                 void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_read(BlockBackend *blk,
                         QEMUSGList *sg, uint64_t offset, uint32_t align,
                         void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
}

static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
                                  BlockCompletionFunc *cb, void *cb_opaque,
                                  void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_write(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
}

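/*
 * Synchronously copy up to @len bytes between @ptr and the memory described
 * by @sg, in the direction given by @dir.  Returns the number of bytes of
 * the scatter/gather list that were not transferred.
 */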
static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
                           DMADirection dir)
{
    uint64_t resid;
    int sg_cur_index;

    resid = sg->size;
    sg_cur_index = 0;
    len = MIN(len, resid);
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        int32_t xfer = MIN(len, entry.len);
        dma_memory_rw(sg->as, entry.base, ptr, xfer, dir);
        ptr += xfer;
        len -= xfer;
        resid -= xfer;
    }

    return resid;
}

uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg)
{
    return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE);
}

uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg)
{
    return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE);
}

void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}

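/*
 * Return the address mask (2^n - 1) of the largest power-of-two region that
 * is aligned with @start and does not exceed the [@start, @end] range,
 * capped at @max_addr_bits bits.
 */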
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    if (max_addr_bits != 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            return UINT64_MAX;
        }
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}