linux/drivers/virtio/virtio_ring.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* Virtio ring implementation.
   3 *
   4 *  Copyright 2007 Rusty Russell IBM Corporation
   5 */
   6#include <linux/virtio.h>
   7#include <linux/virtio_ring.h>
   8#include <linux/virtio_config.h>
   9#include <linux/device.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/hrtimer.h>
  13#include <linux/dma-mapping.h>
  14#include <xen/xen.h>
  15
  16#ifdef DEBUG
  17/* For development, we want to crash whenever the ring is screwed. */
  18#define BAD_RING(_vq, fmt, args...)                             \
  19        do {                                                    \
  20                dev_err(&(_vq)->vq.vdev->dev,                   \
  21                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  22                BUG();                                          \
  23        } while (0)
  24/* Caller is supposed to guarantee no reentry. */
  25#define START_USE(_vq)                                          \
  26        do {                                                    \
  27                if ((_vq)->in_use)                              \
  28                        panic("%s:in_use = %i\n",               \
  29                              (_vq)->vq.name, (_vq)->in_use);   \
  30                (_vq)->in_use = __LINE__;                       \
  31        } while (0)
  32#define END_USE(_vq) \
  33        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
  34#define LAST_ADD_TIME_UPDATE(_vq)                               \
  35        do {                                                    \
  36                ktime_t now = ktime_get();                      \
  37                                                                \
  38                /* No kick or get, with 0.1 seconds between?  Warn. */ \
  39                if ((_vq)->last_add_time_valid)                 \
  40                        WARN_ON(ktime_to_ms(ktime_sub(now,      \
  41                                (_vq)->last_add_time)) > 100);  \
  42                (_vq)->last_add_time = now;                     \
  43                (_vq)->last_add_time_valid = true;              \
  44        } while (0)
  45#define LAST_ADD_TIME_CHECK(_vq)                                \
  46        do {                                                    \
  47                if ((_vq)->last_add_time_valid) {               \
  48                        WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
  49                                      (_vq)->last_add_time)) > 100); \
  50                }                                               \
  51        } while (0)
  52#define LAST_ADD_TIME_INVALID(_vq)                              \
  53        ((_vq)->last_add_time_valid = false)
  54#else
  55#define BAD_RING(_vq, fmt, args...)                             \
  56        do {                                                    \
  57                dev_err(&_vq->vq.vdev->dev,                     \
  58                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  59                (_vq)->broken = true;                           \
  60        } while (0)
  61#define START_USE(vq)
  62#define END_USE(vq)
  63#define LAST_ADD_TIME_UPDATE(vq)
  64#define LAST_ADD_TIME_CHECK(vq)
  65#define LAST_ADD_TIME_INVALID(vq)
  66#endif
  67
  68struct vring_desc_state_split {
  69        void *data;                     /* Data for callback. */
  70        struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
  71};
  72
  73struct vring_desc_state_packed {
  74        void *data;                     /* Data for callback. */
  75        struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
  76        u16 num;                        /* Descriptor list length. */
  77        u16 next;                       /* The next desc state in a list. */
  78        u16 last;                       /* The last desc state in a list. */
  79};
  80
  81struct vring_desc_extra_packed {
  82        dma_addr_t addr;                /* Buffer DMA addr. */
  83        u32 len;                        /* Buffer length. */
  84        u16 flags;                      /* Descriptor flags. */
  85};
  86
  87struct vring_virtqueue {
  88        struct virtqueue vq;
  89
  90        /* Is this a packed ring? */
  91        bool packed_ring;
  92
  93        /* Is DMA API used? */
  94        bool use_dma_api;
  95
  96        /* Can we use weak barriers? */
  97        bool weak_barriers;
  98
  99        /* Other side has made a mess, don't try any more. */
 100        bool broken;
 101
 102        /* Host supports indirect buffers */
 103        bool indirect;
 104
 105        /* Host publishes avail event idx */
 106        bool event;
 107
 108        /* Head of free buffer list. */
 109        unsigned int free_head;
 110        /* Number we've added since last sync. */
 111        unsigned int num_added;
 112
 113        /* Last used index we've seen. */
 114        u16 last_used_idx;
 115
 116        union {
 117                /* Available for split ring */
 118                struct {
 119                        /* Actual memory layout for this queue. */
 120                        struct vring vring;
 121
 122                        /* Last written value to avail->flags */
 123                        u16 avail_flags_shadow;
 124
 125                        /*
 126                         * Last written value to avail->idx in
 127                         * guest byte order.
 128                         */
 129                        u16 avail_idx_shadow;
 130
 131                        /* Per-descriptor state. */
 132                        struct vring_desc_state_split *desc_state;
 133
 134                        /* DMA address and size information */
 135                        dma_addr_t queue_dma_addr;
 136                        size_t queue_size_in_bytes;
 137                } split;
 138
 139                /* Available for packed ring */
 140                struct {
 141                        /* Actual memory layout for this queue. */
 142                        struct {
 143                                unsigned int num;
 144                                struct vring_packed_desc *desc;
 145                                struct vring_packed_desc_event *driver;
 146                                struct vring_packed_desc_event *device;
 147                        } vring;
 148
 149                        /* Driver ring wrap counter. */
 150                        bool avail_wrap_counter;
 151
 152                        /* Device ring wrap counter. */
 153                        bool used_wrap_counter;
 154
 155                        /* Avail used flags. */
 156                        u16 avail_used_flags;
 157
 158                        /* Index of the next avail descriptor. */
 159                        u16 next_avail_idx;
 160
 161                        /*
 162                         * Last written value to driver->flags in
 163                         * guest byte order.
 164                         */
 165                        u16 event_flags_shadow;
 166
 167                        /* Per-descriptor state. */
 168                        struct vring_desc_state_packed *desc_state;
 169                        struct vring_desc_extra_packed *desc_extra;
 170
 171                        /* DMA address and size information */
 172                        dma_addr_t ring_dma_addr;
 173                        dma_addr_t driver_event_dma_addr;
 174                        dma_addr_t device_event_dma_addr;
 175                        size_t ring_size_in_bytes;
 176                        size_t event_size_in_bytes;
 177                } packed;
 178        };
 179
 180        /* How to notify other side. FIXME: commonalize hcalls! */
 181        bool (*notify)(struct virtqueue *vq);
 182
 183        /* DMA, allocation, and size information */
 184        bool we_own_ring;
 185
 186#ifdef DEBUG
 187        /* They're supposed to lock for us. */
 188        unsigned int in_use;
 189
 190        /* Figure out if their kicks are too delayed. */
 191        bool last_add_time_valid;
 192        ktime_t last_add_time;
 193#endif
 194};
 195
 196
 197/*
 198 * Helpers.
 199 */
 200
 201#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 202
 203static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
 204                                          unsigned int total_sg)
 205{
 206        struct vring_virtqueue *vq = to_vvq(_vq);
 207
 208        /*
 209         * If the host supports indirect descriptor tables, and we have multiple
 210         * buffers, then go indirect. FIXME: tune this threshold
 211         */
 212        return (vq->indirect && total_sg > 1 && vq->vq.num_free);
 213}
 214
 215/*
 216 * Modern virtio devices have feature bits to specify whether they need a
 217 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 218 *
 219 * If there, the interaction between virtio and DMA API is messy.
 220 *
 221 * On most systems with virtio, physical addresses match bus addresses,
 222 * and it doesn't particularly matter whether we use the DMA API.
 223 *
 224 * On some systems, including Xen and any system with a physical device
 225 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 226 * for virtio DMA to work at all.
 227 *
 228 * On other systems, including SPARC and PPC64, virtio-pci devices are
 229 * enumerated as though they are behind an IOMMU, but the virtio host
 230 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 231 * there or somehow map everything as the identity.
 232 *
 233 * For the time being, we preserve historic behavior and bypass the DMA
 234 * API.
 235 *
 236 * TODO: install a per-device DMA ops structure that does the right thing
 237 * taking into account all the above quirks, and use the DMA API
 238 * unconditionally on data path.
 239 */
 240
 241static bool vring_use_dma_api(struct virtio_device *vdev)
 242{
 243        if (!virtio_has_iommu_quirk(vdev))
 244                return true;
 245
 246        /* Otherwise, we are left to guess. */
 247        /*
  248         * In theory, it's possible to have a buggy QEMU-supplied
 249         * emulated Q35 IOMMU and Xen enabled at the same time.  On
 250         * such a configuration, virtio has never worked and will
 251         * not work without an even larger kludge.  Instead, enable
 252         * the DMA API if we're a Xen guest, which at least allows
 253         * all of the sensible Xen configurations to work correctly.
 254         */
 255        if (xen_domain())
 256                return true;
 257
 258        return false;
 259}
 260
 261size_t virtio_max_dma_size(struct virtio_device *vdev)
 262{
 263        size_t max_segment_size = SIZE_MAX;
 264
 265        if (vring_use_dma_api(vdev))
 266                max_segment_size = dma_max_mapping_size(&vdev->dev);
 267
 268        return max_segment_size;
 269}
 270EXPORT_SYMBOL_GPL(virtio_max_dma_size);
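/*
 * Example (hypothetical driver code, not part of this file): a block-style
 * driver might use virtio_max_dma_size() to cap the segment size it
 * advertises, so it never builds a segment larger than the DMA layer can
 * map.  The helper name and the request_queue plumbing are illustrative
 * only; the block is guarded out of the build.
 */
#if 0
static void example_cap_seg_size(struct virtio_device *vdev,
                                 struct request_queue *q)
{
        unsigned int max = min_t(size_t, virtio_max_dma_size(vdev), UINT_MAX);

        blk_queue_max_segment_size(q, max);
}
#endif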
 271
 272static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 273                              dma_addr_t *dma_handle, gfp_t flag)
 274{
 275        if (vring_use_dma_api(vdev)) {
 276                return dma_alloc_coherent(vdev->dev.parent, size,
 277                                          dma_handle, flag);
 278        } else {
 279                void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
 280
 281                if (queue) {
 282                        phys_addr_t phys_addr = virt_to_phys(queue);
 283                        *dma_handle = (dma_addr_t)phys_addr;
 284
 285                        /*
  286                         * Sanity check: make sure we didn't truncate
 287                         * the address.  The only arches I can find that
 288                         * have 64-bit phys_addr_t but 32-bit dma_addr_t
 289                         * are certain non-highmem MIPS and x86
 290                         * configurations, but these configurations
 291                         * should never allocate physical pages above 32
 292                         * bits, so this is fine.  Just in case, throw a
 293                         * warning and abort if we end up with an
 294                         * unrepresentable address.
 295                         */
 296                        if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
 297                                free_pages_exact(queue, PAGE_ALIGN(size));
 298                                return NULL;
 299                        }
 300                }
 301                return queue;
 302        }
 303}
 304
 305static void vring_free_queue(struct virtio_device *vdev, size_t size,
 306                             void *queue, dma_addr_t dma_handle)
 307{
 308        if (vring_use_dma_api(vdev))
 309                dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
 310        else
 311                free_pages_exact(queue, PAGE_ALIGN(size));
 312}
 313
 314/*
 315 * The DMA ops on various arches are rather gnarly right now, and
 316 * making all of the arch DMA ops work on the vring device itself
 317 * is a mess.  For now, we use the parent device for DMA ops.
 318 */
 319static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 320{
 321        return vq->vq.vdev->dev.parent;
 322}
 323
 324/* Map one sg entry. */
 325static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
 326                                   struct scatterlist *sg,
 327                                   enum dma_data_direction direction)
 328{
 329        if (!vq->use_dma_api)
 330                return (dma_addr_t)sg_phys(sg);
 331
 332        /*
 333         * We can't use dma_map_sg, because we don't use scatterlists in
 334         * the way it expects (we don't guarantee that the scatterlist
 335         * will exist for the lifetime of the mapping).
 336         */
 337        return dma_map_page(vring_dma_dev(vq),
 338                            sg_page(sg), sg->offset, sg->length,
 339                            direction);
 340}
 341
 342static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 343                                   void *cpu_addr, size_t size,
 344                                   enum dma_data_direction direction)
 345{
 346        if (!vq->use_dma_api)
 347                return (dma_addr_t)virt_to_phys(cpu_addr);
 348
 349        return dma_map_single(vring_dma_dev(vq),
 350                              cpu_addr, size, direction);
 351}
 352
 353static int vring_mapping_error(const struct vring_virtqueue *vq,
 354                               dma_addr_t addr)
 355{
 356        if (!vq->use_dma_api)
 357                return 0;
 358
 359        return dma_mapping_error(vring_dma_dev(vq), addr);
 360}
 361
 362
 363/*
 364 * Split ring specific functions - *_split().
 365 */
 366
 367static void vring_unmap_one_split(const struct vring_virtqueue *vq,
 368                                  struct vring_desc *desc)
 369{
 370        u16 flags;
 371
 372        if (!vq->use_dma_api)
 373                return;
 374
 375        flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 376
 377        if (flags & VRING_DESC_F_INDIRECT) {
 378                dma_unmap_single(vring_dma_dev(vq),
 379                                 virtio64_to_cpu(vq->vq.vdev, desc->addr),
 380                                 virtio32_to_cpu(vq->vq.vdev, desc->len),
 381                                 (flags & VRING_DESC_F_WRITE) ?
 382                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 383        } else {
 384                dma_unmap_page(vring_dma_dev(vq),
 385                               virtio64_to_cpu(vq->vq.vdev, desc->addr),
 386                               virtio32_to_cpu(vq->vq.vdev, desc->len),
 387                               (flags & VRING_DESC_F_WRITE) ?
 388                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 389        }
 390}
 391
 392static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 393                                               unsigned int total_sg,
 394                                               gfp_t gfp)
 395{
 396        struct vring_desc *desc;
 397        unsigned int i;
 398
 399        /*
 400         * We require lowmem mappings for the descriptors because
 401         * otherwise virt_to_phys will give us bogus addresses in the
 402         * virtqueue.
 403         */
 404        gfp &= ~__GFP_HIGHMEM;
 405
 406        desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
 407        if (!desc)
 408                return NULL;
 409
 410        for (i = 0; i < total_sg; i++)
 411                desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
 412        return desc;
 413}
 414
 415static inline int virtqueue_add_split(struct virtqueue *_vq,
 416                                      struct scatterlist *sgs[],
 417                                      unsigned int total_sg,
 418                                      unsigned int out_sgs,
 419                                      unsigned int in_sgs,
 420                                      void *data,
 421                                      void *ctx,
 422                                      gfp_t gfp)
 423{
 424        struct vring_virtqueue *vq = to_vvq(_vq);
 425        struct scatterlist *sg;
 426        struct vring_desc *desc;
 427        unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 428        int head;
 429        bool indirect;
 430
 431        START_USE(vq);
 432
 433        BUG_ON(data == NULL);
 434        BUG_ON(ctx && vq->indirect);
 435
 436        if (unlikely(vq->broken)) {
 437                END_USE(vq);
 438                return -EIO;
 439        }
 440
 441        LAST_ADD_TIME_UPDATE(vq);
 442
 443        BUG_ON(total_sg == 0);
 444
 445        head = vq->free_head;
 446
 447        if (virtqueue_use_indirect(_vq, total_sg))
 448                desc = alloc_indirect_split(_vq, total_sg, gfp);
 449        else {
 450                desc = NULL;
 451                WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 452        }
 453
 454        if (desc) {
 455                /* Use a single buffer which doesn't continue */
 456                indirect = true;
 457                /* Set up rest to use this indirect table. */
 458                i = 0;
 459                descs_used = 1;
 460        } else {
 461                indirect = false;
 462                desc = vq->split.vring.desc;
 463                i = head;
 464                descs_used = total_sg;
 465        }
 466
 467        if (vq->vq.num_free < descs_used) {
 468                pr_debug("Can't add buf len %i - avail = %i\n",
 469                         descs_used, vq->vq.num_free);
 470                /* FIXME: for historical reasons, we force a notify here if
 471                 * there are outgoing parts to the buffer.  Presumably the
 472                 * host should service the ring ASAP. */
 473                if (out_sgs)
 474                        vq->notify(&vq->vq);
 475                if (indirect)
 476                        kfree(desc);
 477                END_USE(vq);
 478                return -ENOSPC;
 479        }
 480
 481        for (n = 0; n < out_sgs; n++) {
 482                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 483                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 484                        if (vring_mapping_error(vq, addr))
 485                                goto unmap_release;
 486
 487                        desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
 488                        desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 489                        desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 490                        prev = i;
 491                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 492                }
 493        }
 494        for (; n < (out_sgs + in_sgs); n++) {
 495                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 496                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
 497                        if (vring_mapping_error(vq, addr))
 498                                goto unmap_release;
 499
 500                        desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
 501                        desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 502                        desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 503                        prev = i;
 504                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 505                }
 506        }
 507        /* Last one doesn't continue. */
 508        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 509
 510        if (indirect) {
 511                /* Now that the indirect table is filled in, map it. */
 512                dma_addr_t addr = vring_map_single(
 513                        vq, desc, total_sg * sizeof(struct vring_desc),
 514                        DMA_TO_DEVICE);
 515                if (vring_mapping_error(vq, addr))
 516                        goto unmap_release;
 517
 518                vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
 519                                VRING_DESC_F_INDIRECT);
 520                vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
 521                                addr);
 522
 523                vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
 524                                total_sg * sizeof(struct vring_desc));
 525        }
 526
 527        /* We're using some buffers from the free list. */
 528        vq->vq.num_free -= descs_used;
 529
 530        /* Update free pointer */
 531        if (indirect)
 532                vq->free_head = virtio16_to_cpu(_vq->vdev,
 533                                        vq->split.vring.desc[head].next);
 534        else
 535                vq->free_head = i;
 536
 537        /* Store token and indirect buffer state. */
 538        vq->split.desc_state[head].data = data;
 539        if (indirect)
 540                vq->split.desc_state[head].indir_desc = desc;
 541        else
 542                vq->split.desc_state[head].indir_desc = ctx;
 543
 544        /* Put entry in available array (but don't update avail->idx until they
 545         * do sync). */
 546        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
 547        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 548
 549        /* Descriptors and available array need to be set before we expose the
 550         * new available array entries. */
 551        virtio_wmb(vq->weak_barriers);
 552        vq->split.avail_idx_shadow++;
 553        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 554                                                vq->split.avail_idx_shadow);
 555        vq->num_added++;
 556
 557        pr_debug("Added buffer head %i to %p\n", head, vq);
 558        END_USE(vq);
 559
 560        /* This is very unlikely, but theoretically possible.  Kick
 561         * just in case. */
 562        if (unlikely(vq->num_added == (1 << 16) - 1))
 563                virtqueue_kick(_vq);
 564
 565        return 0;
 566
 567unmap_release:
 568        err_idx = i;
 569
 570        if (indirect)
 571                i = 0;
 572        else
 573                i = head;
 574
 575        for (n = 0; n < total_sg; n++) {
 576                if (i == err_idx)
 577                        break;
 578                vring_unmap_one_split(vq, &desc[i]);
 579                i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 580        }
 581
 582        if (indirect)
 583                kfree(desc);
 584
 585        END_USE(vq);
 586        return -ENOMEM;
 587}
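/*
 * Example (hypothetical caller, not part of this file): drivers reach
 * virtqueue_add_split()/virtqueue_add_packed() through the public
 * virtqueue_add_*() helpers.  A minimal sketch of queueing one outgoing
 * buffer; example_send() and the kmalloc'ed "buf" are assumptions.
 */
#if 0
static int example_send(struct virtqueue *vq, void *buf, unsigned int len)
{
        struct scatterlist sg;
        int err;

        sg_init_one(&sg, buf, len);

        /* "buf" doubles as the token later returned by virtqueue_get_buf(). */
        err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
        if (err)
                return err;     /* typically -ENOSPC when the ring is full */

        virtqueue_kick(vq);
        return 0;
}
#endif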
 588
 589static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 590{
 591        struct vring_virtqueue *vq = to_vvq(_vq);
 592        u16 new, old;
 593        bool needs_kick;
 594
 595        START_USE(vq);
 596        /* We need to expose available array entries before checking avail
 597         * event. */
 598        virtio_mb(vq->weak_barriers);
 599
 600        old = vq->split.avail_idx_shadow - vq->num_added;
 601        new = vq->split.avail_idx_shadow;
 602        vq->num_added = 0;
 603
 604        LAST_ADD_TIME_CHECK(vq);
 605        LAST_ADD_TIME_INVALID(vq);
 606
 607        if (vq->event) {
 608                needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
 609                                        vring_avail_event(&vq->split.vring)),
 610                                              new, old);
 611        } else {
 612                needs_kick = !(vq->split.vring.used->flags &
 613                                        cpu_to_virtio16(_vq->vdev,
 614                                                VRING_USED_F_NO_NOTIFY));
 615        }
 616        END_USE(vq);
 617        return needs_kick;
 618}
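/*
 * Example (hypothetical caller, not part of this file): virtqueue_kick()
 * combines the prepare and notify steps in one call.  A driver that
 * batches additions under a lock may split the two so the (possibly slow)
 * notification happens outside the critical section; example_kick_batched()
 * is an assumed name.
 */
#if 0
static void example_kick_batched(struct virtqueue *vq, spinlock_t *lock)
{
        bool kick;

        spin_lock(lock);
        /* ... several successful virtqueue_add_*() calls ... */
        kick = virtqueue_kick_prepare(vq);
        spin_unlock(lock);

        if (kick)
                virtqueue_notify(vq);
}
#endif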
 619
 620static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 621                             void **ctx)
 622{
 623        unsigned int i, j;
 624        __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 625
 626        /* Clear data ptr. */
 627        vq->split.desc_state[head].data = NULL;
 628
 629        /* Put back on free list: unmap first-level descriptors and find end */
 630        i = head;
 631
 632        while (vq->split.vring.desc[i].flags & nextflag) {
 633                vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
 634                i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
 635                vq->vq.num_free++;
 636        }
 637
 638        vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
 639        vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
 640                                                vq->free_head);
 641        vq->free_head = head;
 642
 643        /* Plus final descriptor */
 644        vq->vq.num_free++;
 645
 646        if (vq->indirect) {
 647                struct vring_desc *indir_desc =
 648                                vq->split.desc_state[head].indir_desc;
 649                u32 len;
 650
 651                /* Free the indirect table, if any, now that it's unmapped. */
 652                if (!indir_desc)
 653                        return;
 654
 655                len = virtio32_to_cpu(vq->vq.vdev,
 656                                vq->split.vring.desc[head].len);
 657
 658                BUG_ON(!(vq->split.vring.desc[head].flags &
 659                         cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
 660                BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 661
 662                for (j = 0; j < len / sizeof(struct vring_desc); j++)
 663                        vring_unmap_one_split(vq, &indir_desc[j]);
 664
 665                kfree(indir_desc);
 666                vq->split.desc_state[head].indir_desc = NULL;
 667        } else if (ctx) {
 668                *ctx = vq->split.desc_state[head].indir_desc;
 669        }
 670}
 671
 672static inline bool more_used_split(const struct vring_virtqueue *vq)
 673{
 674        return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
 675                        vq->split.vring.used->idx);
 676}
 677
 678static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 679                                         unsigned int *len,
 680                                         void **ctx)
 681{
 682        struct vring_virtqueue *vq = to_vvq(_vq);
 683        void *ret;
 684        unsigned int i;
 685        u16 last_used;
 686
 687        START_USE(vq);
 688
 689        if (unlikely(vq->broken)) {
 690                END_USE(vq);
 691                return NULL;
 692        }
 693
 694        if (!more_used_split(vq)) {
 695                pr_debug("No more buffers in queue\n");
 696                END_USE(vq);
 697                return NULL;
 698        }
 699
 700        /* Only get used array entries after they have been exposed by host. */
 701        virtio_rmb(vq->weak_barriers);
 702
 703        last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
 704        i = virtio32_to_cpu(_vq->vdev,
 705                        vq->split.vring.used->ring[last_used].id);
 706        *len = virtio32_to_cpu(_vq->vdev,
 707                        vq->split.vring.used->ring[last_used].len);
 708
 709        if (unlikely(i >= vq->split.vring.num)) {
 710                BAD_RING(vq, "id %u out of range\n", i);
 711                return NULL;
 712        }
 713        if (unlikely(!vq->split.desc_state[i].data)) {
 714                BAD_RING(vq, "id %u is not a head!\n", i);
 715                return NULL;
 716        }
 717
 718        /* detach_buf_split clears data, so grab it now. */
 719        ret = vq->split.desc_state[i].data;
 720        detach_buf_split(vq, i, ctx);
 721        vq->last_used_idx++;
 722        /* If we expect an interrupt for the next entry, tell host
 723         * by writing event index and flush out the write before
 724         * the read in the next get_buf call. */
 725        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 726                virtio_store_mb(vq->weak_barriers,
 727                                &vring_used_event(&vq->split.vring),
 728                                cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 729
 730        LAST_ADD_TIME_INVALID(vq);
 731
 732        END_USE(vq);
 733        return ret;
 734}
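/*
 * Example (hypothetical caller, not part of this file): the usual way to
 * consume this path is to drain the queue from the virtqueue callback
 * until virtqueue_get_buf() returns NULL.  example_vq_callback() and
 * example_complete() are assumed names.
 */
#if 0
static void example_vq_callback(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        /* "token" is the data pointer given to virtqueue_add_*();
         * "len" is how many bytes the device wrote into the buffer. */
        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                example_complete(token, len);
}
#endif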
 735
 736static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 737{
 738        struct vring_virtqueue *vq = to_vvq(_vq);
 739
 740        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 741                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 742                if (!vq->event)
 743                        vq->split.vring.avail->flags =
 744                                cpu_to_virtio16(_vq->vdev,
 745                                                vq->split.avail_flags_shadow);
 746        }
 747}
 748
 749static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
 750{
 751        struct vring_virtqueue *vq = to_vvq(_vq);
 752        u16 last_used_idx;
 753
 754        START_USE(vq);
 755
 756        /* We optimistically turn back on interrupts, then check if there was
 757         * more to do. */
 758        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 759         * either clear the flags bit or point the event index at the next
 760         * entry. Always do both to keep code simple. */
 761        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 762                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 763                if (!vq->event)
 764                        vq->split.vring.avail->flags =
 765                                cpu_to_virtio16(_vq->vdev,
 766                                                vq->split.avail_flags_shadow);
 767        }
 768        vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
 769                        last_used_idx = vq->last_used_idx);
 770        END_USE(vq);
 771        return last_used_idx;
 772}
 773
 774static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
 775{
 776        struct vring_virtqueue *vq = to_vvq(_vq);
 777
 778        return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
 779                        vq->split.vring.used->idx);
 780}
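/*
 * Example (hypothetical caller, not part of this file): the prepare/poll
 * pair closes the race between re-enabling callbacks and a buffer becoming
 * used in the meantime, e.g. when leaving a NAPI-style polling loop.
 * example_try_enable_cb() is an assumed name.
 */
#if 0
/* Returns true if callbacks were re-enabled with nothing pending,
 * false if the caller should keep processing used buffers. */
static bool example_try_enable_cb(struct virtqueue *vq)
{
        unsigned int opaque = virtqueue_enable_cb_prepare(vq);

        if (virtqueue_poll(vq, opaque)) {
                virtqueue_disable_cb(vq);
                return false;
        }
        return true;
}
#endif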
 781
 782static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 783{
 784        struct vring_virtqueue *vq = to_vvq(_vq);
 785        u16 bufs;
 786
 787        START_USE(vq);
 788
 789        /* We optimistically turn back on interrupts, then check if there was
 790         * more to do. */
  791        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 792         * either clear the flags bit or point the event index at the next
 793         * entry. Always update the event index to keep code simple. */
 794        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 795                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 796                if (!vq->event)
 797                        vq->split.vring.avail->flags =
 798                                cpu_to_virtio16(_vq->vdev,
 799                                                vq->split.avail_flags_shadow);
 800        }
 801        /* TODO: tune this threshold */
 802        bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 803
 804        virtio_store_mb(vq->weak_barriers,
 805                        &vring_used_event(&vq->split.vring),
 806                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 807
 808        if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
 809                                        - vq->last_used_idx) > bufs)) {
 810                END_USE(vq);
 811                return false;
 812        }
 813
 814        END_USE(vq);
 815        return true;
 816}
 817
 818static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 819{
 820        struct vring_virtqueue *vq = to_vvq(_vq);
 821        unsigned int i;
 822        void *buf;
 823
 824        START_USE(vq);
 825
 826        for (i = 0; i < vq->split.vring.num; i++) {
 827                if (!vq->split.desc_state[i].data)
 828                        continue;
 829                /* detach_buf_split clears data, so grab it now. */
 830                buf = vq->split.desc_state[i].data;
 831                detach_buf_split(vq, i, NULL);
 832                vq->split.avail_idx_shadow--;
 833                vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 834                                vq->split.avail_idx_shadow);
 835                END_USE(vq);
 836                return buf;
 837        }
 838        /* That should have freed everything. */
 839        BUG_ON(vq->vq.num_free != vq->split.vring.num);
 840
 841        END_USE(vq);
 842        return NULL;
 843}
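/*
 * Example (hypothetical caller, not part of this file): this path is for
 * teardown.  Once the device has been reset and can no longer touch the
 * ring, a driver reclaims whatever it still had queued before deleting the
 * virtqueue; example_drain_vq() and the kfree()-able tokens are assumptions.
 */
#if 0
static void example_drain_vq(struct virtqueue *vq)
{
        void *token;

        while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
                kfree(token);
}
#endif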
 844
 845static struct virtqueue *vring_create_virtqueue_split(
 846        unsigned int index,
 847        unsigned int num,
 848        unsigned int vring_align,
 849        struct virtio_device *vdev,
 850        bool weak_barriers,
 851        bool may_reduce_num,
 852        bool context,
 853        bool (*notify)(struct virtqueue *),
 854        void (*callback)(struct virtqueue *),
 855        const char *name)
 856{
 857        struct virtqueue *vq;
 858        void *queue = NULL;
 859        dma_addr_t dma_addr;
 860        size_t queue_size_in_bytes;
 861        struct vring vring;
 862
 863        /* We assume num is a power of 2. */
 864        if (num & (num - 1)) {
 865                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 866                return NULL;
 867        }
 868
 869        /* TODO: allocate each queue chunk individually */
 870        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
 871                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 872                                          &dma_addr,
 873                                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 874                if (queue)
 875                        break;
 876                if (!may_reduce_num)
 877                        return NULL;
 878        }
 879
 880        if (!num)
 881                return NULL;
 882
 883        if (!queue) {
 884                /* Try to get a single page. You are my only hope! */
 885                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 886                                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
 887        }
 888        if (!queue)
 889                return NULL;
 890
 891        queue_size_in_bytes = vring_size(num, vring_align);
 892        vring_init(&vring, num, queue, vring_align);
 893
 894        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 895                                   notify, callback, name);
 896        if (!vq) {
 897                vring_free_queue(vdev, queue_size_in_bytes, queue,
 898                                 dma_addr);
 899                return NULL;
 900        }
 901
 902        to_vvq(vq)->split.queue_dma_addr = dma_addr;
 903        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
 904        to_vvq(vq)->we_own_ring = true;
 905
 906        return vq;
 907}
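/*
 * Example (hypothetical transport code, not part of this file): transports
 * normally reach this function through vring_create_virtqueue(), which
 * picks the split or packed layout from the negotiated features.  A minimal
 * sketch of creating one queue; example_setup_vq(), example_notify(),
 * example_callback(), the 256-entry size and the "requestq" name are all
 * assumptions.
 */
#if 0
static bool example_notify(struct virtqueue *vq);
static void example_callback(struct virtqueue *vq);

static struct virtqueue *example_setup_vq(struct virtio_device *vdev,
                                          unsigned int index)
{
        return vring_create_virtqueue(index, 256, PAGE_SIZE, vdev,
                                      true,  /* weak_barriers */
                                      true,  /* may_reduce_num */
                                      false, /* no per-buffer context */
                                      example_notify, example_callback,
                                      "requestq");
}
#endif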
 908
 909
 910/*
 911 * Packed ring specific functions - *_packed().
 912 */
 913
 914static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
 915                                     struct vring_desc_extra_packed *state)
 916{
 917        u16 flags;
 918
 919        if (!vq->use_dma_api)
 920                return;
 921
 922        flags = state->flags;
 923
 924        if (flags & VRING_DESC_F_INDIRECT) {
 925                dma_unmap_single(vring_dma_dev(vq),
 926                                 state->addr, state->len,
 927                                 (flags & VRING_DESC_F_WRITE) ?
 928                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 929        } else {
 930                dma_unmap_page(vring_dma_dev(vq),
 931                               state->addr, state->len,
 932                               (flags & VRING_DESC_F_WRITE) ?
 933                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 934        }
 935}
 936
 937static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
 938                                   struct vring_packed_desc *desc)
 939{
 940        u16 flags;
 941
 942        if (!vq->use_dma_api)
 943                return;
 944
 945        flags = le16_to_cpu(desc->flags);
 946
 947        if (flags & VRING_DESC_F_INDIRECT) {
 948                dma_unmap_single(vring_dma_dev(vq),
 949                                 le64_to_cpu(desc->addr),
 950                                 le32_to_cpu(desc->len),
 951                                 (flags & VRING_DESC_F_WRITE) ?
 952                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 953        } else {
 954                dma_unmap_page(vring_dma_dev(vq),
 955                               le64_to_cpu(desc->addr),
 956                               le32_to_cpu(desc->len),
 957                               (flags & VRING_DESC_F_WRITE) ?
 958                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 959        }
 960}
 961
 962static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
 963                                                       gfp_t gfp)
 964{
 965        struct vring_packed_desc *desc;
 966
 967        /*
 968         * We require lowmem mappings for the descriptors because
 969         * otherwise virt_to_phys will give us bogus addresses in the
 970         * virtqueue.
 971         */
 972        gfp &= ~__GFP_HIGHMEM;
 973
 974        desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
 975
 976        return desc;
 977}
 978
 979static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 980                                       struct scatterlist *sgs[],
 981                                       unsigned int total_sg,
 982                                       unsigned int out_sgs,
 983                                       unsigned int in_sgs,
 984                                       void *data,
 985                                       gfp_t gfp)
 986{
 987        struct vring_packed_desc *desc;
 988        struct scatterlist *sg;
 989        unsigned int i, n, err_idx;
 990        u16 head, id;
 991        dma_addr_t addr;
 992
 993        head = vq->packed.next_avail_idx;
 994        desc = alloc_indirect_packed(total_sg, gfp);
            if (!desc) {
                    END_USE(vq);
                    return -ENOMEM;
            }
 995
 996        if (unlikely(vq->vq.num_free < 1)) {
 997                pr_debug("Can't add buf len 1 - avail = 0\n");
 998                kfree(desc);
 999                END_USE(vq);
1000                return -ENOSPC;
1001        }
1002
1003        i = 0;
1004        id = vq->free_head;
1005        BUG_ON(id == vq->packed.vring.num);
1006
1007        for (n = 0; n < out_sgs + in_sgs; n++) {
1008                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1009                        addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1010                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1011                        if (vring_mapping_error(vq, addr))
1012                                goto unmap_release;
1013
1014                        desc[i].flags = cpu_to_le16(n < out_sgs ?
1015                                                0 : VRING_DESC_F_WRITE);
1016                        desc[i].addr = cpu_to_le64(addr);
1017                        desc[i].len = cpu_to_le32(sg->length);
1018                        i++;
1019                }
1020        }
1021
1022        /* Now that the indirect table is filled in, map it. */
1023        addr = vring_map_single(vq, desc,
1024                        total_sg * sizeof(struct vring_packed_desc),
1025                        DMA_TO_DEVICE);
1026        if (vring_mapping_error(vq, addr))
1027                goto unmap_release;
1028
1029        vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1030        vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1031                                sizeof(struct vring_packed_desc));
1032        vq->packed.vring.desc[head].id = cpu_to_le16(id);
1033
1034        if (vq->use_dma_api) {
1035                vq->packed.desc_extra[id].addr = addr;
1036                vq->packed.desc_extra[id].len = total_sg *
1037                                sizeof(struct vring_packed_desc);
1038                vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1039                                                  vq->packed.avail_used_flags;
1040        }
1041
1042        /*
1043         * A driver MUST NOT make the first descriptor in the list
1044         * available before all subsequent descriptors comprising
1045         * the list are made available.
1046         */
1047        virtio_wmb(vq->weak_barriers);
1048        vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1049                                                vq->packed.avail_used_flags);
1050
1051        /* We're using some buffers from the free list. */
1052        vq->vq.num_free -= 1;
1053
1054        /* Update free pointer */
1055        n = head + 1;
1056        if (n >= vq->packed.vring.num) {
1057                n = 0;
1058                vq->packed.avail_wrap_counter ^= 1;
1059                vq->packed.avail_used_flags ^=
1060                                1 << VRING_PACKED_DESC_F_AVAIL |
1061                                1 << VRING_PACKED_DESC_F_USED;
1062        }
1063        vq->packed.next_avail_idx = n;
1064        vq->free_head = vq->packed.desc_state[id].next;
1065
1066        /* Store token and indirect buffer state. */
1067        vq->packed.desc_state[id].num = 1;
1068        vq->packed.desc_state[id].data = data;
1069        vq->packed.desc_state[id].indir_desc = desc;
1070        vq->packed.desc_state[id].last = id;
1071
1072        vq->num_added += 1;
1073
1074        pr_debug("Added buffer head %i to %p\n", head, vq);
1075        END_USE(vq);
1076
1077        return 0;
1078
1079unmap_release:
1080        err_idx = i;
1081
1082        for (i = 0; i < err_idx; i++)
1083                vring_unmap_desc_packed(vq, &desc[i]);
1084
1085        kfree(desc);
1086
1087        END_USE(vq);
1088        return -ENOMEM;
1089}
1090
1091static inline int virtqueue_add_packed(struct virtqueue *_vq,
1092                                       struct scatterlist *sgs[],
1093                                       unsigned int total_sg,
1094                                       unsigned int out_sgs,
1095                                       unsigned int in_sgs,
1096                                       void *data,
1097                                       void *ctx,
1098                                       gfp_t gfp)
1099{
1100        struct vring_virtqueue *vq = to_vvq(_vq);
1101        struct vring_packed_desc *desc;
1102        struct scatterlist *sg;
1103        unsigned int i, n, c, descs_used, err_idx;
1104        __le16 uninitialized_var(head_flags), flags;
1105        u16 head, id, uninitialized_var(prev), curr, avail_used_flags;
1106
1107        START_USE(vq);
1108
1109        BUG_ON(data == NULL);
1110        BUG_ON(ctx && vq->indirect);
1111
1112        if (unlikely(vq->broken)) {
1113                END_USE(vq);
1114                return -EIO;
1115        }
1116
1117        LAST_ADD_TIME_UPDATE(vq);
1118
1119        BUG_ON(total_sg == 0);
1120
1121        if (virtqueue_use_indirect(_vq, total_sg))
1122                return virtqueue_add_indirect_packed(vq, sgs, total_sg,
1123                                out_sgs, in_sgs, data, gfp);
1124
1125        head = vq->packed.next_avail_idx;
1126        avail_used_flags = vq->packed.avail_used_flags;
1127
1128        WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1129
1130        desc = vq->packed.vring.desc;
1131        i = head;
1132        descs_used = total_sg;
1133
1134        if (unlikely(vq->vq.num_free < descs_used)) {
1135                pr_debug("Can't add buf len %i - avail = %i\n",
1136                         descs_used, vq->vq.num_free);
1137                END_USE(vq);
1138                return -ENOSPC;
1139        }
1140
1141        id = vq->free_head;
1142        BUG_ON(id == vq->packed.vring.num);
1143
1144        curr = id;
1145        c = 0;
1146        for (n = 0; n < out_sgs + in_sgs; n++) {
1147                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1148                        dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1149                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1150                        if (vring_mapping_error(vq, addr))
1151                                goto unmap_release;
1152
1153                        flags = cpu_to_le16(vq->packed.avail_used_flags |
1154                                    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1155                                    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1156                        if (i == head)
1157                                head_flags = flags;
1158                        else
1159                                desc[i].flags = flags;
1160
1161                        desc[i].addr = cpu_to_le64(addr);
1162                        desc[i].len = cpu_to_le32(sg->length);
1163                        desc[i].id = cpu_to_le16(id);
1164
1165                        if (unlikely(vq->use_dma_api)) {
1166                                vq->packed.desc_extra[curr].addr = addr;
1167                                vq->packed.desc_extra[curr].len = sg->length;
1168                                vq->packed.desc_extra[curr].flags =
1169                                        le16_to_cpu(flags);
1170                        }
1171                        prev = curr;
1172                        curr = vq->packed.desc_state[curr].next;
1173
 1174                        if (unlikely(++i >= vq->packed.vring.num)) {
1175                                i = 0;
1176                                vq->packed.avail_used_flags ^=
1177                                        1 << VRING_PACKED_DESC_F_AVAIL |
1178                                        1 << VRING_PACKED_DESC_F_USED;
1179                        }
1180                }
1181        }
1182
1183        if (i < head)
1184                vq->packed.avail_wrap_counter ^= 1;
1185
1186        /* We're using some buffers from the free list. */
1187        vq->vq.num_free -= descs_used;
1188
1189        /* Update free pointer */
1190        vq->packed.next_avail_idx = i;
1191        vq->free_head = curr;
1192
1193        /* Store token. */
1194        vq->packed.desc_state[id].num = descs_used;
1195        vq->packed.desc_state[id].data = data;
1196        vq->packed.desc_state[id].indir_desc = ctx;
1197        vq->packed.desc_state[id].last = prev;
1198
1199        /*
1200         * A driver MUST NOT make the first descriptor in the list
1201         * available before all subsequent descriptors comprising
1202         * the list are made available.
1203         */
1204        virtio_wmb(vq->weak_barriers);
1205        vq->packed.vring.desc[head].flags = head_flags;
1206        vq->num_added += descs_used;
1207
1208        pr_debug("Added buffer head %i to %p\n", head, vq);
1209        END_USE(vq);
1210
1211        return 0;
1212
1213unmap_release:
1214        err_idx = i;
1215        i = head;
1216
1217        vq->packed.avail_used_flags = avail_used_flags;
1218
1219        for (n = 0; n < total_sg; n++) {
1220                if (i == err_idx)
1221                        break;
1222                vring_unmap_desc_packed(vq, &desc[i]);
1223                i++;
1224                if (i >= vq->packed.vring.num)
1225                        i = 0;
1226        }
1227
1228        END_USE(vq);
1229        return -EIO;
1230}
1231
1232static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1233{
1234        struct vring_virtqueue *vq = to_vvq(_vq);
1235        u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1236        bool needs_kick;
1237        union {
1238                struct {
1239                        __le16 off_wrap;
1240                        __le16 flags;
1241                };
1242                u32 u32;
1243        } snapshot;
1244
1245        START_USE(vq);
1246
1247        /*
1248         * We need to expose the new flags value before checking notification
1249         * suppressions.
1250         */
1251        virtio_mb(vq->weak_barriers);
1252
1253        old = vq->packed.next_avail_idx - vq->num_added;
1254        new = vq->packed.next_avail_idx;
1255        vq->num_added = 0;
1256
1257        snapshot.u32 = *(u32 *)vq->packed.vring.device;
1258        flags = le16_to_cpu(snapshot.flags);
1259
1260        LAST_ADD_TIME_CHECK(vq);
1261        LAST_ADD_TIME_INVALID(vq);
1262
1263        if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1264                needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1265                goto out;
1266        }
1267
1268        off_wrap = le16_to_cpu(snapshot.off_wrap);
1269
1270        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1271        event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1272        if (wrap_counter != vq->packed.avail_wrap_counter)
1273                event_idx -= vq->packed.vring.num;
1274
1275        needs_kick = vring_need_event(event_idx, new, old);
1276out:
1277        END_USE(vq);
1278        return needs_kick;
1279}
1280
1281static void detach_buf_packed(struct vring_virtqueue *vq,
1282                              unsigned int id, void **ctx)
1283{
1284        struct vring_desc_state_packed *state = NULL;
1285        struct vring_packed_desc *desc;
1286        unsigned int i, curr;
1287
1288        state = &vq->packed.desc_state[id];
1289
1290        /* Clear data ptr. */
1291        state->data = NULL;
1292
1293        vq->packed.desc_state[state->last].next = vq->free_head;
1294        vq->free_head = id;
1295        vq->vq.num_free += state->num;
1296
1297        if (unlikely(vq->use_dma_api)) {
1298                curr = id;
1299                for (i = 0; i < state->num; i++) {
1300                        vring_unmap_state_packed(vq,
1301                                &vq->packed.desc_extra[curr]);
1302                        curr = vq->packed.desc_state[curr].next;
1303                }
1304        }
1305
1306        if (vq->indirect) {
1307                u32 len;
1308
1309                /* Free the indirect table, if any, now that it's unmapped. */
1310                desc = state->indir_desc;
1311                if (!desc)
1312                        return;
1313
1314                if (vq->use_dma_api) {
1315                        len = vq->packed.desc_extra[id].len;
1316                        for (i = 0; i < len / sizeof(struct vring_packed_desc);
1317                                        i++)
1318                                vring_unmap_desc_packed(vq, &desc[i]);
1319                }
1320                kfree(desc);
1321                state->indir_desc = NULL;
1322        } else if (ctx) {
1323                *ctx = state->indir_desc;
1324        }
1325}
1326
1327static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1328                                       u16 idx, bool used_wrap_counter)
1329{
1330        bool avail, used;
1331        u16 flags;
1332
1333        flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1334        avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1335        used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1336
1337        return avail == used && used == used_wrap_counter;
1338}
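/*
 * Worked example of the check above (illustrative): the driver makes a
 * descriptor available with AVAIL equal to its avail wrap counter and USED
 * equal to the inverse, so with used_wrap_counter == 1 a freshly added
 * entry reads AVAIL=1/USED=0 and is not reported as used (avail != used).
 * When the device consumes it, it rewrites the flags as AVAIL=1/USED=1, so
 * avail == used == used_wrap_counter and the entry is seen as used.  After
 * the used index wraps, the driver flips used_wrap_counter and the roles
 * of 0 and 1 swap.
 */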
1339
1340static inline bool more_used_packed(const struct vring_virtqueue *vq)
1341{
1342        return is_used_desc_packed(vq, vq->last_used_idx,
1343                        vq->packed.used_wrap_counter);
1344}
1345
1346static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1347                                          unsigned int *len,
1348                                          void **ctx)
1349{
1350        struct vring_virtqueue *vq = to_vvq(_vq);
1351        u16 last_used, id;
1352        void *ret;
1353
1354        START_USE(vq);
1355
1356        if (unlikely(vq->broken)) {
1357                END_USE(vq);
1358                return NULL;
1359        }
1360
1361        if (!more_used_packed(vq)) {
1362                pr_debug("No more buffers in queue\n");
1363                END_USE(vq);
1364                return NULL;
1365        }
1366
1367        /* Only get used elements after they have been exposed by host. */
1368        virtio_rmb(vq->weak_barriers);
1369
1370        last_used = vq->last_used_idx;
1371        id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1372        *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1373
1374        if (unlikely(id >= vq->packed.vring.num)) {
1375                BAD_RING(vq, "id %u out of range\n", id);
1376                return NULL;
1377        }
1378        if (unlikely(!vq->packed.desc_state[id].data)) {
1379                BAD_RING(vq, "id %u is not a head!\n", id);
1380                return NULL;
1381        }
1382
1383        /* detach_buf_packed clears data, so grab it now. */
1384        ret = vq->packed.desc_state[id].data;
1385        detach_buf_packed(vq, id, ctx);
1386
1387        vq->last_used_idx += vq->packed.desc_state[id].num;
1388        if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1389                vq->last_used_idx -= vq->packed.vring.num;
1390                vq->packed.used_wrap_counter ^= 1;
1391        }
1392
1393        /*
1394         * If we expect an interrupt for the next entry, tell host
1395         * by writing event index and flush out the write before
1396         * the read in the next get_buf call.
1397         */
1398        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1399                virtio_store_mb(vq->weak_barriers,
1400                                &vq->packed.vring.driver->off_wrap,
1401                                cpu_to_le16(vq->last_used_idx |
1402                                        (vq->packed.used_wrap_counter <<
1403                                         VRING_PACKED_EVENT_F_WRAP_CTR)));
1404
1405        LAST_ADD_TIME_INVALID(vq);
1406
1407        END_USE(vq);
1408        return ret;
1409}
1410
1411static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1412{
1413        struct vring_virtqueue *vq = to_vvq(_vq);
1414
1415        if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1416                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1417                vq->packed.vring.driver->flags =
1418                        cpu_to_le16(vq->packed.event_flags_shadow);
1419        }
1420}
1421
1422static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1423{
1424        struct vring_virtqueue *vq = to_vvq(_vq);
1425
1426        START_USE(vq);
1427
1428        /*
1429         * We optimistically turn back on interrupts, then check if there was
1430         * more to do.
1431         */
1432
1433        if (vq->event) {
1434                vq->packed.vring.driver->off_wrap =
1435                        cpu_to_le16(vq->last_used_idx |
1436                                (vq->packed.used_wrap_counter <<
1437                                 VRING_PACKED_EVENT_F_WRAP_CTR));
1438                /*
1439                 * We need to update event offset and event wrap
1440                 * counter first before updating event flags.
1441                 */
1442                virtio_wmb(vq->weak_barriers);
1443        }
1444
1445        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1446                vq->packed.event_flags_shadow = vq->event ?
1447                                VRING_PACKED_EVENT_FLAG_DESC :
1448                                VRING_PACKED_EVENT_FLAG_ENABLE;
1449                vq->packed.vring.driver->flags =
1450                                cpu_to_le16(vq->packed.event_flags_shadow);
1451        }
1452
1453        END_USE(vq);
1454        return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1455                        VRING_PACKED_EVENT_F_WRAP_CTR);
1456}
1457
1458static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1459{
1460        struct vring_virtqueue *vq = to_vvq(_vq);
1461        bool wrap_counter;
1462        u16 used_idx;
1463
1464        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1465        used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1466
1467        return is_used_desc_packed(vq, used_idx, wrap_counter);
1468}
1469
1470static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1471{
1472        struct vring_virtqueue *vq = to_vvq(_vq);
1473        u16 used_idx, wrap_counter;
1474        u16 bufs;
1475
1476        START_USE(vq);
1477
1478        /*
1479         * We optimistically turn back on interrupts, then check if there was
1480         * more to do.
1481         */
1482
1483        if (vq->event) {
1484                /* TODO: tune this threshold */
1485                bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1486                wrap_counter = vq->packed.used_wrap_counter;
1487
1488                used_idx = vq->last_used_idx + bufs;
1489                if (used_idx >= vq->packed.vring.num) {
1490                        used_idx -= vq->packed.vring.num;
1491                        wrap_counter ^= 1;
1492                }
1493
1494                vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1495                        (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1496
1497                /*
1498                 * We need to update event offset and event wrap
1499                 * counter first before updating event flags.
1500                 */
1501                virtio_wmb(vq->weak_barriers);
1502        }
1503
1504        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1505                vq->packed.event_flags_shadow = vq->event ?
1506                                VRING_PACKED_EVENT_FLAG_DESC :
1507                                VRING_PACKED_EVENT_FLAG_ENABLE;
1508                vq->packed.vring.driver->flags =
1509                                cpu_to_le16(vq->packed.event_flags_shadow);
1510        }
1511
1512        /*
1513         * We need to update event suppression structure first
1514         * before re-checking for more used buffers.
1515         */
1516        virtio_mb(vq->weak_barriers);
1517
1518        if (is_used_desc_packed(vq,
1519                                vq->last_used_idx,
1520                                vq->packed.used_wrap_counter)) {
1521                END_USE(vq);
1522                return false;
1523        }
1524
1525        END_USE(vq);
1526        return true;
1527}
1528
1529static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1530{
1531        struct vring_virtqueue *vq = to_vvq(_vq);
1532        unsigned int i;
1533        void *buf;
1534
1535        START_USE(vq);
1536
1537        for (i = 0; i < vq->packed.vring.num; i++) {
1538                if (!vq->packed.desc_state[i].data)
1539                        continue;
1540                /* detach_buf clears data, so grab it now. */
1541                buf = vq->packed.desc_state[i].data;
1542                detach_buf_packed(vq, i, NULL);
1543                END_USE(vq);
1544                return buf;
1545        }
1546        /* That should have freed everything. */
1547        BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1548
1549        END_USE(vq);
1550        return NULL;
1551}
1552
1553static struct virtqueue *vring_create_virtqueue_packed(
1554        unsigned int index,
1555        unsigned int num,
1556        unsigned int vring_align,
1557        struct virtio_device *vdev,
1558        bool weak_barriers,
1559        bool may_reduce_num,
1560        bool context,
1561        bool (*notify)(struct virtqueue *),
1562        void (*callback)(struct virtqueue *),
1563        const char *name)
1564{
1565        struct vring_virtqueue *vq;
1566        struct vring_packed_desc *ring;
1567        struct vring_packed_desc_event *driver, *device;
1568        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1569        size_t ring_size_in_bytes, event_size_in_bytes;
1570        unsigned int i;
1571
1572        ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1573
1574        ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1575                                 &ring_dma_addr,
1576                                 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1577        if (!ring)
1578                goto err_ring;
1579
1580        event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1581
1582        driver = vring_alloc_queue(vdev, event_size_in_bytes,
1583                                   &driver_event_dma_addr,
1584                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1585        if (!driver)
1586                goto err_driver;
1587
1588        device = vring_alloc_queue(vdev, event_size_in_bytes,
1589                                   &device_event_dma_addr,
1590                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1591        if (!device)
1592                goto err_device;
1593
1594        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1595        if (!vq)
1596                goto err_vq;
1597
1598        vq->vq.callback = callback;
1599        vq->vq.vdev = vdev;
1600        vq->vq.name = name;
1601        vq->vq.num_free = num;
1602        vq->vq.index = index;
1603        vq->we_own_ring = true;
1604        vq->notify = notify;
1605        vq->weak_barriers = weak_barriers;
1606        vq->broken = false;
1607        vq->last_used_idx = 0;
1608        vq->num_added = 0;
1609        vq->packed_ring = true;
1610        vq->use_dma_api = vring_use_dma_api(vdev);
1611        list_add_tail(&vq->vq.list, &vdev->vqs);
1612#ifdef DEBUG
1613        vq->in_use = false;
1614        vq->last_add_time_valid = false;
1615#endif
1616
1617        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1618                !context;
1619        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1620
1621        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1622                vq->weak_barriers = false;
1623
1624        vq->packed.ring_dma_addr = ring_dma_addr;
1625        vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1626        vq->packed.device_event_dma_addr = device_event_dma_addr;
1627
1628        vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1629        vq->packed.event_size_in_bytes = event_size_in_bytes;
1630
1631        vq->packed.vring.num = num;
1632        vq->packed.vring.desc = ring;
1633        vq->packed.vring.driver = driver;
1634        vq->packed.vring.device = device;
1635
1636        vq->packed.next_avail_idx = 0;
1637        vq->packed.avail_wrap_counter = 1;
1638        vq->packed.used_wrap_counter = 1;
1639        vq->packed.event_flags_shadow = 0;
1640        vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1641
1642        vq->packed.desc_state = kmalloc_array(num,
1643                        sizeof(struct vring_desc_state_packed),
1644                        GFP_KERNEL);
1645        if (!vq->packed.desc_state)
1646                goto err_desc_state;
1647
1648        memset(vq->packed.desc_state, 0,
1649                num * sizeof(struct vring_desc_state_packed));
1650
1651        /* Put everything in free lists. */
1652        vq->free_head = 0;
1653        for (i = 0; i < num-1; i++)
1654                vq->packed.desc_state[i].next = i + 1;
1655
1656        vq->packed.desc_extra = kmalloc_array(num,
1657                        sizeof(struct vring_desc_extra_packed),
1658                        GFP_KERNEL);
1659        if (!vq->packed.desc_extra)
1660                goto err_desc_extra;
1661
1662        memset(vq->packed.desc_extra, 0,
1663                num * sizeof(struct vring_desc_extra_packed));
1664
1665        /* No callback?  Tell other side not to bother us. */
1666        if (!callback) {
1667                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1668                vq->packed.vring.driver->flags =
1669                        cpu_to_le16(vq->packed.event_flags_shadow);
1670        }
1671
1672        return &vq->vq;
1673
1674err_desc_extra:
1675        kfree(vq->packed.desc_state);
1676err_desc_state:
1677        kfree(vq);
1678err_vq:
1679        vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1680err_device:
1681        vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1682err_driver:
1683        vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1684err_ring:
1685        return NULL;
1686}
1687
1688
1689/*
1690 * Generic functions and exported symbols.
1691 */
1692
1693static inline int virtqueue_add(struct virtqueue *_vq,
1694                                struct scatterlist *sgs[],
1695                                unsigned int total_sg,
1696                                unsigned int out_sgs,
1697                                unsigned int in_sgs,
1698                                void *data,
1699                                void *ctx,
1700                                gfp_t gfp)
1701{
1702        struct vring_virtqueue *vq = to_vvq(_vq);
1703
1704        return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1705                                        out_sgs, in_sgs, data, ctx, gfp) :
1706                                 virtqueue_add_split(_vq, sgs, total_sg,
1707                                        out_sgs, in_sgs, data, ctx, gfp);
1708}
1709
1710/**
1711 * virtqueue_add_sgs - expose buffers to other end
1712 * @_vq: the struct virtqueue we're talking about.
1713 * @sgs: array of terminated scatterlists.
1714 * @out_sgs: the number of scatterlists readable by other side
1715 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1716 * @data: the token identifying the buffer.
1717 * @gfp: how to do memory allocations (if necessary).
1718 *
1719 * Caller must ensure we don't call this with other virtqueue operations
1720 * at the same time (except where noted).
1721 *
1722 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1723 */
1724int virtqueue_add_sgs(struct virtqueue *_vq,
1725                      struct scatterlist *sgs[],
1726                      unsigned int out_sgs,
1727                      unsigned int in_sgs,
1728                      void *data,
1729                      gfp_t gfp)
1730{
1731        unsigned int i, total_sg = 0;
1732
1733        /* Count them first. */
1734        for (i = 0; i < out_sgs + in_sgs; i++) {
1735                struct scatterlist *sg;
1736
1737                for (sg = sgs[i]; sg; sg = sg_next(sg))
1738                        total_sg++;
1739        }
1740        return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1741                             data, NULL, gfp);
1742}
1743EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
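
/*
 * Illustrative sketch, not part of the original file: how a driver might
 * queue a two-part request (one device-readable header, one device-writable
 * status byte) with virtqueue_add_sgs().  "struct my_req" and its fields are
 * hypothetical; req must live in DMA-able memory (e.g. kmalloc()ed).
 */
struct my_req {
        __le32 type;            /* filled in by the driver, read by the device */
        u8 status;              /* written by the device on completion */
};

static int my_queue_request(struct virtqueue *vq, struct my_req *req)
{
        struct scatterlist hdr, status, *sgs[2];
        int err;

        sg_init_one(&hdr, &req->type, sizeof(req->type));
        sg_init_one(&status, &req->status, sizeof(req->status));
        sgs[0] = &hdr;          /* out_sgs: readable by the device */
        sgs[1] = &status;       /* in_sgs: writable by the device */

        /* req doubles as the token later returned by virtqueue_get_buf(). */
        err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
        if (err)
                return err;     /* typically -ENOSPC when the ring is full */

        virtqueue_kick(vq);
        return 0;
}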
1744
1745/**
1746 * virtqueue_add_outbuf - expose output buffers to other end
1747 * @vq: the struct virtqueue we're talking about.
1748 * @sg: scatterlist (must be well-formed and terminated!)
1749 * @num: the number of entries in @sg readable by other side
1750 * @data: the token identifying the buffer.
1751 * @gfp: how to do memory allocations (if necessary).
1752 *
1753 * Caller must ensure we don't call this with other virtqueue operations
1754 * at the same time (except where noted).
1755 *
1756 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1757 */
1758int virtqueue_add_outbuf(struct virtqueue *vq,
1759                         struct scatterlist *sg, unsigned int num,
1760                         void *data,
1761                         gfp_t gfp)
1762{
1763        return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1764}
1765EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
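
/*
 * Illustrative sketch, not part of the original file: posting a single
 * device-readable buffer with virtqueue_add_outbuf().  "buf" is assumed to
 * be kmalloc()ed and is reused as the token.
 */
static int my_send_buf(struct virtqueue *vq, void *buf, unsigned int len)
{
        struct scatterlist sg;

        sg_init_one(&sg, buf, len);
        return virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
}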
1766
1767/**
1768 * virtqueue_add_inbuf - expose input buffers to other end
1769 * @vq: the struct virtqueue we're talking about.
1770 * @sg: scatterlist (must be well-formed and terminated!)
1771 * @num: the number of entries in @sg writable by other side
1772 * @data: the token identifying the buffer.
1773 * @gfp: how to do memory allocations (if necessary).
1774 *
1775 * Caller must ensure we don't call this with other virtqueue operations
1776 * at the same time (except where noted).
1777 *
1778 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1779 */
1780int virtqueue_add_inbuf(struct virtqueue *vq,
1781                        struct scatterlist *sg, unsigned int num,
1782                        void *data,
1783                        gfp_t gfp)
1784{
1785        return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1786}
1787EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
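
/*
 * Illustrative sketch, not part of the original file: keeping a receive
 * queue topped up with device-writable buffers.  The buffer pointer is also
 * used as the token that virtqueue_get_buf() will hand back.
 */
static void my_fill_rxq(struct virtqueue *vq, unsigned int buf_len)
{
        struct scatterlist sg;
        void *buf;

        while ((buf = kmalloc(buf_len, GFP_KERNEL)) != NULL) {
                sg_init_one(&sg, buf, buf_len);
                if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL)) {
                        kfree(buf);     /* ring is full (-ENOSPC) or broken */
                        break;
                }
        }
        virtqueue_kick(vq);
}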
1788
1789/**
1790 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1791 * @vq: the struct virtqueue we're talking about.
1792 * @sg: scatterlist (must be well-formed and terminated!)
1793 * @num: the number of entries in @sg writable by other side
1794 * @data: the token identifying the buffer.
1795 * @ctx: extra context for the token
1796 * @gfp: how to do memory allocations (if necessary).
1797 *
1798 * Caller must ensure we don't call this with other virtqueue operations
1799 * at the same time (except where noted).
1800 *
1801 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1802 */
1803int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1804                        struct scatterlist *sg, unsigned int num,
1805                        void *data,
1806                        void *ctx,
1807                        gfp_t gfp)
1808{
1809        return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1810}
1811EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1812
1813/**
1814 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1815 * @_vq: the struct virtqueue
1816 *
1817 * Instead of virtqueue_kick(), you can do:
1818 *      if (virtqueue_kick_prepare(vq))
1819 *              virtqueue_notify(vq);
1820 *
1821 * This is sometimes useful because virtqueue_kick_prepare() needs
1822 * to be serialized, but the actual virtqueue_notify() call does not.
1823 */
1824bool virtqueue_kick_prepare(struct virtqueue *_vq)
1825{
1826        struct vring_virtqueue *vq = to_vvq(_vq);
1827
1828        return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1829                                 virtqueue_kick_prepare_split(_vq);
1830}
1831EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1832
1833/**
1834 * virtqueue_notify - second half of split virtqueue_kick call.
1835 * @_vq: the struct virtqueue
1836 *
1837 * This does not need to be serialized.
1838 *
1839 * Returns false if host notify failed or queue is broken, otherwise true.
1840 */
1841bool virtqueue_notify(struct virtqueue *_vq)
1842{
1843        struct vring_virtqueue *vq = to_vvq(_vq);
1844
1845        if (unlikely(vq->broken))
1846                return false;
1847
1848        /* Prod other side to tell it about changes. */
1849        if (!vq->notify(_vq)) {
1850                vq->broken = true;
1851                return false;
1852        }
1853        return true;
1854}
1855EXPORT_SYMBOL_GPL(virtqueue_notify);
1856
1857/**
1858 * virtqueue_kick - update after add_buf
1859 * @vq: the struct virtqueue
1860 *
1861 * After one or more virtqueue_add_* calls, invoke this to kick
1862 * the other side.
1863 *
1864 * Caller must ensure we don't call this with other virtqueue
1865 * operations at the same time (except where noted).
1866 *
1867 * Returns false if kick failed, otherwise true.
1868 */
1869bool virtqueue_kick(struct virtqueue *vq)
1870{
1871        if (virtqueue_kick_prepare(vq))
1872                return virtqueue_notify(vq);
1873        return true;
1874}
1875EXPORT_SYMBOL_GPL(virtqueue_kick);
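
/*
 * Illustrative sketch, not part of the original file: splitting the kick so
 * that the serialized part runs under the driver's lock while the possibly
 * expensive notification (MMIO write, hypercall, ...) happens outside it.
 * "struct my_dev" and its vq_lock are hypothetical driver state.
 */
struct my_dev {
        struct virtqueue *vq;
        spinlock_t vq_lock;
};

static void my_submit_and_kick(struct my_dev *dev, struct scatterlist *sg,
                               void *token)
{
        unsigned long flags;
        bool kick;

        spin_lock_irqsave(&dev->vq_lock, flags);
        /* Error handling omitted: a real driver checks for -ENOSPC here. */
        virtqueue_add_outbuf(dev->vq, sg, 1, token, GFP_ATOMIC);
        kick = virtqueue_kick_prepare(dev->vq);
        spin_unlock_irqrestore(&dev->vq_lock, flags);

        if (kick)
                virtqueue_notify(dev->vq);      /* needs no serialization */
}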
1876
1877/**
1878 * virtqueue_get_buf_ctx - get the next used buffer
1879 * @_vq: the struct virtqueue we're talking about.
1880 * @len: the length written into the buffer
1881 * @ctx: extra context for the token
1882 *
1883 * If the device wrote data into the buffer, @len will be set to the
1884 * amount written.  This means you don't need to clear the buffer
1885 * beforehand to ensure there's no data leakage in the case of short
1886 * writes.
1887 *
1888 * Caller must ensure we don't call this with other virtqueue
1889 * operations at the same time (except where noted).
1890 *
1891 * Returns NULL if there are no used buffers, or the "data" token
1892 * handed to virtqueue_add_*().
1893 */
1894void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1895                            void **ctx)
1896{
1897        struct vring_virtqueue *vq = to_vvq(_vq);
1898
1899        return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1900                                 virtqueue_get_buf_ctx_split(_vq, len, ctx);
1901}
1902EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1903
1904void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1905{
1906        return virtqueue_get_buf_ctx(_vq, len, NULL);
1907}
1908EXPORT_SYMBOL_GPL(virtqueue_get_buf);
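
/*
 * Illustrative sketch, not part of the original file: a virtqueue callback
 * that drains every completed buffer.  Reuses the hypothetical struct my_dev
 * above; my_complete() is a stand-in for driver-specific completion handling
 * and the device's priv pointer is assumed to hold the my_dev.
 */
static void my_complete(struct my_dev *dev, void *token, unsigned int len)
{
        /* Hypothetical: a real driver would finish the request here. */
        kfree(token);
}

static void my_vq_callback(struct virtqueue *vq)
{
        struct my_dev *dev = vq->vdev->priv;
        unsigned long flags;
        unsigned int len;
        void *token;

        spin_lock_irqsave(&dev->vq_lock, flags);
        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                my_complete(dev, token, len);   /* len = bytes the device wrote */
        spin_unlock_irqrestore(&dev->vq_lock, flags);
}
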
1909/**
1910 * virtqueue_disable_cb - disable callbacks
1911 * @_vq: the struct virtqueue we're talking about.
1912 *
1913 * Note that this is not necessarily synchronous, hence unreliable and only
1914 * useful as an optimization.
1915 *
1916 * Unlike other operations, this need not be serialized.
1917 */
1918void virtqueue_disable_cb(struct virtqueue *_vq)
1919{
1920        struct vring_virtqueue *vq = to_vvq(_vq);
1921
1922        if (vq->packed_ring)
1923                virtqueue_disable_cb_packed(_vq);
1924        else
1925                virtqueue_disable_cb_split(_vq);
1926}
1927EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
1928
1929/**
1930 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
1931 * @_vq: the struct virtqueue we're talking about.
1932 *
1933 * This re-enables callbacks; it returns current queue state
1934 * in an opaque unsigned value. This value should be later tested by
1935 * virtqueue_poll, to detect a possible race between the driver checking for
1936 * more work, and enabling callbacks.
1937 *
1938 * Caller must ensure we don't call this with other virtqueue
1939 * operations at the same time (except where noted).
1940 */
1941unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
1942{
1943        struct vring_virtqueue *vq = to_vvq(_vq);
1944
1945        return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
1946                                 virtqueue_enable_cb_prepare_split(_vq);
1947}
1948EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
1949
1950/**
1951 * virtqueue_poll - query pending used buffers
1952 * @_vq: the struct virtqueue we're talking about.
1953 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
1954 *
1955 * Returns "true" if there are pending used buffers in the queue.
1956 *
1957 * This does not need to be serialized.
1958 */
1959bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
1960{
1961        struct vring_virtqueue *vq = to_vvq(_vq);
1962
1963        virtio_mb(vq->weak_barriers);
1964        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
1965                                 virtqueue_poll_split(_vq, last_used_idx);
1966}
1967EXPORT_SYMBOL_GPL(virtqueue_poll);
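
/*
 * Illustrative sketch, not part of the original file: the prepare/poll pair
 * as a NAPI-style "am I done polling?" check.  Returns true when callbacks
 * have been re-enabled and nothing raced in; returns false (with callbacks
 * kept off) when more used buffers already arrived.
 */
static bool my_poll_done(struct virtqueue *vq)
{
        unsigned opaque = virtqueue_enable_cb_prepare(vq);

        if (virtqueue_poll(vq, opaque)) {
                /* A completion raced with re-enabling; keep polling. */
                virtqueue_disable_cb(vq);
                return false;
        }
        return true;
}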
1968
1969/**
1970 * virtqueue_enable_cb - restart callbacks after disable_cb.
1971 * @_vq: the struct virtqueue we're talking about.
1972 *
1973 * This re-enables callbacks; it returns "false" if there are pending
1974 * buffers in the queue, to detect a possible race between the driver
1975 * checking for more work, and enabling callbacks.
1976 *
1977 * Caller must ensure we don't call this with other virtqueue
1978 * operations at the same time (except where noted).
1979 */
1980bool virtqueue_enable_cb(struct virtqueue *_vq)
1981{
1982        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
1983
1984        return !virtqueue_poll(_vq, last_used_idx);
1985}
1986EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
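
/*
 * Illustrative sketch, not part of the original file: the classic race-free
 * drain loop.  If virtqueue_enable_cb() reports that buffers slipped in
 * while callbacks were off, go around again instead of losing them.
 * my_handle() is a hypothetical per-buffer handler.
 */
static void my_drain_all(struct virtqueue *vq,
                         void (*my_handle)(void *token, unsigned int len))
{
        unsigned int len;
        void *token;

        do {
                virtqueue_disable_cb(vq);
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        my_handle(token, len);
        } while (!virtqueue_enable_cb(vq));
}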
1987
1988/**
1989 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
1990 * @_vq: the struct virtqueue we're talking about.
1991 *
1992 * This re-enables callbacks but hints to the other side to delay
1993 * interrupts until most of the available buffers have been processed;
1994 * it returns "false" if there are many pending buffers in the queue,
1995 * to detect a possible race between the driver checking for more work,
1996 * and enabling callbacks.
1997 *
1998 * Caller must ensure we don't call this with other virtqueue
1999 * operations at the same time (except where noted).
2000 */
2001bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2002{
2003        struct vring_virtqueue *vq = to_vvq(_vq);
2004
2005        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2006                                 virtqueue_enable_cb_delayed_split(_vq);
2007}
2008EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
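
/*
 * Illustrative sketch, not part of the original file: a transmit-completion
 * path that frees used buffers and asks for an interrupt only once most of
 * the ring has been consumed, to batch TX interrupts.  Tokens are assumed
 * to be kmalloc()ed buffers.
 */
static void my_tx_cleanup(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        virtqueue_disable_cb(vq);
        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                kfree(token);

        if (!virtqueue_enable_cb_delayed(vq)) {
                /* Many completions arrived meanwhile; reap them as well. */
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        kfree(token);
        }
}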
2009
2010/**
2011 * virtqueue_detach_unused_buf - detach first unused buffer
2012 * @_vq: the struct virtqueue we're talking about.
2013 *
2014 * Returns NULL or the "data" token handed to virtqueue_add_*().
2015 * This is not valid on an active queue; it is useful only for device
2016 * shutdown.
2017 */
2018void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2019{
2020        struct vring_virtqueue *vq = to_vvq(_vq);
2021
2022        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2023                                 virtqueue_detach_unused_buf_split(_vq);
2024}
2025EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
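
/*
 * Illustrative sketch, not part of the original file: reclaiming buffers
 * that were posted but never used, during teardown (after the device has
 * been reset so the queue is no longer active).  Tokens are assumed to be
 * kmalloc()ed buffers.
 */
static void my_free_unused(struct virtqueue *vq)
{
        void *buf;

        while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
                kfree(buf);
}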
2026
2027static inline bool more_used(const struct vring_virtqueue *vq)
2028{
2029        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2030}
2031
2032irqreturn_t vring_interrupt(int irq, void *_vq)
2033{
2034        struct vring_virtqueue *vq = to_vvq(_vq);
2035
2036        if (!more_used(vq)) {
2037                pr_debug("virtqueue interrupt with no work for %p\n", vq);
2038                return IRQ_NONE;
2039        }
2040
2041        if (unlikely(vq->broken))
2042                return IRQ_HANDLED;
2043
2044        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2045        if (vq->vq.callback)
2046                vq->vq.callback(&vq->vq);
2047
2048        return IRQ_HANDLED;
2049}
2050EXPORT_SYMBOL_GPL(vring_interrupt);
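
/*
 * Illustrative sketch, not part of the original file: a transport wiring an
 * interrupt line straight to vring_interrupt(), with the virtqueue as the
 * dev_id cookie (the PCI transport does essentially this for per-queue
 * vectors).  Needs <linux/interrupt.h>; "irq" would come from the transport.
 */
static int my_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
        return request_irq(irq, vring_interrupt, IRQF_SHARED,
                           "my-virtio-vq", vq);
}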
2051
2052/* Only available for split ring */
2053struct virtqueue *__vring_new_virtqueue(unsigned int index,
2054                                        struct vring vring,
2055                                        struct virtio_device *vdev,
2056                                        bool weak_barriers,
2057                                        bool context,
2058                                        bool (*notify)(struct virtqueue *),
2059                                        void (*callback)(struct virtqueue *),
2060                                        const char *name)
2061{
2062        unsigned int i;
2063        struct vring_virtqueue *vq;
2064
2065        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2066                return NULL;
2067
2068        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2069        if (!vq)
2070                return NULL;
2071
2072        vq->packed_ring = false;
2073        vq->vq.callback = callback;
2074        vq->vq.vdev = vdev;
2075        vq->vq.name = name;
2076        vq->vq.num_free = vring.num;
2077        vq->vq.index = index;
2078        vq->we_own_ring = false;
2079        vq->notify = notify;
2080        vq->weak_barriers = weak_barriers;
2081        vq->broken = false;
2082        vq->last_used_idx = 0;
2083        vq->num_added = 0;
2084        vq->use_dma_api = vring_use_dma_api(vdev);
2085        list_add_tail(&vq->vq.list, &vdev->vqs);
2086#ifdef DEBUG
2087        vq->in_use = false;
2088        vq->last_add_time_valid = false;
2089#endif
2090
2091        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2092                !context;
2093        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2094
2095        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2096                vq->weak_barriers = false;
2097
2098        vq->split.queue_dma_addr = 0;
2099        vq->split.queue_size_in_bytes = 0;
2100
2101        vq->split.vring = vring;
2102        vq->split.avail_flags_shadow = 0;
2103        vq->split.avail_idx_shadow = 0;
2104
2105        /* No callback?  Tell other side not to bother us. */
2106        if (!callback) {
2107                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2108                if (!vq->event)
2109                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2110                                        vq->split.avail_flags_shadow);
2111        }
2112
2113        vq->split.desc_state = kmalloc_array(vring.num,
2114                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
2115        if (!vq->split.desc_state) {
2116                kfree(vq);
2117                return NULL;
2118        }
2119
2120        /* Put everything in free lists. */
2121        vq->free_head = 0;
2122        for (i = 0; i < vring.num-1; i++)
2123                vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
2124        memset(vq->split.desc_state, 0, vring.num *
2125                        sizeof(struct vring_desc_state_split));
2126
2127        return &vq->vq;
2128}
2129EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2130
2131struct virtqueue *vring_create_virtqueue(
2132        unsigned int index,
2133        unsigned int num,
2134        unsigned int vring_align,
2135        struct virtio_device *vdev,
2136        bool weak_barriers,
2137        bool may_reduce_num,
2138        bool context,
2139        bool (*notify)(struct virtqueue *),
2140        void (*callback)(struct virtqueue *),
2141        const char *name)
2142{
2144        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2145                return vring_create_virtqueue_packed(index, num, vring_align,
2146                                vdev, weak_barriers, may_reduce_num,
2147                                context, notify, callback, name);
2148
2149        return vring_create_virtqueue_split(index, num, vring_align,
2150                        vdev, weak_barriers, may_reduce_num,
2151                        context, notify, callback, name);
2152}
2153EXPORT_SYMBOL_GPL(vring_create_virtqueue);
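
/*
 * Illustrative sketch, not part of the original file: how a transport might
 * create a queue.  The 128-entry size and PAGE_SIZE alignment are example
 * values, and my_notify() is a stand-in for poking the device's notification
 * register.
 */
static bool my_notify(struct virtqueue *vq)
{
        /* Hypothetical: write vq->index to the device's notify area. */
        return true;
}

static struct virtqueue *my_setup_vq(struct virtio_device *vdev,
                                     unsigned int index,
                                     void (*cb)(struct virtqueue *),
                                     const char *name)
{
        return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
                                      true,      /* weak_barriers */
                                      true,      /* may_reduce_num */
                                      false,     /* no per-buffer context */
                                      my_notify, cb, name);
}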
2154
2155/* Only available for split ring */
2156struct virtqueue *vring_new_virtqueue(unsigned int index,
2157                                      unsigned int num,
2158                                      unsigned int vring_align,
2159                                      struct virtio_device *vdev,
2160                                      bool weak_barriers,
2161                                      bool context,
2162                                      void *pages,
2163                                      bool (*notify)(struct virtqueue *vq),
2164                                      void (*callback)(struct virtqueue *vq),
2165                                      const char *name)
2166{
2167        struct vring vring;
2168
2169        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2170                return NULL;
2171
2172        vring_init(&vring, num, pages, vring_align);
2173        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2174                                     notify, callback, name);
2175}
2176EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2177
2178void vring_del_virtqueue(struct virtqueue *_vq)
2179{
2180        struct vring_virtqueue *vq = to_vvq(_vq);
2181
2182        if (vq->we_own_ring) {
2183                if (vq->packed_ring) {
2184                        vring_free_queue(vq->vq.vdev,
2185                                         vq->packed.ring_size_in_bytes,
2186                                         vq->packed.vring.desc,
2187                                         vq->packed.ring_dma_addr);
2188
2189                        vring_free_queue(vq->vq.vdev,
2190                                         vq->packed.event_size_in_bytes,
2191                                         vq->packed.vring.driver,
2192                                         vq->packed.driver_event_dma_addr);
2193
2194                        vring_free_queue(vq->vq.vdev,
2195                                         vq->packed.event_size_in_bytes,
2196                                         vq->packed.vring.device,
2197                                         vq->packed.device_event_dma_addr);
2198
2199                        kfree(vq->packed.desc_state);
2200                        kfree(vq->packed.desc_extra);
2201                } else {
2202                        vring_free_queue(vq->vq.vdev,
2203                                         vq->split.queue_size_in_bytes,
2204                                         vq->split.vring.desc,
2205                                         vq->split.queue_dma_addr);
2206
2207                        kfree(vq->split.desc_state);
2208                }
2209        }
2210        list_del(&_vq->list);
2211        kfree(vq);
2212}
2213EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2214
2215/* Manipulates transport-specific feature bits. */
2216void vring_transport_features(struct virtio_device *vdev)
2217{
2218        unsigned int i;
2219
2220        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2221                switch (i) {
2222                case VIRTIO_RING_F_INDIRECT_DESC:
2223                        break;
2224                case VIRTIO_RING_F_EVENT_IDX:
2225                        break;
2226                case VIRTIO_F_VERSION_1:
2227                        break;
2228                case VIRTIO_F_IOMMU_PLATFORM:
2229                        break;
2230                case VIRTIO_F_RING_PACKED:
2231                        break;
2232                case VIRTIO_F_ORDER_PLATFORM:
2233                        break;
2234                default:
2235                        /* We don't understand this bit. */
2236                        __virtio_clear_bit(vdev, i);
2237                }
2238        }
2239}
2240EXPORT_SYMBOL_GPL(vring_transport_features);
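
/*
 * Illustrative sketch, not part of the original file: a transport's
 * finalize_features hook typically just calls vring_transport_features() to
 * drop transport feature bits this ring code does not understand.
 */
static int my_finalize_features(struct virtio_device *vdev)
{
        vring_transport_features(vdev);
        return 0;
}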
2241
2242/**
2243 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2244 * @_vq: the struct virtqueue containing the vring of interest.
2245 *
2246 * Returns the size of the vring.  This is mainly used for boasting to
2247 * userspace.  Unlike other operations, this need not be serialized.
2248 */
2249unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2250{
2252        struct vring_virtqueue *vq = to_vvq(_vq);
2253
2254        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2255}
2256EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2257
2258bool virtqueue_is_broken(struct virtqueue *_vq)
2259{
2260        struct vring_virtqueue *vq = to_vvq(_vq);
2261
2262        return vq->broken;
2263}
2264EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2265
2266/*
2267 * This should prevent the device from being used, allowing drivers to
2268 * recover.  You may need to grab appropriate locks to flush.
2269 */
2270void virtio_break_device(struct virtio_device *dev)
2271{
2272        struct virtqueue *_vq;
2273
2274        list_for_each_entry(_vq, &dev->vqs, list) {
2275                struct vring_virtqueue *vq = to_vvq(_vq);
2276                vq->broken = true;
2277        }
2278}
2279EXPORT_SYMBOL_GPL(virtio_break_device);
2280
2281dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2282{
2283        struct vring_virtqueue *vq = to_vvq(_vq);
2284
2285        BUG_ON(!vq->we_own_ring);
2286
2287        if (vq->packed_ring)
2288                return vq->packed.ring_dma_addr;
2289
2290        return vq->split.queue_dma_addr;
2291}
2292EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2293
2294dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2295{
2296        struct vring_virtqueue *vq = to_vvq(_vq);
2297
2298        BUG_ON(!vq->we_own_ring);
2299
2300        if (vq->packed_ring)
2301                return vq->packed.driver_event_dma_addr;
2302
2303        return vq->split.queue_dma_addr +
2304                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2305}
2306EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2307
2308dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2309{
2310        struct vring_virtqueue *vq = to_vvq(_vq);
2311
2312        BUG_ON(!vq->we_own_ring);
2313
2314        if (vq->packed_ring)
2315                return vq->packed.device_event_dma_addr;
2316
2317        return vq->split.queue_dma_addr +
2318                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2319}
2320EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2321
2322/* Only available for split ring */
2323const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2324{
2325        return &to_vvq(vq)->split.vring;
2326}
2327EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2328
2329MODULE_LICENSE("GPL");
2330