linux/drivers/virtio/virtio_ring.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* Virtio ring implementation.
   3 *
   4 *  Copyright 2007 Rusty Russell IBM Corporation
   5 */
   6#include <linux/virtio.h>
   7#include <linux/virtio_ring.h>
   8#include <linux/virtio_config.h>
   9#include <linux/device.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/hrtimer.h>
  13#include <linux/dma-mapping.h>
  14#include <linux/spinlock.h>
  15#include <xen/xen.h>
  16
  17#ifdef DEBUG
  18/* For development, we want to crash whenever the ring is screwed. */
  19#define BAD_RING(_vq, fmt, args...)                             \
  20        do {                                                    \
  21                dev_err(&(_vq)->vq.vdev->dev,                   \
  22                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  23                BUG();                                          \
  24        } while (0)
  25/* Caller is supposed to guarantee no reentry. */
  26#define START_USE(_vq)                                          \
  27        do {                                                    \
  28                if ((_vq)->in_use)                              \
  29                        panic("%s:in_use = %i\n",               \
  30                              (_vq)->vq.name, (_vq)->in_use);   \
  31                (_vq)->in_use = __LINE__;                       \
  32        } while (0)
  33#define END_USE(_vq) \
   34        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
  35#define LAST_ADD_TIME_UPDATE(_vq)                               \
  36        do {                                                    \
  37                ktime_t now = ktime_get();                      \
  38                                                                \
  39                /* No kick or get, with .1 second between?  Warn. */ \
  40                if ((_vq)->last_add_time_valid)                 \
  41                        WARN_ON(ktime_to_ms(ktime_sub(now,      \
  42                                (_vq)->last_add_time)) > 100);  \
  43                (_vq)->last_add_time = now;                     \
  44                (_vq)->last_add_time_valid = true;              \
  45        } while (0)
  46#define LAST_ADD_TIME_CHECK(_vq)                                \
  47        do {                                                    \
  48                if ((_vq)->last_add_time_valid) {               \
  49                        WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
  50                                      (_vq)->last_add_time)) > 100); \
  51                }                                               \
  52        } while (0)
  53#define LAST_ADD_TIME_INVALID(_vq)                              \
  54        ((_vq)->last_add_time_valid = false)
  55#else
  56#define BAD_RING(_vq, fmt, args...)                             \
  57        do {                                                    \
  58                dev_err(&_vq->vq.vdev->dev,                     \
  59                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  60                (_vq)->broken = true;                           \
  61        } while (0)
  62#define START_USE(vq)
  63#define END_USE(vq)
  64#define LAST_ADD_TIME_UPDATE(vq)
  65#define LAST_ADD_TIME_CHECK(vq)
  66#define LAST_ADD_TIME_INVALID(vq)
  67#endif
  68
  69struct vring_desc_state_split {
  70        void *data;                     /* Data for callback. */
  71        struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
  72};
  73
  74struct vring_desc_state_packed {
  75        void *data;                     /* Data for callback. */
  76        struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
  77        u16 num;                        /* Descriptor list length. */
  78        u16 last;                       /* The last desc state in a list. */
  79};
  80
  81struct vring_desc_extra {
  82        dma_addr_t addr;                /* Buffer DMA addr. */
  83        u32 len;                        /* Buffer length. */
  84        u16 flags;                      /* Descriptor flags. */
  85        u16 next;                       /* The next desc state in a list. */
  86};
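/*
 * Note: desc_extra keeps a driver-side copy of each in-flight descriptor's
 * DMA address, length and flags, so the unmap paths do not have to trust
 * values read back from ring memory that the device can also access.
 */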
  87
  88struct vring_virtqueue {
  89        struct virtqueue vq;
  90
  91        /* Is this a packed ring? */
  92        bool packed_ring;
  93
  94        /* Is DMA API used? */
  95        bool use_dma_api;
  96
  97        /* Can we use weak barriers? */
  98        bool weak_barriers;
  99
 100        /* Other side has made a mess, don't try any more. */
 101        bool broken;
 102
 103        /* Host supports indirect buffers */
 104        bool indirect;
 105
 106        /* Host publishes avail event idx */
 107        bool event;
 108
 109        /* Head of free buffer list. */
 110        unsigned int free_head;
 111        /* Number we've added since last sync. */
 112        unsigned int num_added;
 113
 114        /* Last used index we've seen. */
 115        u16 last_used_idx;
 116
 117        /* Hint for event idx: already triggered no need to disable. */
 118        bool event_triggered;
 119
 120        union {
 121                /* Available for split ring */
 122                struct {
 123                        /* Actual memory layout for this queue. */
 124                        struct vring vring;
 125
 126                        /* Last written value to avail->flags */
 127                        u16 avail_flags_shadow;
 128
 129                        /*
 130                         * Last written value to avail->idx in
 131                         * guest byte order.
 132                         */
 133                        u16 avail_idx_shadow;
 134
 135                        /* Per-descriptor state. */
 136                        struct vring_desc_state_split *desc_state;
 137                        struct vring_desc_extra *desc_extra;
 138
 139                        /* DMA address and size information */
 140                        dma_addr_t queue_dma_addr;
 141                        size_t queue_size_in_bytes;
 142                } split;
 143
 144                /* Available for packed ring */
 145                struct {
 146                        /* Actual memory layout for this queue. */
 147                        struct {
 148                                unsigned int num;
 149                                struct vring_packed_desc *desc;
 150                                struct vring_packed_desc_event *driver;
 151                                struct vring_packed_desc_event *device;
 152                        } vring;
 153
 154                        /* Driver ring wrap counter. */
 155                        bool avail_wrap_counter;
 156
 157                        /* Device ring wrap counter. */
 158                        bool used_wrap_counter;
 159
 160                        /* Avail used flags. */
 161                        u16 avail_used_flags;
 162
 163                        /* Index of the next avail descriptor. */
 164                        u16 next_avail_idx;
 165
 166                        /*
 167                         * Last written value to driver->flags in
 168                         * guest byte order.
 169                         */
 170                        u16 event_flags_shadow;
 171
 172                        /* Per-descriptor state. */
 173                        struct vring_desc_state_packed *desc_state;
 174                        struct vring_desc_extra *desc_extra;
 175
 176                        /* DMA address and size information */
 177                        dma_addr_t ring_dma_addr;
 178                        dma_addr_t driver_event_dma_addr;
 179                        dma_addr_t device_event_dma_addr;
 180                        size_t ring_size_in_bytes;
 181                        size_t event_size_in_bytes;
 182                } packed;
 183        };
 184
 185        /* How to notify other side. FIXME: commonalize hcalls! */
 186        bool (*notify)(struct virtqueue *vq);
 187
 188        /* DMA, allocation, and size information */
 189        bool we_own_ring;
 190
 191#ifdef DEBUG
 192        /* They're supposed to lock for us. */
 193        unsigned int in_use;
 194
 195        /* Figure out if their kicks are too delayed. */
 196        bool last_add_time_valid;
 197        ktime_t last_add_time;
 198#endif
 199};
 200
 201
 202/*
 203 * Helpers.
 204 */
 205
 206#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 207
 208static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
 209                                          unsigned int total_sg)
 210{
 211        struct vring_virtqueue *vq = to_vvq(_vq);
 212
 213        /*
 214         * If the host supports indirect descriptor tables, and we have multiple
 215         * buffers, then go indirect. FIXME: tune this threshold
 216         */
 217        return (vq->indirect && total_sg > 1 && vq->vq.num_free);
 218}
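/*
 * E.g. with VIRTIO_RING_F_INDIRECT_DESC negotiated, a request built from a
 * three-entry scatterlist goes into one indirect table and occupies a single
 * ring slot; without it, the same request needs three chained descriptors.
 */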
 219
 220/*
 221 * Modern virtio devices have feature bits to specify whether they need a
 222 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 223 *
 224 * If there, the interaction between virtio and DMA API is messy.
 225 *
 226 * On most systems with virtio, physical addresses match bus addresses,
 227 * and it doesn't particularly matter whether we use the DMA API.
 228 *
 229 * On some systems, including Xen and any system with a physical device
 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 231 * for virtio DMA to work at all.
 232 *
 233 * On other systems, including SPARC and PPC64, virtio-pci devices are
 234 * enumerated as though they are behind an IOMMU, but the virtio host
 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 236 * there or somehow map everything as the identity.
 237 *
 238 * For the time being, we preserve historic behavior and bypass the DMA
 239 * API.
 240 *
 241 * TODO: install a per-device DMA ops structure that does the right thing
 242 * taking into account all the above quirks, and use the DMA API
 243 * unconditionally on data path.
 244 */
 245
 246static bool vring_use_dma_api(struct virtio_device *vdev)
 247{
 248        if (!virtio_has_dma_quirk(vdev))
 249                return true;
 250
 251        /* Otherwise, we are left to guess. */
 252        /*
  253         * In theory, it's possible to have a buggy QEMU-supplied
 254         * emulated Q35 IOMMU and Xen enabled at the same time.  On
 255         * such a configuration, virtio has never worked and will
 256         * not work without an even larger kludge.  Instead, enable
 257         * the DMA API if we're a Xen guest, which at least allows
 258         * all of the sensible Xen configurations to work correctly.
 259         */
 260        if (xen_domain())
 261                return true;
 262
 263        return false;
 264}
 265
 266size_t virtio_max_dma_size(struct virtio_device *vdev)
 267{
 268        size_t max_segment_size = SIZE_MAX;
 269
 270        if (vring_use_dma_api(vdev))
 271                max_segment_size = dma_max_mapping_size(&vdev->dev);
 272
 273        return max_segment_size;
 274}
 275EXPORT_SYMBOL_GPL(virtio_max_dma_size);
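/*
 * Callers use this to bound individual buffer elements; virtio-blk, for
 * example, caps its segment size at virtio_max_dma_size() so that every
 * segment can be mapped by the DMA API in one piece.
 */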
 276
 277static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 278                              dma_addr_t *dma_handle, gfp_t flag)
 279{
 280        if (vring_use_dma_api(vdev)) {
 281                return dma_alloc_coherent(vdev->dev.parent, size,
 282                                          dma_handle, flag);
 283        } else {
 284                void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
 285
 286                if (queue) {
 287                        phys_addr_t phys_addr = virt_to_phys(queue);
 288                        *dma_handle = (dma_addr_t)phys_addr;
 289
 290                        /*
  291                         * Sanity check: make sure we didn't truncate
 292                         * the address.  The only arches I can find that
 293                         * have 64-bit phys_addr_t but 32-bit dma_addr_t
 294                         * are certain non-highmem MIPS and x86
 295                         * configurations, but these configurations
 296                         * should never allocate physical pages above 32
 297                         * bits, so this is fine.  Just in case, throw a
 298                         * warning and abort if we end up with an
 299                         * unrepresentable address.
 300                         */
 301                        if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
 302                                free_pages_exact(queue, PAGE_ALIGN(size));
 303                                return NULL;
 304                        }
 305                }
 306                return queue;
 307        }
 308}
 309
 310static void vring_free_queue(struct virtio_device *vdev, size_t size,
 311                             void *queue, dma_addr_t dma_handle)
 312{
 313        if (vring_use_dma_api(vdev))
 314                dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
 315        else
 316                free_pages_exact(queue, PAGE_ALIGN(size));
 317}
 318
 319/*
 320 * The DMA ops on various arches are rather gnarly right now, and
 321 * making all of the arch DMA ops work on the vring device itself
 322 * is a mess.  For now, we use the parent device for DMA ops.
 323 */
 324static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 325{
 326        return vq->vq.vdev->dev.parent;
 327}
 328
 329/* Map one sg entry. */
 330static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
 331                                   struct scatterlist *sg,
 332                                   enum dma_data_direction direction)
 333{
 334        if (!vq->use_dma_api)
 335                return (dma_addr_t)sg_phys(sg);
 336
 337        /*
 338         * We can't use dma_map_sg, because we don't use scatterlists in
 339         * the way it expects (we don't guarantee that the scatterlist
 340         * will exist for the lifetime of the mapping).
 341         */
 342        return dma_map_page(vring_dma_dev(vq),
 343                            sg_page(sg), sg->offset, sg->length,
 344                            direction);
 345}
 346
 347static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 348                                   void *cpu_addr, size_t size,
 349                                   enum dma_data_direction direction)
 350{
 351        if (!vq->use_dma_api)
 352                return (dma_addr_t)virt_to_phys(cpu_addr);
 353
 354        return dma_map_single(vring_dma_dev(vq),
 355                              cpu_addr, size, direction);
 356}
 357
 358static int vring_mapping_error(const struct vring_virtqueue *vq,
 359                               dma_addr_t addr)
 360{
 361        if (!vq->use_dma_api)
 362                return 0;
 363
 364        return dma_mapping_error(vring_dma_dev(vq), addr);
 365}
 366
 367
 368/*
 369 * Split ring specific functions - *_split().
 370 */
 371
 372static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 373                                           struct vring_desc *desc)
 374{
 375        u16 flags;
 376
 377        if (!vq->use_dma_api)
 378                return;
 379
 380        flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 381
 382        if (flags & VRING_DESC_F_INDIRECT) {
 383                dma_unmap_single(vring_dma_dev(vq),
 384                                 virtio64_to_cpu(vq->vq.vdev, desc->addr),
 385                                 virtio32_to_cpu(vq->vq.vdev, desc->len),
 386                                 (flags & VRING_DESC_F_WRITE) ?
 387                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 388        } else {
 389                dma_unmap_page(vring_dma_dev(vq),
 390                               virtio64_to_cpu(vq->vq.vdev, desc->addr),
 391                               virtio32_to_cpu(vq->vq.vdev, desc->len),
 392                               (flags & VRING_DESC_F_WRITE) ?
 393                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 394        }
 395}
 396
 397static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 398                                          unsigned int i)
 399{
 400        struct vring_desc_extra *extra = vq->split.desc_extra;
 401        u16 flags;
 402
 403        if (!vq->use_dma_api)
 404                goto out;
 405
 406        flags = extra[i].flags;
 407
 408        if (flags & VRING_DESC_F_INDIRECT) {
 409                dma_unmap_single(vring_dma_dev(vq),
 410                                 extra[i].addr,
 411                                 extra[i].len,
 412                                 (flags & VRING_DESC_F_WRITE) ?
 413                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 414        } else {
 415                dma_unmap_page(vring_dma_dev(vq),
 416                               extra[i].addr,
 417                               extra[i].len,
 418                               (flags & VRING_DESC_F_WRITE) ?
 419                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 420        }
 421
 422out:
 423        return extra[i].next;
 424}
 425
 426static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 427                                               unsigned int total_sg,
 428                                               gfp_t gfp)
 429{
 430        struct vring_desc *desc;
 431        unsigned int i;
 432
 433        /*
 434         * We require lowmem mappings for the descriptors because
 435         * otherwise virt_to_phys will give us bogus addresses in the
 436         * virtqueue.
 437         */
 438        gfp &= ~__GFP_HIGHMEM;
 439
 440        desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
 441        if (!desc)
 442                return NULL;
 443
 444        for (i = 0; i < total_sg; i++)
 445                desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
 446        return desc;
 447}
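/*
 * The next fields above are pre-chained 0 -> 1 -> ... -> total_sg; the last
 * entry's next points one past the table, which is harmless because
 * virtqueue_add_split() clears VRING_DESC_F_NEXT on the final descriptor it
 * actually fills in.
 */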
 448
 449static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 450                                                    struct vring_desc *desc,
 451                                                    unsigned int i,
 452                                                    dma_addr_t addr,
 453                                                    unsigned int len,
 454                                                    u16 flags,
 455                                                    bool indirect)
 456{
 457        struct vring_virtqueue *vring = to_vvq(vq);
 458        struct vring_desc_extra *extra = vring->split.desc_extra;
 459        u16 next;
 460
 461        desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
 462        desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
 463        desc[i].len = cpu_to_virtio32(vq->vdev, len);
 464
 465        if (!indirect) {
 466                next = extra[i].next;
 467                desc[i].next = cpu_to_virtio16(vq->vdev, next);
 468
 469                extra[i].addr = addr;
 470                extra[i].len = len;
 471                extra[i].flags = flags;
 472        } else
 473                next = virtio16_to_cpu(vq->vdev, desc[i].next);
 474
 475        return next;
 476}
 477
 478static inline int virtqueue_add_split(struct virtqueue *_vq,
 479                                      struct scatterlist *sgs[],
 480                                      unsigned int total_sg,
 481                                      unsigned int out_sgs,
 482                                      unsigned int in_sgs,
 483                                      void *data,
 484                                      void *ctx,
 485                                      gfp_t gfp)
 486{
 487        struct vring_virtqueue *vq = to_vvq(_vq);
 488        struct scatterlist *sg;
 489        struct vring_desc *desc;
 490        unsigned int i, n, avail, descs_used, prev, err_idx;
 491        int head;
 492        bool indirect;
 493
 494        START_USE(vq);
 495
 496        BUG_ON(data == NULL);
 497        BUG_ON(ctx && vq->indirect);
 498
 499        if (unlikely(vq->broken)) {
 500                END_USE(vq);
 501                return -EIO;
 502        }
 503
 504        LAST_ADD_TIME_UPDATE(vq);
 505
 506        BUG_ON(total_sg == 0);
 507
 508        head = vq->free_head;
 509
 510        if (virtqueue_use_indirect(_vq, total_sg))
 511                desc = alloc_indirect_split(_vq, total_sg, gfp);
 512        else {
 513                desc = NULL;
 514                WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 515        }
 516
 517        if (desc) {
 518                /* Use a single buffer which doesn't continue */
 519                indirect = true;
 520                /* Set up rest to use this indirect table. */
 521                i = 0;
 522                descs_used = 1;
 523        } else {
 524                indirect = false;
 525                desc = vq->split.vring.desc;
 526                i = head;
 527                descs_used = total_sg;
 528        }
 529
 530        if (vq->vq.num_free < descs_used) {
 531                pr_debug("Can't add buf len %i - avail = %i\n",
 532                         descs_used, vq->vq.num_free);
 533                /* FIXME: for historical reasons, we force a notify here if
 534                 * there are outgoing parts to the buffer.  Presumably the
 535                 * host should service the ring ASAP. */
 536                if (out_sgs)
 537                        vq->notify(&vq->vq);
 538                if (indirect)
 539                        kfree(desc);
 540                END_USE(vq);
 541                return -ENOSPC;
 542        }
 543
 544        for (n = 0; n < out_sgs; n++) {
 545                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 546                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 547                        if (vring_mapping_error(vq, addr))
 548                                goto unmap_release;
 549
 550                        prev = i;
  551                        /* Note that we trust the indirect descriptor
  552                         * table since it uses streaming DMA mapping.
 553                         */
 554                        i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
 555                                                     VRING_DESC_F_NEXT,
 556                                                     indirect);
 557                }
 558        }
 559        for (; n < (out_sgs + in_sgs); n++) {
 560                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 561                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
 562                        if (vring_mapping_error(vq, addr))
 563                                goto unmap_release;
 564
 565                        prev = i;
  566                        /* Note that we trust the indirect descriptor
  567                         * table since it uses streaming DMA mapping.
 568                         */
 569                        i = virtqueue_add_desc_split(_vq, desc, i, addr,
 570                                                     sg->length,
 571                                                     VRING_DESC_F_NEXT |
 572                                                     VRING_DESC_F_WRITE,
 573                                                     indirect);
 574                }
 575        }
 576        /* Last one doesn't continue. */
 577        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 578        if (!indirect && vq->use_dma_api)
  579                vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
 580                        ~VRING_DESC_F_NEXT;
 581
 582        if (indirect) {
 583                /* Now that the indirect table is filled in, map it. */
 584                dma_addr_t addr = vring_map_single(
 585                        vq, desc, total_sg * sizeof(struct vring_desc),
 586                        DMA_TO_DEVICE);
 587                if (vring_mapping_error(vq, addr))
 588                        goto unmap_release;
 589
 590                virtqueue_add_desc_split(_vq, vq->split.vring.desc,
 591                                         head, addr,
 592                                         total_sg * sizeof(struct vring_desc),
 593                                         VRING_DESC_F_INDIRECT,
 594                                         false);
 595        }
 596
 597        /* We're using some buffers from the free list. */
 598        vq->vq.num_free -= descs_used;
 599
 600        /* Update free pointer */
 601        if (indirect)
 602                vq->free_head = vq->split.desc_extra[head].next;
 603        else
 604                vq->free_head = i;
 605
 606        /* Store token and indirect buffer state. */
 607        vq->split.desc_state[head].data = data;
 608        if (indirect)
 609                vq->split.desc_state[head].indir_desc = desc;
 610        else
 611                vq->split.desc_state[head].indir_desc = ctx;
 612
 613        /* Put entry in available array (but don't update avail->idx until they
 614         * do sync). */
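        /* num is a power of two, so masking with (num - 1) turns the
         * free-running shadow index into a ring slot: e.g. with num == 256
         * and avail_idx_shadow == 260, the head goes into avail->ring[4]. */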
 615        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
 616        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 617
 618        /* Descriptors and available array need to be set before we expose the
 619         * new available array entries. */
 620        virtio_wmb(vq->weak_barriers);
 621        vq->split.avail_idx_shadow++;
 622        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 623                                                vq->split.avail_idx_shadow);
 624        vq->num_added++;
 625
 626        pr_debug("Added buffer head %i to %p\n", head, vq);
 627        END_USE(vq);
 628
 629        /* This is very unlikely, but theoretically possible.  Kick
 630         * just in case. */
 631        if (unlikely(vq->num_added == (1 << 16) - 1))
 632                virtqueue_kick(_vq);
 633
 634        return 0;
 635
 636unmap_release:
 637        err_idx = i;
 638
 639        if (indirect)
 640                i = 0;
 641        else
 642                i = head;
 643
 644        for (n = 0; n < total_sg; n++) {
 645                if (i == err_idx)
 646                        break;
 647                if (indirect) {
 648                        vring_unmap_one_split_indirect(vq, &desc[i]);
 649                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 650                } else
 651                        i = vring_unmap_one_split(vq, i);
 652        }
 653
 654        if (indirect)
 655                kfree(desc);
 656
 657        END_USE(vq);
 658        return -ENOMEM;
 659}
 660
 661static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 662{
 663        struct vring_virtqueue *vq = to_vvq(_vq);
 664        u16 new, old;
 665        bool needs_kick;
 666
 667        START_USE(vq);
 668        /* We need to expose available array entries before checking avail
 669         * event. */
 670        virtio_mb(vq->weak_barriers);
 671
 672        old = vq->split.avail_idx_shadow - vq->num_added;
 673        new = vq->split.avail_idx_shadow;
 674        vq->num_added = 0;
 675
 676        LAST_ADD_TIME_CHECK(vq);
 677        LAST_ADD_TIME_INVALID(vq);
 678
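        /*
         * With VIRTIO_RING_F_EVENT_IDX the device publishes the avail index
         * it wants to be woken at; vring_need_event() asks for a kick only
         * when that index lies between the old and new shadow values, i.e.
         * the device requested a notification for an entry we just added.
         */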
 679        if (vq->event) {
 680                needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
 681                                        vring_avail_event(&vq->split.vring)),
 682                                              new, old);
 683        } else {
 684                needs_kick = !(vq->split.vring.used->flags &
 685                                        cpu_to_virtio16(_vq->vdev,
 686                                                VRING_USED_F_NO_NOTIFY));
 687        }
 688        END_USE(vq);
 689        return needs_kick;
 690}
 691
 692static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 693                             void **ctx)
 694{
 695        unsigned int i, j;
 696        __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 697
 698        /* Clear data ptr. */
 699        vq->split.desc_state[head].data = NULL;
 700
 701        /* Put back on free list: unmap first-level descriptors and find end */
 702        i = head;
 703
 704        while (vq->split.vring.desc[i].flags & nextflag) {
 705                vring_unmap_one_split(vq, i);
 706                i = vq->split.desc_extra[i].next;
 707                vq->vq.num_free++;
 708        }
 709
 710        vring_unmap_one_split(vq, i);
 711        vq->split.desc_extra[i].next = vq->free_head;
 712        vq->free_head = head;
 713
 714        /* Plus final descriptor */
 715        vq->vq.num_free++;
 716
 717        if (vq->indirect) {
 718                struct vring_desc *indir_desc =
 719                                vq->split.desc_state[head].indir_desc;
 720                u32 len;
 721
 722                /* Free the indirect table, if any, now that it's unmapped. */
 723                if (!indir_desc)
 724                        return;
 725
 726                len = vq->split.desc_extra[head].len;
 727
 728                BUG_ON(!(vq->split.desc_extra[head].flags &
 729                                VRING_DESC_F_INDIRECT));
 730                BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 731
 732                for (j = 0; j < len / sizeof(struct vring_desc); j++)
 733                        vring_unmap_one_split_indirect(vq, &indir_desc[j]);
 734
 735                kfree(indir_desc);
 736                vq->split.desc_state[head].indir_desc = NULL;
 737        } else if (ctx) {
 738                *ctx = vq->split.desc_state[head].indir_desc;
 739        }
 740}
 741
 742static inline bool more_used_split(const struct vring_virtqueue *vq)
 743{
 744        return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
 745                        vq->split.vring.used->idx);
 746}
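/*
 * The used index is a free-running 16-bit counter on both sides, so the
 * inequality above stays correct across index wraparound.
 */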
 747
 748static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 749                                         unsigned int *len,
 750                                         void **ctx)
 751{
 752        struct vring_virtqueue *vq = to_vvq(_vq);
 753        void *ret;
 754        unsigned int i;
 755        u16 last_used;
 756
 757        START_USE(vq);
 758
 759        if (unlikely(vq->broken)) {
 760                END_USE(vq);
 761                return NULL;
 762        }
 763
 764        if (!more_used_split(vq)) {
 765                pr_debug("No more buffers in queue\n");
 766                END_USE(vq);
 767                return NULL;
 768        }
 769
 770        /* Only get used array entries after they have been exposed by host. */
 771        virtio_rmb(vq->weak_barriers);
 772
 773        last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
 774        i = virtio32_to_cpu(_vq->vdev,
 775                        vq->split.vring.used->ring[last_used].id);
 776        *len = virtio32_to_cpu(_vq->vdev,
 777                        vq->split.vring.used->ring[last_used].len);
 778
 779        if (unlikely(i >= vq->split.vring.num)) {
 780                BAD_RING(vq, "id %u out of range\n", i);
 781                return NULL;
 782        }
 783        if (unlikely(!vq->split.desc_state[i].data)) {
 784                BAD_RING(vq, "id %u is not a head!\n", i);
 785                return NULL;
 786        }
 787
 788        /* detach_buf_split clears data, so grab it now. */
 789        ret = vq->split.desc_state[i].data;
 790        detach_buf_split(vq, i, ctx);
 791        vq->last_used_idx++;
 792        /* If we expect an interrupt for the next entry, tell host
 793         * by writing event index and flush out the write before
 794         * the read in the next get_buf call. */
 795        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 796                virtio_store_mb(vq->weak_barriers,
 797                                &vring_used_event(&vq->split.vring),
 798                                cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 799
 800        LAST_ADD_TIME_INVALID(vq);
 801
 802        END_USE(vq);
 803        return ret;
 804}
 805
 806static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 807{
 808        struct vring_virtqueue *vq = to_vvq(_vq);
 809
 810        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 811                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 812                if (vq->event)
 813                        /* TODO: this is a hack. Figure out a cleaner value to write. */
 814                        vring_used_event(&vq->split.vring) = 0x0;
 815                else
 816                        vq->split.vring.avail->flags =
 817                                cpu_to_virtio16(_vq->vdev,
 818                                                vq->split.avail_flags_shadow);
 819        }
 820}
 821
 822static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
 823{
 824        struct vring_virtqueue *vq = to_vvq(_vq);
 825        u16 last_used_idx;
 826
 827        START_USE(vq);
 828
 829        /* We optimistically turn back on interrupts, then check if there was
 830         * more to do. */
 831        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 832         * either clear the flags bit or point the event index at the next
 833         * entry. Always do both to keep code simple. */
 834        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 835                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 836                if (!vq->event)
 837                        vq->split.vring.avail->flags =
 838                                cpu_to_virtio16(_vq->vdev,
 839                                                vq->split.avail_flags_shadow);
 840        }
 841        vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
 842                        last_used_idx = vq->last_used_idx);
 843        END_USE(vq);
 844        return last_used_idx;
 845}
 846
 847static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
 848{
 849        struct vring_virtqueue *vq = to_vvq(_vq);
 850
 851        return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
 852                        vq->split.vring.used->idx);
 853}
 854
 855static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 856{
 857        struct vring_virtqueue *vq = to_vvq(_vq);
 858        u16 bufs;
 859
 860        START_USE(vq);
 861
 862        /* We optimistically turn back on interrupts, then check if there was
 863         * more to do. */
  864        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 865         * either clear the flags bit or point the event index at the next
 866         * entry. Always update the event index to keep code simple. */
 867        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 868                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 869                if (!vq->event)
 870                        vq->split.vring.avail->flags =
 871                                cpu_to_virtio16(_vq->vdev,
 872                                                vq->split.avail_flags_shadow);
 873        }
 874        /* TODO: tune this threshold */
 875        bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
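        /* E.g. with 40 buffers still outstanding this sets the used event
         * index 30 entries ahead, so the device only interrupts us once it
         * has consumed roughly three quarters of the backlog. */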
 876
 877        virtio_store_mb(vq->weak_barriers,
 878                        &vring_used_event(&vq->split.vring),
 879                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 880
 881        if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
 882                                        - vq->last_used_idx) > bufs)) {
 883                END_USE(vq);
 884                return false;
 885        }
 886
 887        END_USE(vq);
 888        return true;
 889}
 890
 891static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 892{
 893        struct vring_virtqueue *vq = to_vvq(_vq);
 894        unsigned int i;
 895        void *buf;
 896
 897        START_USE(vq);
 898
 899        for (i = 0; i < vq->split.vring.num; i++) {
 900                if (!vq->split.desc_state[i].data)
 901                        continue;
 902                /* detach_buf_split clears data, so grab it now. */
 903                buf = vq->split.desc_state[i].data;
 904                detach_buf_split(vq, i, NULL);
 905                vq->split.avail_idx_shadow--;
 906                vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 907                                vq->split.avail_idx_shadow);
 908                END_USE(vq);
 909                return buf;
 910        }
 911        /* That should have freed everything. */
 912        BUG_ON(vq->vq.num_free != vq->split.vring.num);
 913
 914        END_USE(vq);
 915        return NULL;
 916}
 917
 918static struct virtqueue *vring_create_virtqueue_split(
 919        unsigned int index,
 920        unsigned int num,
 921        unsigned int vring_align,
 922        struct virtio_device *vdev,
 923        bool weak_barriers,
 924        bool may_reduce_num,
 925        bool context,
 926        bool (*notify)(struct virtqueue *),
 927        void (*callback)(struct virtqueue *),
 928        const char *name)
 929{
 930        struct virtqueue *vq;
 931        void *queue = NULL;
 932        dma_addr_t dma_addr;
 933        size_t queue_size_in_bytes;
 934        struct vring vring;
 935
 936        /* We assume num is a power of 2. */
 937        if (num & (num - 1)) {
 938                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 939                return NULL;
 940        }
 941
 942        /* TODO: allocate each queue chunk individually */
 943        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
 944                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 945                                          &dma_addr,
 946                                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 947                if (queue)
 948                        break;
 949                if (!may_reduce_num)
 950                        return NULL;
 951        }
 952
 953        if (!num)
 954                return NULL;
 955
 956        if (!queue) {
 957                /* Try to get a single page. You are my only hope! */
 958                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 959                                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
 960        }
 961        if (!queue)
 962                return NULL;
 963
 964        queue_size_in_bytes = vring_size(num, vring_align);
 965        vring_init(&vring, num, queue, vring_align);
 966
 967        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 968                                   notify, callback, name);
 969        if (!vq) {
 970                vring_free_queue(vdev, queue_size_in_bytes, queue,
 971                                 dma_addr);
 972                return NULL;
 973        }
 974
 975        to_vvq(vq)->split.queue_dma_addr = dma_addr;
 976        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
 977        to_vvq(vq)->we_own_ring = true;
 978
 979        return vq;
 980}
 981
 982
 983/*
 984 * Packed ring specific functions - *_packed().
 985 */
 986
 987static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
 988                                     struct vring_desc_extra *state)
 989{
 990        u16 flags;
 991
 992        if (!vq->use_dma_api)
 993                return;
 994
 995        flags = state->flags;
 996
 997        if (flags & VRING_DESC_F_INDIRECT) {
 998                dma_unmap_single(vring_dma_dev(vq),
 999                                 state->addr, state->len,
1000                                 (flags & VRING_DESC_F_WRITE) ?
1001                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1002        } else {
1003                dma_unmap_page(vring_dma_dev(vq),
1004                               state->addr, state->len,
1005                               (flags & VRING_DESC_F_WRITE) ?
1006                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
1007        }
1008}
1009
1010static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1011                                   struct vring_packed_desc *desc)
1012{
1013        u16 flags;
1014
1015        if (!vq->use_dma_api)
1016                return;
1017
1018        flags = le16_to_cpu(desc->flags);
1019
1020        if (flags & VRING_DESC_F_INDIRECT) {
1021                dma_unmap_single(vring_dma_dev(vq),
1022                                 le64_to_cpu(desc->addr),
1023                                 le32_to_cpu(desc->len),
1024                                 (flags & VRING_DESC_F_WRITE) ?
1025                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1026        } else {
1027                dma_unmap_page(vring_dma_dev(vq),
1028                               le64_to_cpu(desc->addr),
1029                               le32_to_cpu(desc->len),
1030                               (flags & VRING_DESC_F_WRITE) ?
1031                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
1032        }
1033}
1034
1035static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1036                                                       gfp_t gfp)
1037{
1038        struct vring_packed_desc *desc;
1039
1040        /*
1041         * We require lowmem mappings for the descriptors because
1042         * otherwise virt_to_phys will give us bogus addresses in the
1043         * virtqueue.
1044         */
1045        gfp &= ~__GFP_HIGHMEM;
1046
1047        desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1048
1049        return desc;
1050}
1051
1052static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1053                                       struct scatterlist *sgs[],
1054                                       unsigned int total_sg,
1055                                       unsigned int out_sgs,
1056                                       unsigned int in_sgs,
1057                                       void *data,
1058                                       gfp_t gfp)
1059{
1060        struct vring_packed_desc *desc;
1061        struct scatterlist *sg;
1062        unsigned int i, n, err_idx;
1063        u16 head, id;
1064        dma_addr_t addr;
1065
1066        head = vq->packed.next_avail_idx;
 1067        desc = alloc_indirect_packed(total_sg, gfp);
        if (!desc)
                return -ENOMEM;
 1068
1069        if (unlikely(vq->vq.num_free < 1)) {
1070                pr_debug("Can't add buf len 1 - avail = 0\n");
1071                kfree(desc);
1072                END_USE(vq);
1073                return -ENOSPC;
1074        }
1075
1076        i = 0;
1077        id = vq->free_head;
1078        BUG_ON(id == vq->packed.vring.num);
1079
1080        for (n = 0; n < out_sgs + in_sgs; n++) {
1081                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1082                        addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1083                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1084                        if (vring_mapping_error(vq, addr))
1085                                goto unmap_release;
1086
1087                        desc[i].flags = cpu_to_le16(n < out_sgs ?
1088                                                0 : VRING_DESC_F_WRITE);
1089                        desc[i].addr = cpu_to_le64(addr);
1090                        desc[i].len = cpu_to_le32(sg->length);
1091                        i++;
1092                }
1093        }
1094
1095        /* Now that the indirect table is filled in, map it. */
1096        addr = vring_map_single(vq, desc,
1097                        total_sg * sizeof(struct vring_packed_desc),
1098                        DMA_TO_DEVICE);
1099        if (vring_mapping_error(vq, addr))
1100                goto unmap_release;
1101
1102        vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1103        vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1104                                sizeof(struct vring_packed_desc));
1105        vq->packed.vring.desc[head].id = cpu_to_le16(id);
1106
1107        if (vq->use_dma_api) {
1108                vq->packed.desc_extra[id].addr = addr;
1109                vq->packed.desc_extra[id].len = total_sg *
1110                                sizeof(struct vring_packed_desc);
1111                vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1112                                                  vq->packed.avail_used_flags;
1113        }
1114
1115        /*
1116         * A driver MUST NOT make the first descriptor in the list
1117         * available before all subsequent descriptors comprising
1118         * the list are made available.
1119         */
1120        virtio_wmb(vq->weak_barriers);
1121        vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1122                                                vq->packed.avail_used_flags);
1123
1124        /* We're using some buffers from the free list. */
1125        vq->vq.num_free -= 1;
1126
1127        /* Update free pointer */
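        /* Wrapping past the end of the ring toggles the driver's wrap
         * counter and inverts the AVAIL/USED bits written into subsequent
         * descriptors; that is how the device distinguishes new entries
         * from stale ones in a packed ring. */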
1128        n = head + 1;
1129        if (n >= vq->packed.vring.num) {
1130                n = 0;
1131                vq->packed.avail_wrap_counter ^= 1;
1132                vq->packed.avail_used_flags ^=
1133                                1 << VRING_PACKED_DESC_F_AVAIL |
1134                                1 << VRING_PACKED_DESC_F_USED;
1135        }
1136        vq->packed.next_avail_idx = n;
1137        vq->free_head = vq->packed.desc_extra[id].next;
1138
1139        /* Store token and indirect buffer state. */
1140        vq->packed.desc_state[id].num = 1;
1141        vq->packed.desc_state[id].data = data;
1142        vq->packed.desc_state[id].indir_desc = desc;
1143        vq->packed.desc_state[id].last = id;
1144
1145        vq->num_added += 1;
1146
1147        pr_debug("Added buffer head %i to %p\n", head, vq);
1148        END_USE(vq);
1149
1150        return 0;
1151
1152unmap_release:
1153        err_idx = i;
1154
1155        for (i = 0; i < err_idx; i++)
1156                vring_unmap_desc_packed(vq, &desc[i]);
1157
1158        kfree(desc);
1159
1160        END_USE(vq);
1161        return -ENOMEM;
1162}
1163
1164static inline int virtqueue_add_packed(struct virtqueue *_vq,
1165                                       struct scatterlist *sgs[],
1166                                       unsigned int total_sg,
1167                                       unsigned int out_sgs,
1168                                       unsigned int in_sgs,
1169                                       void *data,
1170                                       void *ctx,
1171                                       gfp_t gfp)
1172{
1173        struct vring_virtqueue *vq = to_vvq(_vq);
1174        struct vring_packed_desc *desc;
1175        struct scatterlist *sg;
1176        unsigned int i, n, c, descs_used, err_idx;
1177        __le16 head_flags, flags;
1178        u16 head, id, prev, curr, avail_used_flags;
1179
1180        START_USE(vq);
1181
1182        BUG_ON(data == NULL);
1183        BUG_ON(ctx && vq->indirect);
1184
1185        if (unlikely(vq->broken)) {
1186                END_USE(vq);
1187                return -EIO;
1188        }
1189
1190        LAST_ADD_TIME_UPDATE(vq);
1191
1192        BUG_ON(total_sg == 0);
1193
1194        if (virtqueue_use_indirect(_vq, total_sg))
1195                return virtqueue_add_indirect_packed(vq, sgs, total_sg,
1196                                out_sgs, in_sgs, data, gfp);
1197
1198        head = vq->packed.next_avail_idx;
1199        avail_used_flags = vq->packed.avail_used_flags;
1200
1201        WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1202
1203        desc = vq->packed.vring.desc;
1204        i = head;
1205        descs_used = total_sg;
1206
1207        if (unlikely(vq->vq.num_free < descs_used)) {
1208                pr_debug("Can't add buf len %i - avail = %i\n",
1209                         descs_used, vq->vq.num_free);
1210                END_USE(vq);
1211                return -ENOSPC;
1212        }
1213
1214        id = vq->free_head;
1215        BUG_ON(id == vq->packed.vring.num);
1216
1217        curr = id;
1218        c = 0;
1219        for (n = 0; n < out_sgs + in_sgs; n++) {
1220                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1221                        dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1222                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1223                        if (vring_mapping_error(vq, addr))
1224                                goto unmap_release;
1225
1226                        flags = cpu_to_le16(vq->packed.avail_used_flags |
1227                                    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1228                                    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1229                        if (i == head)
1230                                head_flags = flags;
1231                        else
1232                                desc[i].flags = flags;
1233
1234                        desc[i].addr = cpu_to_le64(addr);
1235                        desc[i].len = cpu_to_le32(sg->length);
1236                        desc[i].id = cpu_to_le16(id);
1237
1238                        if (unlikely(vq->use_dma_api)) {
1239                                vq->packed.desc_extra[curr].addr = addr;
1240                                vq->packed.desc_extra[curr].len = sg->length;
1241                                vq->packed.desc_extra[curr].flags =
1242                                        le16_to_cpu(flags);
1243                        }
1244                        prev = curr;
1245                        curr = vq->packed.desc_extra[curr].next;
1246
1247                        if ((unlikely(++i >= vq->packed.vring.num))) {
1248                                i = 0;
1249                                vq->packed.avail_used_flags ^=
1250                                        1 << VRING_PACKED_DESC_F_AVAIL |
1251                                        1 << VRING_PACKED_DESC_F_USED;
1252                        }
1253                }
1254        }
1255
1256        if (i < head)
1257                vq->packed.avail_wrap_counter ^= 1;
1258
1259        /* We're using some buffers from the free list. */
1260        vq->vq.num_free -= descs_used;
1261
1262        /* Update free pointer */
1263        vq->packed.next_avail_idx = i;
1264        vq->free_head = curr;
1265
1266        /* Store token. */
1267        vq->packed.desc_state[id].num = descs_used;
1268        vq->packed.desc_state[id].data = data;
1269        vq->packed.desc_state[id].indir_desc = ctx;
1270        vq->packed.desc_state[id].last = prev;
1271
1272        /*
1273         * A driver MUST NOT make the first descriptor in the list
1274         * available before all subsequent descriptors comprising
1275         * the list are made available.
1276         */
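        /*
         * head_flags carries the AVAIL/USED bits that make the whole chain
         * visible to the device, so it is written only after the barrier,
         * once every other descriptor in the chain has been filled in.
         */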
1277        virtio_wmb(vq->weak_barriers);
1278        vq->packed.vring.desc[head].flags = head_flags;
1279        vq->num_added += descs_used;
1280
1281        pr_debug("Added buffer head %i to %p\n", head, vq);
1282        END_USE(vq);
1283
1284        return 0;
1285
1286unmap_release:
1287        err_idx = i;
1288        i = head;
1289        curr = vq->free_head;
1290
1291        vq->packed.avail_used_flags = avail_used_flags;
1292
1293        for (n = 0; n < total_sg; n++) {
1294                if (i == err_idx)
1295                        break;
1296                vring_unmap_state_packed(vq,
1297                                         &vq->packed.desc_extra[curr]);
1298                curr = vq->packed.desc_extra[curr].next;
1299                i++;
1300                if (i >= vq->packed.vring.num)
1301                        i = 0;
1302        }
1303
1304        END_USE(vq);
1305        return -EIO;
1306}
1307
1308static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1309{
1310        struct vring_virtqueue *vq = to_vvq(_vq);
1311        u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1312        bool needs_kick;
1313        union {
1314                struct {
1315                        __le16 off_wrap;
1316                        __le16 flags;
1317                };
1318                u32 u32;
1319        } snapshot;
1320
1321        START_USE(vq);
1322
1323        /*
1324         * We need to expose the new flags value before checking notification
1325         * suppressions.
1326         */
1327        virtio_mb(vq->weak_barriers);
1328
1329        old = vq->packed.next_avail_idx - vq->num_added;
1330        new = vq->packed.next_avail_idx;
1331        vq->num_added = 0;
1332
1333        snapshot.u32 = *(u32 *)vq->packed.vring.device;
1334        flags = le16_to_cpu(snapshot.flags);
1335
1336        LAST_ADD_TIME_CHECK(vq);
1337        LAST_ADD_TIME_INVALID(vq);
1338
1339        if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1340                needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1341                goto out;
1342        }
1343
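        /*
         * off_wrap packs the event index in bits 0..14 and the expected
         * wrap counter in bit 15 (VRING_PACKED_EVENT_F_WRAP_CTR); if that
         * counter differs from ours, the event index refers to the previous
         * lap of the ring, hence the adjustment by the ring size below.
         */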
1344        off_wrap = le16_to_cpu(snapshot.off_wrap);
1345
1346        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1347        event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1348        if (wrap_counter != vq->packed.avail_wrap_counter)
1349                event_idx -= vq->packed.vring.num;
1350
1351        needs_kick = vring_need_event(event_idx, new, old);
1352out:
1353        END_USE(vq);
1354        return needs_kick;
1355}
1356
1357static void detach_buf_packed(struct vring_virtqueue *vq,
1358                              unsigned int id, void **ctx)
1359{
1360        struct vring_desc_state_packed *state = NULL;
1361        struct vring_packed_desc *desc;
1362        unsigned int i, curr;
1363
1364        state = &vq->packed.desc_state[id];
1365
1366        /* Clear data ptr. */
1367        state->data = NULL;
1368
1369        vq->packed.desc_extra[state->last].next = vq->free_head;
1370        vq->free_head = id;
1371        vq->vq.num_free += state->num;
1372
1373        if (unlikely(vq->use_dma_api)) {
1374                curr = id;
1375                for (i = 0; i < state->num; i++) {
1376                        vring_unmap_state_packed(vq,
1377                                &vq->packed.desc_extra[curr]);
1378                        curr = vq->packed.desc_extra[curr].next;
1379                }
1380        }
1381
1382        if (vq->indirect) {
1383                u32 len;
1384
1385                /* Free the indirect table, if any, now that it's unmapped. */
1386                desc = state->indir_desc;
1387                if (!desc)
1388                        return;
1389
1390                if (vq->use_dma_api) {
1391                        len = vq->packed.desc_extra[id].len;
1392                        for (i = 0; i < len / sizeof(struct vring_packed_desc);
1393                                        i++)
1394                                vring_unmap_desc_packed(vq, &desc[i]);
1395                }
1396                kfree(desc);
1397                state->indir_desc = NULL;
1398        } else if (ctx) {
1399                *ctx = state->indir_desc;
1400        }
1401}
1402
1403static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1404                                       u16 idx, bool used_wrap_counter)
1405{
1406        bool avail, used;
1407        u16 flags;
1408
1409        flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1410        avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1411        used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1412
1413        return avail == used && used == used_wrap_counter;
1414}
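/*
 * The driver posts a descriptor with AVAIL equal to its wrap counter and
 * USED set to the opposite value; the device marks it used by writing both
 * bits equal to its own wrap counter.  A slot is therefore consumed once
 * AVAIL == USED == the wrap counter we currently expect.
 */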
1415
1416static inline bool more_used_packed(const struct vring_virtqueue *vq)
1417{
1418        return is_used_desc_packed(vq, vq->last_used_idx,
1419                        vq->packed.used_wrap_counter);
1420}
1421
1422static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1423                                          unsigned int *len,
1424                                          void **ctx)
1425{
1426        struct vring_virtqueue *vq = to_vvq(_vq);
1427        u16 last_used, id;
1428        void *ret;
1429
1430        START_USE(vq);
1431
1432        if (unlikely(vq->broken)) {
1433                END_USE(vq);
1434                return NULL;
1435        }
1436
1437        if (!more_used_packed(vq)) {
1438                pr_debug("No more buffers in queue\n");
1439                END_USE(vq);
1440                return NULL;
1441        }
1442
1443        /* Only get used elements after they have been exposed by host. */
1444        virtio_rmb(vq->weak_barriers);
1445
1446        last_used = vq->last_used_idx;
1447        id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1448        *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1449
1450        if (unlikely(id >= vq->packed.vring.num)) {
1451                BAD_RING(vq, "id %u out of range\n", id);
1452                return NULL;
1453        }
1454        if (unlikely(!vq->packed.desc_state[id].data)) {
1455                BAD_RING(vq, "id %u is not a head!\n", id);
1456                return NULL;
1457        }
1458
1459        /* detach_buf_packed clears data, so grab it now. */
1460        ret = vq->packed.desc_state[id].data;
1461        detach_buf_packed(vq, id, ctx);
1462
1463        vq->last_used_idx += vq->packed.desc_state[id].num;
1464        if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1465                vq->last_used_idx -= vq->packed.vring.num;
1466                vq->packed.used_wrap_counter ^= 1;
1467        }
1468
1469        /*
1470         * If we expect an interrupt for the next entry, tell host
1471         * by writing event index and flush out the write before
1472         * the read in the next get_buf call.
1473         */
1474        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1475                virtio_store_mb(vq->weak_barriers,
1476                                &vq->packed.vring.driver->off_wrap,
1477                                cpu_to_le16(vq->last_used_idx |
1478                                        (vq->packed.used_wrap_counter <<
1479                                         VRING_PACKED_EVENT_F_WRAP_CTR)));
1480
1481        LAST_ADD_TIME_INVALID(vq);
1482
1483        END_USE(vq);
1484        return ret;
1485}
1486
1487static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1488{
1489        struct vring_virtqueue *vq = to_vvq(_vq);
1490
1491        if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1492                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1493                vq->packed.vring.driver->flags =
1494                        cpu_to_le16(vq->packed.event_flags_shadow);
1495        }
1496}
1497
1498static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1499{
1500        struct vring_virtqueue *vq = to_vvq(_vq);
1501
1502        START_USE(vq);
1503
1504        /*
1505         * We optimistically turn back on interrupts, then check if there was
1506         * more to do.
1507         */
1508
1509        if (vq->event) {
1510                vq->packed.vring.driver->off_wrap =
1511                        cpu_to_le16(vq->last_used_idx |
1512                                (vq->packed.used_wrap_counter <<
1513                                 VRING_PACKED_EVENT_F_WRAP_CTR));
1514                /*
1515                 * We need to update event offset and event wrap
1516                 * counter first before updating event flags.
1517                 */
1518                virtio_wmb(vq->weak_barriers);
1519        }
1520
1521        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1522                vq->packed.event_flags_shadow = vq->event ?
1523                                VRING_PACKED_EVENT_FLAG_DESC :
1524                                VRING_PACKED_EVENT_FLAG_ENABLE;
1525                vq->packed.vring.driver->flags =
1526                                cpu_to_le16(vq->packed.event_flags_shadow);
1527        }
1528
1529        END_USE(vq);
1530        return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1531                        VRING_PACKED_EVENT_F_WRAP_CTR);
1532}
1533
1534static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1535{
1536        struct vring_virtqueue *vq = to_vvq(_vq);
1537        bool wrap_counter;
1538        u16 used_idx;
1539
1540        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1541        used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1542
1543        return is_used_desc_packed(vq, used_idx, wrap_counter);
1544}
1545
1546static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1547{
1548        struct vring_virtqueue *vq = to_vvq(_vq);
1549        u16 used_idx, wrap_counter;
1550        u16 bufs;
1551
1552        START_USE(vq);
1553
1554        /*
1555         * We optimistically turn back on interrupts, then check if there was
1556         * more to do.
1557         */
1558
1559        if (vq->event) {
1560                /* TODO: tune this threshold */
1561                bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1562                wrap_counter = vq->packed.used_wrap_counter;
1563
1564                used_idx = vq->last_used_idx + bufs;
1565                if (used_idx >= vq->packed.vring.num) {
1566                        used_idx -= vq->packed.vring.num;
1567                        wrap_counter ^= 1;
1568                }
1569
1570                vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1571                        (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1572
1573                /*
1574                 * We need to update event offset and event wrap
1575                 * counter first before updating event flags.
1576                 */
1577                virtio_wmb(vq->weak_barriers);
1578        }
1579
1580        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1581                vq->packed.event_flags_shadow = vq->event ?
1582                                VRING_PACKED_EVENT_FLAG_DESC :
1583                                VRING_PACKED_EVENT_FLAG_ENABLE;
1584                vq->packed.vring.driver->flags =
1585                                cpu_to_le16(vq->packed.event_flags_shadow);
1586        }
1587
1588        /*
1589         * We need to update event suppression structure first
1590         * before re-checking for more used buffers.
1591         */
1592        virtio_mb(vq->weak_barriers);
1593
1594        if (is_used_desc_packed(vq,
1595                                vq->last_used_idx,
1596                                vq->packed.used_wrap_counter)) {
1597                END_USE(vq);
1598                return false;
1599        }
1600
1601        END_USE(vq);
1602        return true;
1603}
1604
1605static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1606{
1607        struct vring_virtqueue *vq = to_vvq(_vq);
1608        unsigned int i;
1609        void *buf;
1610
1611        START_USE(vq);
1612
1613        for (i = 0; i < vq->packed.vring.num; i++) {
1614                if (!vq->packed.desc_state[i].data)
1615                        continue;
1616                /* detach_buf clears data, so grab it now. */
1617                buf = vq->packed.desc_state[i].data;
1618                detach_buf_packed(vq, i, NULL);
1619                END_USE(vq);
1620                return buf;
1621        }
1622        /* That should have freed everything. */
1623        BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1624
1625        END_USE(vq);
1626        return NULL;
1627}
1628
1629static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1630                                                       unsigned int num)
1631{
1632        struct vring_desc_extra *desc_extra;
1633        unsigned int i;
1634
1635        desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1636                                   GFP_KERNEL);
1637        if (!desc_extra)
1638                return NULL;
1639
1640        memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1641
1642        for (i = 0; i < num - 1; i++)
1643                desc_extra[i].next = i + 1;
1644
1645        return desc_extra;
1646}
1647
1648static struct virtqueue *vring_create_virtqueue_packed(
1649        unsigned int index,
1650        unsigned int num,
1651        unsigned int vring_align,
1652        struct virtio_device *vdev,
1653        bool weak_barriers,
1654        bool may_reduce_num,
1655        bool context,
1656        bool (*notify)(struct virtqueue *),
1657        void (*callback)(struct virtqueue *),
1658        const char *name)
1659{
1660        struct vring_virtqueue *vq;
1661        struct vring_packed_desc *ring;
1662        struct vring_packed_desc_event *driver, *device;
1663        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1664        size_t ring_size_in_bytes, event_size_in_bytes;
1665
1666        ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1667
1668        ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1669                                 &ring_dma_addr,
1670                                 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1671        if (!ring)
1672                goto err_ring;
1673
1674        event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1675
1676        driver = vring_alloc_queue(vdev, event_size_in_bytes,
1677                                   &driver_event_dma_addr,
1678                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1679        if (!driver)
1680                goto err_driver;
1681
1682        device = vring_alloc_queue(vdev, event_size_in_bytes,
1683                                   &device_event_dma_addr,
1684                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1685        if (!device)
1686                goto err_device;
1687
1688        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1689        if (!vq)
1690                goto err_vq;
1691
1692        vq->vq.callback = callback;
1693        vq->vq.vdev = vdev;
1694        vq->vq.name = name;
1695        vq->vq.num_free = num;
1696        vq->vq.index = index;
1697        vq->we_own_ring = true;
1698        vq->notify = notify;
1699        vq->weak_barriers = weak_barriers;
1700        vq->broken = false;
1701        vq->last_used_idx = 0;
1702        vq->event_triggered = false;
1703        vq->num_added = 0;
1704        vq->packed_ring = true;
1705        vq->use_dma_api = vring_use_dma_api(vdev);
1706#ifdef DEBUG
1707        vq->in_use = false;
1708        vq->last_add_time_valid = false;
1709#endif
1710
1711        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1712                !context;
1713        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1714
1715        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1716                vq->weak_barriers = false;
1717
1718        vq->packed.ring_dma_addr = ring_dma_addr;
1719        vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1720        vq->packed.device_event_dma_addr = device_event_dma_addr;
1721
1722        vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1723        vq->packed.event_size_in_bytes = event_size_in_bytes;
1724
1725        vq->packed.vring.num = num;
1726        vq->packed.vring.desc = ring;
1727        vq->packed.vring.driver = driver;
1728        vq->packed.vring.device = device;
1729
1730        vq->packed.next_avail_idx = 0;
1731        vq->packed.avail_wrap_counter = 1;
1732        vq->packed.used_wrap_counter = 1;
1733        vq->packed.event_flags_shadow = 0;
1734        vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1735
1736        vq->packed.desc_state = kmalloc_array(num,
1737                        sizeof(struct vring_desc_state_packed),
1738                        GFP_KERNEL);
1739        if (!vq->packed.desc_state)
1740                goto err_desc_state;
1741
1742        memset(vq->packed.desc_state, 0,
1743                num * sizeof(struct vring_desc_state_packed));
1744
1745        /* Put everything in free lists. */
1746        vq->free_head = 0;
1747
1748        vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1749        if (!vq->packed.desc_extra)
1750                goto err_desc_extra;
1751
1752        /* No callback?  Tell other side not to bother us. */
1753        if (!callback) {
1754                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1755                vq->packed.vring.driver->flags =
1756                        cpu_to_le16(vq->packed.event_flags_shadow);
1757        }
1758
1759        spin_lock(&vdev->vqs_list_lock);
1760        list_add_tail(&vq->vq.list, &vdev->vqs);
1761        spin_unlock(&vdev->vqs_list_lock);
1762        return &vq->vq;
1763
1764err_desc_extra:
1765        kfree(vq->packed.desc_state);
1766err_desc_state:
1767        kfree(vq);
1768err_vq:
1769        vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1770err_device:
1771        vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1772err_driver:
1773        vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1774err_ring:
1775        return NULL;
1776}
1777
1779/*
1780 * Generic functions and exported symbols.
1781 */
1782
1783static inline int virtqueue_add(struct virtqueue *_vq,
1784                                struct scatterlist *sgs[],
1785                                unsigned int total_sg,
1786                                unsigned int out_sgs,
1787                                unsigned int in_sgs,
1788                                void *data,
1789                                void *ctx,
1790                                gfp_t gfp)
1791{
1792        struct vring_virtqueue *vq = to_vvq(_vq);
1793
1794        return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1795                                        out_sgs, in_sgs, data, ctx, gfp) :
1796                                 virtqueue_add_split(_vq, sgs, total_sg,
1797                                        out_sgs, in_sgs, data, ctx, gfp);
1798}
1799
1800/**
1801 * virtqueue_add_sgs - expose buffers to other end
1802 * @_vq: the struct virtqueue we're talking about.
1803 * @sgs: array of terminated scatterlists.
1804 * @out_sgs: the number of scatterlists readable by other side
1805 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1806 * @data: the token identifying the buffer.
1807 * @gfp: how to do memory allocations (if necessary).
1808 *
1809 * Caller must ensure we don't call this with other virtqueue operations
1810 * at the same time (except where noted).
1811 *
1812 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1813 */
1814int virtqueue_add_sgs(struct virtqueue *_vq,
1815                      struct scatterlist *sgs[],
1816                      unsigned int out_sgs,
1817                      unsigned int in_sgs,
1818                      void *data,
1819                      gfp_t gfp)
1820{
1821        unsigned int i, total_sg = 0;
1822
1823        /* Count them first. */
1824        for (i = 0; i < out_sgs + in_sgs; i++) {
1825                struct scatterlist *sg;
1826
1827                for (sg = sgs[i]; sg; sg = sg_next(sg))
1828                        total_sg++;
1829        }
1830        return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1831                             data, NULL, gfp);
1832}
1833EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
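
/*
 * Illustrative sketch, not used by this file: a request with one
 * device-readable header and one device-writable status byte.  req,
 * req_hdr and err are hypothetical; the @data token (req here) is what
 * virtqueue_get_buf() hands back once the device is done:
 *
 *      struct scatterlist hdr, status, *sgs[2];
 *
 *      sg_init_one(&hdr, req_hdr, sizeof(*req_hdr));
 *      sg_init_one(&status, &req->status, sizeof(req->status));
 *      sgs[0] = &hdr;
 *      sgs[1] = &status;
 *      err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 */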
1834
1835/**
1836 * virtqueue_add_outbuf - expose output buffers to other end
1837 * @vq: the struct virtqueue we're talking about.
1838 * @sg: scatterlist (must be well-formed and terminated!)
1839 * @num: the number of entries in @sg readable by other side
1840 * @data: the token identifying the buffer.
1841 * @gfp: how to do memory allocations (if necessary).
1842 *
1843 * Caller must ensure we don't call this with other virtqueue operations
1844 * at the same time (except where noted).
1845 *
1846 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1847 */
1848int virtqueue_add_outbuf(struct virtqueue *vq,
1849                         struct scatterlist *sg, unsigned int num,
1850                         void *data,
1851                         gfp_t gfp)
1852{
1853        return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1854}
1855EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
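
/*
 * Illustrative sketch, not used by this file: queue a single
 * device-readable buffer and notify the device.  buf, len and err are
 * hypothetical:
 *
 *      struct scatterlist sg;
 *
 *      sg_init_one(&sg, buf, len);
 *      err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 */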
1856
1857/**
1858 * virtqueue_add_inbuf - expose input buffers to other end
1859 * @vq: the struct virtqueue we're talking about.
1860 * @sg: scatterlist (must be well-formed and terminated!)
1861 * @num: the number of entries in @sg writable by other side
1862 * @data: the token identifying the buffer.
1863 * @gfp: how to do memory allocations (if necessary).
1864 *
1865 * Caller must ensure we don't call this with other virtqueue operations
1866 * at the same time (except where noted).
1867 *
1868 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1869 */
1870int virtqueue_add_inbuf(struct virtqueue *vq,
1871                        struct scatterlist *sg, unsigned int num,
1872                        void *data,
1873                        gfp_t gfp)
1874{
1875        return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1876}
1877EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
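
/*
 * Illustrative sketch, not used by this file: post an empty buffer for
 * the device to fill.  rxbuf, RX_BUF_SIZE and err are hypothetical; the
 * buffer pointer doubles as the token later returned by
 * virtqueue_get_buf():
 *
 *      struct scatterlist sg;
 *
 *      sg_init_one(&sg, rxbuf, RX_BUF_SIZE);
 *      err = virtqueue_add_inbuf(vq, &sg, 1, rxbuf, GFP_KERNEL);
 */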
1878
1879/**
1880 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1881 * @vq: the struct virtqueue we're talking about.
1882 * @sg: scatterlist (must be well-formed and terminated!)
1883 * @num: the number of entries in @sg writable by other side
1884 * @data: the token identifying the buffer.
1885 * @ctx: extra context for the token
1886 * @gfp: how to do memory allocations (if necessary).
1887 *
1888 * Caller must ensure we don't call this with other virtqueue operations
1889 * at the same time (except where noted).
1890 *
1891 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1892 */
1893int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1894                        struct scatterlist *sg, unsigned int num,
1895                        void *data,
1896                        void *ctx,
1897                        gfp_t gfp)
1898{
1899        return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1900}
1901EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1902
1903/**
1904 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1905 * @_vq: the struct virtqueue
1906 *
1907 * Instead of virtqueue_kick(), you can do:
1908 *      if (virtqueue_kick_prepare(vq))
1909 *              virtqueue_notify(vq);
1910 *
1911 * This is sometimes useful because virtqueue_kick_prepare() needs to be
1912 * serialized, but the actual virtqueue_notify() call does not.
1913 */
1914bool virtqueue_kick_prepare(struct virtqueue *_vq)
1915{
1916        struct vring_virtqueue *vq = to_vvq(_vq);
1917
1918        return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1919                                 virtqueue_kick_prepare_split(_vq);
1920}
1921EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1922
1923/**
1924 * virtqueue_notify - second half of split virtqueue_kick call.
1925 * @_vq: the struct virtqueue
1926 *
1927 * This does not need to be serialized.
1928 *
1929 * Returns false if host notify failed or queue is broken, otherwise true.
1930 */
1931bool virtqueue_notify(struct virtqueue *_vq)
1932{
1933        struct vring_virtqueue *vq = to_vvq(_vq);
1934
1935        if (unlikely(vq->broken))
1936                return false;
1937
1938        /* Prod other side to tell it about changes. */
1939        if (!vq->notify(_vq)) {
1940                vq->broken = true;
1941                return false;
1942        }
1943        return true;
1944}
1945EXPORT_SYMBOL_GPL(virtqueue_notify);
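
/*
 * A common pattern, sketched with a hypothetical driver lock (priv,
 * vq_lock, flags, sg, buf, err and kick are not defined here): do the
 * serialized half under the lock, then notify outside it, since
 * notification can be expensive on some transports:
 *
 *      spin_lock_irqsave(&priv->vq_lock, flags);
 *      err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *      kick = virtqueue_kick_prepare(vq);
 *      spin_unlock_irqrestore(&priv->vq_lock, flags);
 *      if (kick)
 *              virtqueue_notify(vq);
 */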
1946
1947/**
1948 * virtqueue_kick - update after add_buf
1949 * @vq: the struct virtqueue
1950 *
1951 * After one or more virtqueue_add_* calls, invoke this to kick
1952 * the other side.
1953 *
1954 * Caller must ensure we don't call this with other virtqueue
1955 * operations at the same time (except where noted).
1956 *
1957 * Returns false if kick failed, otherwise true.
1958 */
1959bool virtqueue_kick(struct virtqueue *vq)
1960{
1961        if (virtqueue_kick_prepare(vq))
1962                return virtqueue_notify(vq);
1963        return true;
1964}
1965EXPORT_SYMBOL_GPL(virtqueue_kick);
1966
1967/**
1968 * virtqueue_get_buf_ctx - get the next used buffer
1969 * @_vq: the struct virtqueue we're talking about.
1970 * @len: the length written into the buffer
1971 * @ctx: extra context for the token
1972 *
1973 * If the device wrote data into the buffer, @len will be set to the
1974 * amount written.  This means you don't need to clear the buffer
1975 * beforehand to ensure there's no data leakage in the case of short
1976 * writes.
1977 *
1978 * Caller must ensure we don't call this with other virtqueue
1979 * operations at the same time (except where noted).
1980 *
1981 * Returns NULL if there are no used buffers, or the "data" token
1982 * handed to virtqueue_add_*().
1983 */
1984void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1985                            void **ctx)
1986{
1987        struct vring_virtqueue *vq = to_vvq(_vq);
1988
1989        return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1990                                 virtqueue_get_buf_ctx_split(_vq, len, ctx);
1991}
1992EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1993
1994void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1995{
1996        return virtqueue_get_buf_ctx(_vq, len, NULL);
1997}
1998EXPORT_SYMBOL_GPL(virtqueue_get_buf);
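
/*
 * Typical completion handling, sketched with a hypothetical
 * handle_completion(): reap every used buffer the device has returned,
 * each identified by the token passed to virtqueue_add_*():
 *
 *      unsigned int len;
 *      void *buf;
 *
 *      while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *              handle_completion(buf, len);
 */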

1999/**
2000 * virtqueue_disable_cb - disable callbacks
2001 * @_vq: the struct virtqueue we're talking about.
2002 *
2003 * Note that this is not necessarily synchronous, hence unreliable and only
2004 * useful as an optimization.
2005 *
2006 * Unlike other operations, this need not be serialized.
2007 */
2008void virtqueue_disable_cb(struct virtqueue *_vq)
2009{
2010        struct vring_virtqueue *vq = to_vvq(_vq);
2011
2012        /* If device triggered an event already it won't trigger one again:
2013         * no need to disable.
2014         */
2015        if (vq->event_triggered)
2016                return;
2017
2018        if (vq->packed_ring)
2019                virtqueue_disable_cb_packed(_vq);
2020        else
2021                virtqueue_disable_cb_split(_vq);
2022}
2023EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2024
2025/**
2026 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2027 * @_vq: the struct virtqueue we're talking about.
2028 *
2029 * This re-enables callbacks; it returns current queue state
2030 * in an opaque unsigned value. This value should be later tested by
2031 * virtqueue_poll, to detect a possible race between the driver checking for
2032 * more work, and enabling callbacks.
2033 *
2034 * Caller must ensure we don't call this with other virtqueue
2035 * operations at the same time (except where noted).
2036 */
2037unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2038{
2039        struct vring_virtqueue *vq = to_vvq(_vq);
2040
2041        if (vq->event_triggered)
2042                vq->event_triggered = false;
2043
2044        return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2045                                 virtqueue_enable_cb_prepare_split(_vq);
2046}
2047EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2048
2049/**
2050 * virtqueue_poll - query pending used buffers
2051 * @_vq: the struct virtqueue we're talking about.
2052 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2053 *
2054 * Returns "true" if there are pending used buffers in the queue.
2055 *
2056 * This does not need to be serialized.
2057 */
2058bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
2059{
2060        struct vring_virtqueue *vq = to_vvq(_vq);
2061
2062        if (unlikely(vq->broken))
2063                return false;
2064
2065        virtio_mb(vq->weak_barriers);
2066        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2067                                 virtqueue_poll_split(_vq, last_used_idx);
2068}
2069EXPORT_SYMBOL_GPL(virtqueue_poll);
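
/*
 * Sketch of the race-free re-enable pattern these helpers support
 * (opaque is a hypothetical local; virtqueue_enable_cb() below is the
 * ready-made combination of the two calls):
 *
 *      opaque = virtqueue_enable_cb_prepare(vq);
 *      if (virtqueue_poll(vq, opaque)) {
 *              virtqueue_disable_cb(vq);
 *              ... buffers arrived while re-enabling; keep processing ...
 *      }
 */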
2070
2071/**
2072 * virtqueue_enable_cb - restart callbacks after disable_cb.
2073 * @_vq: the struct virtqueue we're talking about.
2074 *
2075 * This re-enables callbacks; it returns "false" if there are pending
2076 * buffers in the queue, to detect a possible race between the driver
2077 * checking for more work, and enabling callbacks.
2078 *
2079 * Caller must ensure we don't call this with other virtqueue
2080 * operations at the same time (except where noted).
2081 */
2082bool virtqueue_enable_cb(struct virtqueue *_vq)
2083{
2084        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
2085
2086        return !virtqueue_poll(_vq, last_used_idx);
2087}
2088EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
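
/*
 * Sketch of the usual callback-driven processing loop (process_buf(),
 * buf and len are hypothetical):
 *
 *      do {
 *              virtqueue_disable_cb(vq);
 *              while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *                      process_buf(buf, len);
 *      } while (!virtqueue_enable_cb(vq));
 */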
2089
2090/**
2091 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2092 * @_vq: the struct virtqueue we're talking about.
2093 *
2094 * This re-enables callbacks but hints to the other side to delay
2095 * interrupts until most of the available buffers have been processed;
2096 * it returns "false" if there are many pending buffers in the queue,
2097 * to detect a possible race between the driver checking for more work,
2098 * and enabling callbacks.
2099 *
2100 * Caller must ensure we don't call this with other virtqueue
2101 * operations at the same time (except where noted).
2102 */
2103bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2104{
2105        struct vring_virtqueue *vq = to_vvq(_vq);
2106
2107        if (vq->event_triggered)
2108                vq->event_triggered = false;
2109
2110        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2111                                 virtqueue_enable_cb_delayed_split(_vq);
2112}
2113EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
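
/*
 * Sketch of a typical use on a transmit queue (free_old_xmit() is
 * hypothetical): if the helper reports that many used buffers are
 * already pending, reclaim them now instead of waiting for the delayed
 * interrupt:
 *
 *      if (!virtqueue_enable_cb_delayed(vq))
 *              free_old_xmit(vq);
 */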
2114
2115/**
2116 * virtqueue_detach_unused_buf - detach first unused buffer
2117 * @_vq: the struct virtqueue we're talking about.
2118 *
2119 * Returns NULL or the "data" token handed to virtqueue_add_*().
2120 * This is not valid on an active queue; it is useful only for device
2121 * shutdown.
2122 */
2123void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2124{
2125        struct vring_virtqueue *vq = to_vvq(_vq);
2126
2127        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2128                                 virtqueue_detach_unused_buf_split(_vq);
2129}
2130EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
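
/*
 * Sketch of a typical teardown path (free_buf() is hypothetical), run
 * only after the device has been reset so the queue is no longer
 * active:
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *              free_buf(buf);
 */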
2131
2132static inline bool more_used(const struct vring_virtqueue *vq)
2133{
2134        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2135}
2136
2137irqreturn_t vring_interrupt(int irq, void *_vq)
2138{
2139        struct vring_virtqueue *vq = to_vvq(_vq);
2140
2141        if (!more_used(vq)) {
2142                pr_debug("virtqueue interrupt with no work for %p\n", vq);
2143                return IRQ_NONE;
2144        }
2145
2146        if (unlikely(vq->broken))
2147                return IRQ_HANDLED;
2148
2149        /* Just a hint for performance: so it's ok that this can be racy! */
2150        if (vq->event)
2151                vq->event_triggered = true;
2152
2153        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2154        if (vq->vq.callback)
2155                vq->vq.callback(&vq->vq);
2156
2157        return IRQ_HANDLED;
2158}
2159EXPORT_SYMBOL_GPL(vring_interrupt);
2160
2161/* Only available for split ring */
2162struct virtqueue *__vring_new_virtqueue(unsigned int index,
2163                                        struct vring vring,
2164                                        struct virtio_device *vdev,
2165                                        bool weak_barriers,
2166                                        bool context,
2167                                        bool (*notify)(struct virtqueue *),
2168                                        void (*callback)(struct virtqueue *),
2169                                        const char *name)
2170{
2171        struct vring_virtqueue *vq;
2172
2173        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2174                return NULL;
2175
2176        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2177        if (!vq)
2178                return NULL;
2179
2180        vq->packed_ring = false;
2181        vq->vq.callback = callback;
2182        vq->vq.vdev = vdev;
2183        vq->vq.name = name;
2184        vq->vq.num_free = vring.num;
2185        vq->vq.index = index;
2186        vq->we_own_ring = false;
2187        vq->notify = notify;
2188        vq->weak_barriers = weak_barriers;
2189        vq->broken = false;
2190        vq->last_used_idx = 0;
2191        vq->event_triggered = false;
2192        vq->num_added = 0;
2193        vq->use_dma_api = vring_use_dma_api(vdev);
2194#ifdef DEBUG
2195        vq->in_use = false;
2196        vq->last_add_time_valid = false;
2197#endif
2198
2199        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2200                !context;
2201        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2202
2203        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2204                vq->weak_barriers = false;
2205
2206        vq->split.queue_dma_addr = 0;
2207        vq->split.queue_size_in_bytes = 0;
2208
2209        vq->split.vring = vring;
2210        vq->split.avail_flags_shadow = 0;
2211        vq->split.avail_idx_shadow = 0;
2212
2213        /* No callback?  Tell other side not to bother us. */
2214        if (!callback) {
2215                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2216                if (!vq->event)
2217                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2218                                        vq->split.avail_flags_shadow);
2219        }
2220
2221        vq->split.desc_state = kmalloc_array(vring.num,
2222                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
2223        if (!vq->split.desc_state)
2224                goto err_state;
2225
2226        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
2227        if (!vq->split.desc_extra)
2228                goto err_extra;
2229
2230        /* Put everything in free lists. */
2231        vq->free_head = 0;
2232        memset(vq->split.desc_state, 0, vring.num *
2233                        sizeof(struct vring_desc_state_split));
2234
2235        spin_lock(&vdev->vqs_list_lock);
2236        list_add_tail(&vq->vq.list, &vdev->vqs);
2237        spin_unlock(&vdev->vqs_list_lock);
2238        return &vq->vq;
2239
2240err_extra:
2241        kfree(vq->split.desc_state);
2242err_state:
2243        kfree(vq);
2244        return NULL;
2245}
2246EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2247
2248struct virtqueue *vring_create_virtqueue(
2249        unsigned int index,
2250        unsigned int num,
2251        unsigned int vring_align,
2252        struct virtio_device *vdev,
2253        bool weak_barriers,
2254        bool may_reduce_num,
2255        bool context,
2256        bool (*notify)(struct virtqueue *),
2257        void (*callback)(struct virtqueue *),
2258        const char *name)
2259{
2261        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2262                return vring_create_virtqueue_packed(index, num, vring_align,
2263                                vdev, weak_barriers, may_reduce_num,
2264                                context, notify, callback, name);
2265
2266        return vring_create_virtqueue_split(index, num, vring_align,
2267                        vdev, weak_barriers, may_reduce_num,
2268                        context, notify, callback, name);
2269}
2270EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2271
2272/* Only available for split ring */
2273struct virtqueue *vring_new_virtqueue(unsigned int index,
2274                                      unsigned int num,
2275                                      unsigned int vring_align,
2276                                      struct virtio_device *vdev,
2277                                      bool weak_barriers,
2278                                      bool context,
2279                                      void *pages,
2280                                      bool (*notify)(struct virtqueue *vq),
2281                                      void (*callback)(struct virtqueue *vq),
2282                                      const char *name)
2283{
2284        struct vring vring;
2285
2286        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2287                return NULL;
2288
2289        vring_init(&vring, num, pages, vring_align);
2290        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2291                                     notify, callback, name);
2292}
2293EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2294
2295void vring_del_virtqueue(struct virtqueue *_vq)
2296{
2297        struct vring_virtqueue *vq = to_vvq(_vq);
2298
2299        spin_lock(&vq->vq.vdev->vqs_list_lock);
2300        list_del(&_vq->list);
2301        spin_unlock(&vq->vq.vdev->vqs_list_lock);
2302
2303        if (vq->we_own_ring) {
2304                if (vq->packed_ring) {
2305                        vring_free_queue(vq->vq.vdev,
2306                                         vq->packed.ring_size_in_bytes,
2307                                         vq->packed.vring.desc,
2308                                         vq->packed.ring_dma_addr);
2309
2310                        vring_free_queue(vq->vq.vdev,
2311                                         vq->packed.event_size_in_bytes,
2312                                         vq->packed.vring.driver,
2313                                         vq->packed.driver_event_dma_addr);
2314
2315                        vring_free_queue(vq->vq.vdev,
2316                                         vq->packed.event_size_in_bytes,
2317                                         vq->packed.vring.device,
2318                                         vq->packed.device_event_dma_addr);
2319
2320                        kfree(vq->packed.desc_state);
2321                        kfree(vq->packed.desc_extra);
2322                } else {
2323                        vring_free_queue(vq->vq.vdev,
2324                                         vq->split.queue_size_in_bytes,
2325                                         vq->split.vring.desc,
2326                                         vq->split.queue_dma_addr);
2327                }
2328        }
2329        if (!vq->packed_ring) {
2330                kfree(vq->split.desc_state);
2331                kfree(vq->split.desc_extra);
2332        }
2333        kfree(vq);
2334}
2335EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2336
2337/* Manipulates transport-specific feature bits. */
2338void vring_transport_features(struct virtio_device *vdev)
2339{
2340        unsigned int i;
2341
2342        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2343                switch (i) {
2344                case VIRTIO_RING_F_INDIRECT_DESC:
2345                        break;
2346                case VIRTIO_RING_F_EVENT_IDX:
2347                        break;
2348                case VIRTIO_F_VERSION_1:
2349                        break;
2350                case VIRTIO_F_ACCESS_PLATFORM:
2351                        break;
2352                case VIRTIO_F_RING_PACKED:
2353                        break;
2354                case VIRTIO_F_ORDER_PLATFORM:
2355                        break;
2356                default:
2357                        /* We don't understand this bit. */
2358                        __virtio_clear_bit(vdev, i);
2359                }
2360        }
2361}
2362EXPORT_SYMBOL_GPL(vring_transport_features);
2363
2364/**
2365 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2366 * @_vq: the struct virtqueue containing the vring of interest.
2367 *
2368 * Returns the size of the vring.  This is mainly used for boasting to
2369 * userspace.  Unlike other operations, this need not be serialized.
2370 */
2371unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2372{
2374        struct vring_virtqueue *vq = to_vvq(_vq);
2375
2376        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2377}
2378EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2379
2380bool virtqueue_is_broken(struct virtqueue *_vq)
2381{
2382        struct vring_virtqueue *vq = to_vvq(_vq);
2383
2384        return READ_ONCE(vq->broken);
2385}
2386EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2387
2388/*
2389 * This should prevent the device from being used, allowing drivers to
2390 * recover.  You may need to grab appropriate locks to flush.
2391 */
2392void virtio_break_device(struct virtio_device *dev)
2393{
2394        struct virtqueue *_vq;
2395
2396        spin_lock(&dev->vqs_list_lock);
2397        list_for_each_entry(_vq, &dev->vqs, list) {
2398                struct vring_virtqueue *vq = to_vvq(_vq);
2399
2400                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2401                WRITE_ONCE(vq->broken, true);
2402        }
2403        spin_unlock(&dev->vqs_list_lock);
2404}
2405EXPORT_SYMBOL_GPL(virtio_break_device);
2406
2407dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2408{
2409        struct vring_virtqueue *vq = to_vvq(_vq);
2410
2411        BUG_ON(!vq->we_own_ring);
2412
2413        if (vq->packed_ring)
2414                return vq->packed.ring_dma_addr;
2415
2416        return vq->split.queue_dma_addr;
2417}
2418EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2419
2420dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2421{
2422        struct vring_virtqueue *vq = to_vvq(_vq);
2423
2424        BUG_ON(!vq->we_own_ring);
2425
2426        if (vq->packed_ring)
2427                return vq->packed.driver_event_dma_addr;
2428
2429        return vq->split.queue_dma_addr +
2430                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2431}
2432EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2433
2434dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2435{
2436        struct vring_virtqueue *vq = to_vvq(_vq);
2437
2438        BUG_ON(!vq->we_own_ring);
2439
2440        if (vq->packed_ring)
2441                return vq->packed.device_event_dma_addr;
2442
2443        return vq->split.queue_dma_addr +
2444                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2445}
2446EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2447
2448/* Only available for split ring */
2449const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2450{
2451        return &to_vvq(vq)->split.vring;
2452}
2453EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2454
2455MODULE_LICENSE("GPL");
2456