linux/drivers/virtio/virtio_ring.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* Virtio ring implementation.
   3 *
   4 *  Copyright 2007 Rusty Russell IBM Corporation
   5 */
   6#include <linux/virtio.h>
   7#include <linux/virtio_ring.h>
   8#include <linux/virtio_config.h>
   9#include <linux/device.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/hrtimer.h>
  13#include <linux/dma-mapping.h>
  14#include <linux/spinlock.h>
  15#include <xen/xen.h>
  16
  17#ifdef DEBUG
  18/* For development, we want to crash whenever the ring is screwed. */
  19#define BAD_RING(_vq, fmt, args...)                             \
  20        do {                                                    \
  21                dev_err(&(_vq)->vq.vdev->dev,                   \
  22                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  23                BUG();                                          \
  24        } while (0)
  25/* Caller is supposed to guarantee no reentry. */
  26#define START_USE(_vq)                                          \
  27        do {                                                    \
  28                if ((_vq)->in_use)                              \
  29                        panic("%s:in_use = %i\n",               \
  30                              (_vq)->vq.name, (_vq)->in_use);   \
  31                (_vq)->in_use = __LINE__;                       \
  32        } while (0)
  33#define END_USE(_vq) \
  34        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
  35#define LAST_ADD_TIME_UPDATE(_vq)                               \
  36        do {                                                    \
  37                ktime_t now = ktime_get();                      \
  38                                                                \
  39                /* No kick or get, with 0.1 seconds between?  Warn. */ \
  40                if ((_vq)->last_add_time_valid)                 \
  41                        WARN_ON(ktime_to_ms(ktime_sub(now,      \
  42                                (_vq)->last_add_time)) > 100);  \
  43                (_vq)->last_add_time = now;                     \
  44                (_vq)->last_add_time_valid = true;              \
  45        } while (0)
  46#define LAST_ADD_TIME_CHECK(_vq)                                \
  47        do {                                                    \
  48                if ((_vq)->last_add_time_valid) {               \
  49                        WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
  50                                      (_vq)->last_add_time)) > 100); \
  51                }                                               \
  52        } while (0)
  53#define LAST_ADD_TIME_INVALID(_vq)                              \
  54        ((_vq)->last_add_time_valid = false)
  55#else
  56#define BAD_RING(_vq, fmt, args...)                             \
  57        do {                                                    \
  58                dev_err(&(_vq)->vq.vdev->dev,                   \
  59                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  60                (_vq)->broken = true;                           \
  61        } while (0)
  62#define START_USE(vq)
  63#define END_USE(vq)
  64#define LAST_ADD_TIME_UPDATE(vq)
  65#define LAST_ADD_TIME_CHECK(vq)
  66#define LAST_ADD_TIME_INVALID(vq)
  67#endif
  68
  69struct vring_desc_state_split {
  70        void *data;                     /* Data for callback. */
  71        struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
  72};
  73
  74struct vring_desc_state_packed {
  75        void *data;                     /* Data for callback. */
  76        struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
  77        u16 num;                        /* Descriptor list length. */
  78        u16 last;                       /* The last desc state in a list. */
  79};
  80
  81struct vring_desc_extra {
  82        dma_addr_t addr;                /* Descriptor DMA addr. */
  83        u32 len;                        /* Descriptor length. */
  84        u16 flags;                      /* Descriptor flags. */
  85        u16 next;                       /* The next desc state in a list. */
  86};
  87
  88struct vring_virtqueue {
  89        struct virtqueue vq;
  90
  91        /* Is this a packed ring? */
  92        bool packed_ring;
  93
  94        /* Is DMA API used? */
  95        bool use_dma_api;
  96
  97        /* Can we use weak barriers? */
  98        bool weak_barriers;
  99
 100        /* Other side has made a mess, don't try any more. */
 101        bool broken;
 102
 103        /* Host supports indirect buffers */
 104        bool indirect;
 105
 106        /* Host publishes avail event idx */
 107        bool event;
 108
 109        /* Head of free buffer list. */
 110        unsigned int free_head;
 111        /* Number we've added since last sync. */
 112        unsigned int num_added;
 113
 114        /* Last used index we've seen. */
 115        u16 last_used_idx;
 116
 117        /* Hint for event idx: already triggered no need to disable. */
 118        bool event_triggered;
 119
 120        union {
 121                /* Available for split ring */
 122                struct {
 123                        /* Actual memory layout for this queue. */
 124                        struct vring vring;
 125
 126                        /* Last written value to avail->flags */
 127                        u16 avail_flags_shadow;
 128
 129                        /*
 130                         * Last written value to avail->idx in
 131                         * guest byte order.
 132                         */
 133                        u16 avail_idx_shadow;
 134
 135                        /* Per-descriptor state. */
 136                        struct vring_desc_state_split *desc_state;
 137                        struct vring_desc_extra *desc_extra;
 138
 139                        /* DMA address and size information */
 140                        dma_addr_t queue_dma_addr;
 141                        size_t queue_size_in_bytes;
 142                } split;
 143
 144                /* Available for packed ring */
 145                struct {
 146                        /* Actual memory layout for this queue. */
 147                        struct {
 148                                unsigned int num;
 149                                struct vring_packed_desc *desc;
 150                                struct vring_packed_desc_event *driver;
 151                                struct vring_packed_desc_event *device;
 152                        } vring;
 153
 154                        /* Driver ring wrap counter. */
 155                        bool avail_wrap_counter;
 156
 157                        /* Device ring wrap counter. */
 158                        bool used_wrap_counter;
 159
 160                        /* Avail used flags. */
 161                        u16 avail_used_flags;
 162
 163                        /* Index of the next avail descriptor. */
 164                        u16 next_avail_idx;
 165
 166                        /*
 167                         * Last written value to driver->flags in
 168                         * guest byte order.
 169                         */
 170                        u16 event_flags_shadow;
 171
 172                        /* Per-descriptor state. */
 173                        struct vring_desc_state_packed *desc_state;
 174                        struct vring_desc_extra *desc_extra;
 175
 176                        /* DMA address and size information */
 177                        dma_addr_t ring_dma_addr;
 178                        dma_addr_t driver_event_dma_addr;
 179                        dma_addr_t device_event_dma_addr;
 180                        size_t ring_size_in_bytes;
 181                        size_t event_size_in_bytes;
 182                } packed;
 183        };
 184
 185        /* How to notify other side. FIXME: commonalize hcalls! */
 186        bool (*notify)(struct virtqueue *vq);
 187
 188        /* DMA, allocation, and size information */
 189        bool we_own_ring;
 190
 191#ifdef DEBUG
 192        /* They're supposed to lock for us. */
 193        unsigned int in_use;
 194
 195        /* Figure out if their kicks are too delayed. */
 196        bool last_add_time_valid;
 197        ktime_t last_add_time;
 198#endif
 199};
 200
 201
 202/*
 203 * Helpers.
 204 */
 205
 206#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 207
 208static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
 209                                          unsigned int total_sg)
 210{
 211        struct vring_virtqueue *vq = to_vvq(_vq);
 212
 213        /*
 214         * If the host supports indirect descriptor tables, and we have multiple
 215         * buffers, then go indirect. FIXME: tune this threshold
 216         */
 217        return (vq->indirect && total_sg > 1 && vq->vq.num_free);
 218}
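
/*
 * Worked example: a request with total_sg == 3 costs three ring
 * descriptors when added directly, but only one descriptor (pointing at
 * a three-entry indirect table) once this returns true, which is why we
 * go indirect as soon as a request spans more than one buffer.
 */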
 219
 220/*
 221 * Modern virtio devices have feature bits to specify whether they need a
 222 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 223 *
 224 * If there, the interaction between virtio and DMA API is messy.
 225 *
 226 * On most systems with virtio, physical addresses match bus addresses,
 227 * and it doesn't particularly matter whether we use the DMA API.
 228 *
 229 * On some systems, including Xen and any system with a physical device
 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 231 * for virtio DMA to work at all.
 232 *
 233 * On other systems, including SPARC and PPC64, virtio-pci devices are
 234 * enumerated as though they are behind an IOMMU, but the virtio host
 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 236 * there or somehow map everything as the identity.
 237 *
 238 * For the time being, we preserve historic behavior and bypass the DMA
 239 * API.
 240 *
 241 * TODO: install a per-device DMA ops structure that does the right thing
 242 * taking into account all the above quirks, and use the DMA API
 243 * unconditionally on data path.
 244 */
 245
 246static bool vring_use_dma_api(struct virtio_device *vdev)
 247{
 248        if (!virtio_has_dma_quirk(vdev))
 249                return true;
 250
 251        /* Otherwise, we are left to guess. */
 252        /*
 253         * In theory, it's possible to have a buggy QEMU-supplied
 254         * emulated Q35 IOMMU and Xen enabled at the same time.  On
 255         * such a configuration, virtio has never worked and will
 256         * not work without an even larger kludge.  Instead, enable
 257         * the DMA API if we're a Xen guest, which at least allows
 258         * all of the sensible Xen configurations to work correctly.
 259         */
 260        if (xen_domain())
 261                return true;
 262
 263        return false;
 264}
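
/*
 * For reference, virtio_has_dma_quirk() is roughly the inverse of the
 * VIRTIO_F_ACCESS_PLATFORM feature bit:
 *
 *	static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
 *	{
 *		return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
 *	}
 *
 * so a device that negotiates VIRTIO_F_ACCESS_PLATFORM always takes the
 * DMA API path above, and only quirky devices reach the Xen guesswork.
 */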
 265
 266size_t virtio_max_dma_size(struct virtio_device *vdev)
 267{
 268        size_t max_segment_size = SIZE_MAX;
 269
 270        if (vring_use_dma_api(vdev))
 271                max_segment_size = dma_max_mapping_size(vdev->dev.parent);
 272
 273        return max_segment_size;
 274}
 275EXPORT_SYMBOL_GPL(virtio_max_dma_size);
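
/*
 * A minimal caller-side sketch, loosely modeled on how a block driver
 * might cap its segment size ("q" is a hypothetical request_queue, not
 * something defined in this file):
 *
 *	blk_queue_max_segment_size(q, virtio_max_dma_size(vdev));
 *
 * Without such a cap, a single segment could exceed what the DMA layer
 * (e.g. a swiotlb bounce buffer) is able to map in one go.
 */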
 276
 277static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 278                              dma_addr_t *dma_handle, gfp_t flag)
 279{
 280        if (vring_use_dma_api(vdev)) {
 281                return dma_alloc_coherent(vdev->dev.parent, size,
 282                                          dma_handle, flag);
 283        } else {
 284                void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
 285
 286                if (queue) {
 287                        phys_addr_t phys_addr = virt_to_phys(queue);
 288                        *dma_handle = (dma_addr_t)phys_addr;
 289
 290                        /*
 291                         * Sanity check: make sure we didn't truncate
 292                         * the address.  The only arches I can find that
 293                         * have 64-bit phys_addr_t but 32-bit dma_addr_t
 294                         * are certain non-highmem MIPS and x86
 295                         * configurations, but these configurations
 296                         * should never allocate physical pages above 32
 297                         * bits, so this is fine.  Just in case, throw a
 298                         * warning and abort if we end up with an
 299                         * unrepresentable address.
 300                         */
 301                        if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
 302                                free_pages_exact(queue, PAGE_ALIGN(size));
 303                                return NULL;
 304                        }
 305                }
 306                return queue;
 307        }
 308}
 309
 310static void vring_free_queue(struct virtio_device *vdev, size_t size,
 311                             void *queue, dma_addr_t dma_handle)
 312{
 313        if (vring_use_dma_api(vdev))
 314                dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
 315        else
 316                free_pages_exact(queue, PAGE_ALIGN(size));
 317}
 318
 319/*
 320 * The DMA ops on various arches are rather gnarly right now, and
 321 * making all of the arch DMA ops work on the vring device itself
 322 * is a mess.  For now, we use the parent device for DMA ops.
 323 */
 324static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 325{
 326        return vq->vq.vdev->dev.parent;
 327}
 328
 329/* Map one sg entry. */
 330static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
 331                                   struct scatterlist *sg,
 332                                   enum dma_data_direction direction)
 333{
 334        if (!vq->use_dma_api)
 335                return (dma_addr_t)sg_phys(sg);
 336
 337        /*
 338         * We can't use dma_map_sg, because we don't use scatterlists in
 339         * the way it expects (we don't guarantee that the scatterlist
 340         * will exist for the lifetime of the mapping).
 341         */
 342        return dma_map_page(vring_dma_dev(vq),
 343                            sg_page(sg), sg->offset, sg->length,
 344                            direction);
 345}
 346
 347static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 348                                   void *cpu_addr, size_t size,
 349                                   enum dma_data_direction direction)
 350{
 351        if (!vq->use_dma_api)
 352                return (dma_addr_t)virt_to_phys(cpu_addr);
 353
 354        return dma_map_single(vring_dma_dev(vq),
 355                              cpu_addr, size, direction);
 356}
 357
 358static int vring_mapping_error(const struct vring_virtqueue *vq,
 359                               dma_addr_t addr)
 360{
 361        if (!vq->use_dma_api)
 362                return 0;
 363
 364        return dma_mapping_error(vring_dma_dev(vq), addr);
 365}
 366
 367
 368/*
 369 * Split ring specific functions - *_split().
 370 */
 371
 372static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 373                                           struct vring_desc *desc)
 374{
 375        u16 flags;
 376
 377        if (!vq->use_dma_api)
 378                return;
 379
 380        flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 381
 382        if (flags & VRING_DESC_F_INDIRECT) {
 383                dma_unmap_single(vring_dma_dev(vq),
 384                                 virtio64_to_cpu(vq->vq.vdev, desc->addr),
 385                                 virtio32_to_cpu(vq->vq.vdev, desc->len),
 386                                 (flags & VRING_DESC_F_WRITE) ?
 387                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 388        } else {
 389                dma_unmap_page(vring_dma_dev(vq),
 390                               virtio64_to_cpu(vq->vq.vdev, desc->addr),
 391                               virtio32_to_cpu(vq->vq.vdev, desc->len),
 392                               (flags & VRING_DESC_F_WRITE) ?
 393                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 394        }
 395}
 396
 397static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 398                                          unsigned int i)
 399{
 400        struct vring_desc_extra *extra = vq->split.desc_extra;
 401        u16 flags;
 402
 403        if (!vq->use_dma_api)
 404                goto out;
 405
 406        flags = extra[i].flags;
 407
 408        if (flags & VRING_DESC_F_INDIRECT) {
 409                dma_unmap_single(vring_dma_dev(vq),
 410                                 extra[i].addr,
 411                                 extra[i].len,
 412                                 (flags & VRING_DESC_F_WRITE) ?
 413                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 414        } else {
 415                dma_unmap_page(vring_dma_dev(vq),
 416                               extra[i].addr,
 417                               extra[i].len,
 418                               (flags & VRING_DESC_F_WRITE) ?
 419                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 420        }
 421
 422out:
 423        return extra[i].next;
 424}
 425
 426static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 427                                               unsigned int total_sg,
 428                                               gfp_t gfp)
 429{
 430        struct vring_desc *desc;
 431        unsigned int i;
 432
 433        /*
 434         * We require lowmem mappings for the descriptors because
 435         * otherwise virt_to_phys will give us bogus addresses in the
 436         * virtqueue.
 437         */
 438        gfp &= ~__GFP_HIGHMEM;
 439
 440        desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
 441        if (!desc)
 442                return NULL;
 443
 444        for (i = 0; i < total_sg; i++)
 445                desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
 446        return desc;
 447}
 448
 449static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 450                                                    struct vring_desc *desc,
 451                                                    unsigned int i,
 452                                                    dma_addr_t addr,
 453                                                    unsigned int len,
 454                                                    u16 flags,
 455                                                    bool indirect)
 456{
 457        struct vring_virtqueue *vring = to_vvq(vq);
 458        struct vring_desc_extra *extra = vring->split.desc_extra;
 459        u16 next;
 460
 461        desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
 462        desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
 463        desc[i].len = cpu_to_virtio32(vq->vdev, len);
 464
 465        if (!indirect) {
 466                next = extra[i].next;
 467                desc[i].next = cpu_to_virtio16(vq->vdev, next);
 468
 469                extra[i].addr = addr;
 470                extra[i].len = len;
 471                extra[i].flags = flags;
 472        } else
 473                next = virtio16_to_cpu(vq->vdev, desc[i].next);
 474
 475        return next;
 476}
 477
 478static inline int virtqueue_add_split(struct virtqueue *_vq,
 479                                      struct scatterlist *sgs[],
 480                                      unsigned int total_sg,
 481                                      unsigned int out_sgs,
 482                                      unsigned int in_sgs,
 483                                      void *data,
 484                                      void *ctx,
 485                                      gfp_t gfp)
 486{
 487        struct vring_virtqueue *vq = to_vvq(_vq);
 488        struct scatterlist *sg;
 489        struct vring_desc *desc;
 490        unsigned int i, n, avail, descs_used, prev, err_idx;
 491        int head;
 492        bool indirect;
 493
 494        START_USE(vq);
 495
 496        BUG_ON(data == NULL);
 497        BUG_ON(ctx && vq->indirect);
 498
 499        if (unlikely(vq->broken)) {
 500                END_USE(vq);
 501                return -EIO;
 502        }
 503
 504        LAST_ADD_TIME_UPDATE(vq);
 505
 506        BUG_ON(total_sg == 0);
 507
 508        head = vq->free_head;
 509
 510        if (virtqueue_use_indirect(_vq, total_sg))
 511                desc = alloc_indirect_split(_vq, total_sg, gfp);
 512        else {
 513                desc = NULL;
 514                WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 515        }
 516
 517        if (desc) {
 518                /* Use a single buffer which doesn't continue */
 519                indirect = true;
 520                /* Set up rest to use this indirect table. */
 521                i = 0;
 522                descs_used = 1;
 523        } else {
 524                indirect = false;
 525                desc = vq->split.vring.desc;
 526                i = head;
 527                descs_used = total_sg;
 528        }
 529
 530        if (vq->vq.num_free < descs_used) {
 531                pr_debug("Can't add buf len %i - avail = %i\n",
 532                         descs_used, vq->vq.num_free);
 533                /* FIXME: for historical reasons, we force a notify here if
 534                 * there are outgoing parts to the buffer.  Presumably the
 535                 * host should service the ring ASAP. */
 536                if (out_sgs)
 537                        vq->notify(&vq->vq);
 538                if (indirect)
 539                        kfree(desc);
 540                END_USE(vq);
 541                return -ENOSPC;
 542        }
 543
 544        for (n = 0; n < out_sgs; n++) {
 545                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 546                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 547                        if (vring_mapping_error(vq, addr))
 548                                goto unmap_release;
 549
 550                        prev = i;
 551                        /* Note that we trust indirect descriptor
 552                         * table since it uses streaming DMA mapping.
 553                         */
 554                        i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
 555                                                     VRING_DESC_F_NEXT,
 556                                                     indirect);
 557                }
 558        }
 559        for (; n < (out_sgs + in_sgs); n++) {
 560                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 561                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
 562                        if (vring_mapping_error(vq, addr))
 563                                goto unmap_release;
 564
 565                        prev = i;
 566                        /* Note that we trust indirect descriptor
 567                         * table since it uses streaming DMA mapping.
 568                         */
 569                        i = virtqueue_add_desc_split(_vq, desc, i, addr,
 570                                                     sg->length,
 571                                                     VRING_DESC_F_NEXT |
 572                                                     VRING_DESC_F_WRITE,
 573                                                     indirect);
 574                }
 575        }
 576        /* Last one doesn't continue. */
 577        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 578        if (!indirect && vq->use_dma_api)
 579                vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
 580                        ~VRING_DESC_F_NEXT;
 581
 582        if (indirect) {
 583                /* Now that the indirect table is filled in, map it. */
 584                dma_addr_t addr = vring_map_single(
 585                        vq, desc, total_sg * sizeof(struct vring_desc),
 586                        DMA_TO_DEVICE);
 587                if (vring_mapping_error(vq, addr))
 588                        goto unmap_release;
 589
 590                virtqueue_add_desc_split(_vq, vq->split.vring.desc,
 591                                         head, addr,
 592                                         total_sg * sizeof(struct vring_desc),
 593                                         VRING_DESC_F_INDIRECT,
 594                                         false);
 595        }
 596
 597        /* We're using some buffers from the free list. */
 598        vq->vq.num_free -= descs_used;
 599
 600        /* Update free pointer */
 601        if (indirect)
 602                vq->free_head = vq->split.desc_extra[head].next;
 603        else
 604                vq->free_head = i;
 605
 606        /* Store token and indirect buffer state. */
 607        vq->split.desc_state[head].data = data;
 608        if (indirect)
 609                vq->split.desc_state[head].indir_desc = desc;
 610        else
 611                vq->split.desc_state[head].indir_desc = ctx;
 612
 613        /* Put entry in available array (but don't update avail->idx until they
 614         * do sync). */
 615        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
 616        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 617
 618        /* Descriptors and available array need to be set before we expose the
 619         * new available array entries. */
 620        virtio_wmb(vq->weak_barriers);
 621        vq->split.avail_idx_shadow++;
 622        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 623                                                vq->split.avail_idx_shadow);
 624        vq->num_added++;
 625
 626        pr_debug("Added buffer head %i to %p\n", head, vq);
 627        END_USE(vq);
 628
 629        /* This is very unlikely, but theoretically possible.  Kick
 630         * just in case. */
 631        if (unlikely(vq->num_added == (1 << 16) - 1))
 632                virtqueue_kick(_vq);
 633
 634        return 0;
 635
 636unmap_release:
 637        err_idx = i;
 638
 639        if (indirect)
 640                i = 0;
 641        else
 642                i = head;
 643
 644        for (n = 0; n < total_sg; n++) {
 645                if (i == err_idx)
 646                        break;
 647                if (indirect) {
 648                        vring_unmap_one_split_indirect(vq, &desc[i]);
 649                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 650                } else
 651                        i = vring_unmap_one_split(vq, i);
 652        }
 653
 654        if (indirect)
 655                kfree(desc);
 656
 657        END_USE(vq);
 658        return -ENOMEM;
 659}
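
/*
 * Driver-side sketch: this is not called directly; drivers use
 * virtqueue_add_sgs()/virtqueue_add_outbuf()/virtqueue_add_inbuf(),
 * which dispatch here for split rings.  Assuming a hypothetical "req"
 * that carries one readable and one writable scatterlist:
 *
 *	struct scatterlist *sgs[] = { req->out_sg, req->in_sg };
 *	int err;
 *
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (err)
 *		return err;
 *	virtqueue_kick(vq);
 *
 * -ENOSPC here means the ring is full; -EIO means it is broken.
 */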
 660
 661static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 662{
 663        struct vring_virtqueue *vq = to_vvq(_vq);
 664        u16 new, old;
 665        bool needs_kick;
 666
 667        START_USE(vq);
 668        /* We need to expose available array entries before checking avail
 669         * event. */
 670        virtio_mb(vq->weak_barriers);
 671
 672        old = vq->split.avail_idx_shadow - vq->num_added;
 673        new = vq->split.avail_idx_shadow;
 674        vq->num_added = 0;
 675
 676        LAST_ADD_TIME_CHECK(vq);
 677        LAST_ADD_TIME_INVALID(vq);
 678
 679        if (vq->event) {
 680                needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
 681                                        vring_avail_event(&vq->split.vring)),
 682                                              new, old);
 683        } else {
 684                needs_kick = !(vq->split.vring.used->flags &
 685                                        cpu_to_virtio16(_vq->vdev,
 686                                                VRING_USED_F_NO_NOTIFY));
 687        }
 688        END_USE(vq);
 689        return needs_kick;
 690}
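
/*
 * The kick_prepare/notify split exists so a driver can drop its own lock
 * before the potentially expensive notification.  A sketch of the usual
 * pattern ("lock" and "flags" belong to the caller, not this file):
 *
 *	bool kick;
 *
 *	spin_lock_irqsave(&lock, flags);
 *	... virtqueue_add_sgs(...) ...
 *	kick = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&lock, flags);
 *
 *	if (kick)
 *		virtqueue_notify(vq);
 */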
 691
 692static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 693                             void **ctx)
 694{
 695        unsigned int i, j;
 696        __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 697
 698        /* Clear data ptr. */
 699        vq->split.desc_state[head].data = NULL;
 700
 701        /* Put back on free list: unmap first-level descriptors and find end */
 702        i = head;
 703
 704        while (vq->split.vring.desc[i].flags & nextflag) {
 705                vring_unmap_one_split(vq, i);
 706                i = vq->split.desc_extra[i].next;
 707                vq->vq.num_free++;
 708        }
 709
 710        vring_unmap_one_split(vq, i);
 711        vq->split.desc_extra[i].next = vq->free_head;
 712        vq->free_head = head;
 713
 714        /* Plus final descriptor */
 715        vq->vq.num_free++;
 716
 717        if (vq->indirect) {
 718                struct vring_desc *indir_desc =
 719                                vq->split.desc_state[head].indir_desc;
 720                u32 len;
 721
 722                /* Free the indirect table, if any, now that it's unmapped. */
 723                if (!indir_desc)
 724                        return;
 725
 726                len = vq->split.desc_extra[head].len;
 727
 728                BUG_ON(!(vq->split.desc_extra[head].flags &
 729                                VRING_DESC_F_INDIRECT));
 730                BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 731
 732                for (j = 0; j < len / sizeof(struct vring_desc); j++)
 733                        vring_unmap_one_split_indirect(vq, &indir_desc[j]);
 734
 735                kfree(indir_desc);
 736                vq->split.desc_state[head].indir_desc = NULL;
 737        } else if (ctx) {
 738                *ctx = vq->split.desc_state[head].indir_desc;
 739        }
 740}
 741
 742static inline bool more_used_split(const struct vring_virtqueue *vq)
 743{
 744        return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
 745                        vq->split.vring.used->idx);
 746}
 747
 748static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 749                                         unsigned int *len,
 750                                         void **ctx)
 751{
 752        struct vring_virtqueue *vq = to_vvq(_vq);
 753        void *ret;
 754        unsigned int i;
 755        u16 last_used;
 756
 757        START_USE(vq);
 758
 759        if (unlikely(vq->broken)) {
 760                END_USE(vq);
 761                return NULL;
 762        }
 763
 764        if (!more_used_split(vq)) {
 765                pr_debug("No more buffers in queue\n");
 766                END_USE(vq);
 767                return NULL;
 768        }
 769
 770        /* Only get used array entries after they have been exposed by host. */
 771        virtio_rmb(vq->weak_barriers);
 772
 773        last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
 774        i = virtio32_to_cpu(_vq->vdev,
 775                        vq->split.vring.used->ring[last_used].id);
 776        *len = virtio32_to_cpu(_vq->vdev,
 777                        vq->split.vring.used->ring[last_used].len);
 778
 779        if (unlikely(i >= vq->split.vring.num)) {
 780                BAD_RING(vq, "id %u out of range\n", i);
 781                return NULL;
 782        }
 783        if (unlikely(!vq->split.desc_state[i].data)) {
 784                BAD_RING(vq, "id %u is not a head!\n", i);
 785                return NULL;
 786        }
 787
 788        /* detach_buf_split clears data, so grab it now. */
 789        ret = vq->split.desc_state[i].data;
 790        detach_buf_split(vq, i, ctx);
 791        vq->last_used_idx++;
 792        /* If we expect an interrupt for the next entry, tell host
 793         * by writing event index and flush out the write before
 794         * the read in the next get_buf call. */
 795        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 796                virtio_store_mb(vq->weak_barriers,
 797                                &vring_used_event(&vq->split.vring),
 798                                cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 799
 800        LAST_ADD_TIME_INVALID(vq);
 801
 802        END_USE(vq);
 803        return ret;
 804}
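
/*
 * Completion-side sketch: a driver callback typically drains the used
 * ring in a loop through virtqueue_get_buf(), which lands here for
 * split rings.  "complete_request" stands in for the driver's own
 * completion handler; "req" is whatever token was passed to
 * virtqueue_add_*():
 *
 *	unsigned int len;
 *	void *req;
 *
 *	while ((req = virtqueue_get_buf(vq, &len)) != NULL)
 *		complete_request(req, len);
 */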
 805
 806static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 807{
 808        struct vring_virtqueue *vq = to_vvq(_vq);
 809
 810        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 811                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 812                if (vq->event)
 813                        /* TODO: this is a hack. Figure out a cleaner value to write. */
 814                        vring_used_event(&vq->split.vring) = 0x0;
 815                else
 816                        vq->split.vring.avail->flags =
 817                                cpu_to_virtio16(_vq->vdev,
 818                                                vq->split.avail_flags_shadow);
 819        }
 820}
 821
 822static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
 823{
 824        struct vring_virtqueue *vq = to_vvq(_vq);
 825        u16 last_used_idx;
 826
 827        START_USE(vq);
 828
 829        /* We optimistically turn back on interrupts, then check if there was
 830         * more to do. */
 831        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 832         * either clear the flags bit or point the event index at the next
 833         * entry. Always do both to keep code simple. */
 834        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 835                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 836                if (!vq->event)
 837                        vq->split.vring.avail->flags =
 838                                cpu_to_virtio16(_vq->vdev,
 839                                                vq->split.avail_flags_shadow);
 840        }
 841        vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
 842                        last_used_idx = vq->last_used_idx);
 843        END_USE(vq);
 844        return last_used_idx;
 845}
 846
 847static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
 848{
 849        struct vring_virtqueue *vq = to_vvq(_vq);
 850
 851        return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
 852                        vq->split.vring.used->idx);
 853}
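
/*
 * enable_cb_prepare/poll let a NAPI-style user re-enable callbacks and
 * then re-check for buffers that slipped in meanwhile, without racing
 * the device.  A sketch of the idiom (reschedule_poll() stands in for
 * the driver's own mechanism, e.g. napi_schedule() in virtio-net):
 *
 *	unsigned int opaque = virtqueue_enable_cb_prepare(vq);
 *
 *	if (unlikely(virtqueue_poll(vq, opaque))) {
 *		virtqueue_disable_cb(vq);
 *		reschedule_poll();
 *	}
 */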
 854
 855static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 856{
 857        struct vring_virtqueue *vq = to_vvq(_vq);
 858        u16 bufs;
 859
 860        START_USE(vq);
 861
 862        /* We optimistically turn back on interrupts, then check if there was
 863         * more to do. */
 864        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 865         * either clear the flags bit or point the event index at the next
 866         * entry. Always update the event index to keep code simple. */
 867        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 868                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 869                if (!vq->event)
 870                        vq->split.vring.avail->flags =
 871                                cpu_to_virtio16(_vq->vdev,
 872                                                vq->split.avail_flags_shadow);
 873        }
 874        /* TODO: tune this threshold */
 875        bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 876
 877        virtio_store_mb(vq->weak_barriers,
 878                        &vring_used_event(&vq->split.vring),
 879                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 880
 881        if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
 882                                        - vq->last_used_idx) > bufs)) {
 883                END_USE(vq);
 884                return false;
 885        }
 886
 887        END_USE(vq);
 888        return true;
 889}
 890
 891static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 892{
 893        struct vring_virtqueue *vq = to_vvq(_vq);
 894        unsigned int i;
 895        void *buf;
 896
 897        START_USE(vq);
 898
 899        for (i = 0; i < vq->split.vring.num; i++) {
 900                if (!vq->split.desc_state[i].data)
 901                        continue;
 902                /* detach_buf_split clears data, so grab it now. */
 903                buf = vq->split.desc_state[i].data;
 904                detach_buf_split(vq, i, NULL);
 905                vq->split.avail_idx_shadow--;
 906                vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 907                                vq->split.avail_idx_shadow);
 908                END_USE(vq);
 909                return buf;
 910        }
 911        /* That should have freed everything. */
 912        BUG_ON(vq->vq.num_free != vq->split.vring.num);
 913
 914        END_USE(vq);
 915        return NULL;
 916}
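
/*
 * Teardown sketch: after resetting the device, a driver reclaims any
 * buffers it queued but never got back via virtqueue_detach_unused_buf()
 * ("free_buf" stands in for the driver's own release function):
 *
 *	void *buf;
 *
 *	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *		free_buf(buf);
 */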
 917
 918static struct virtqueue *vring_create_virtqueue_split(
 919        unsigned int index,
 920        unsigned int num,
 921        unsigned int vring_align,
 922        struct virtio_device *vdev,
 923        bool weak_barriers,
 924        bool may_reduce_num,
 925        bool context,
 926        bool (*notify)(struct virtqueue *),
 927        void (*callback)(struct virtqueue *),
 928        const char *name)
 929{
 930        struct virtqueue *vq;
 931        void *queue = NULL;
 932        dma_addr_t dma_addr;
 933        size_t queue_size_in_bytes;
 934        struct vring vring;
 935
 936        /* We assume num is a power of 2. */
 937        if (num & (num - 1)) {
 938                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 939                return NULL;
 940        }
 941
 942        /* TODO: allocate each queue chunk individually */
 943        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
 944                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 945                                          &dma_addr,
 946                                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 947                if (queue)
 948                        break;
 949                if (!may_reduce_num)
 950                        return NULL;
 951        }
 952
 953        if (!num)
 954                return NULL;
 955
 956        if (!queue) {
 957                /* Try to get a single page. You are my only hope! */
 958                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 959                                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
 960        }
 961        if (!queue)
 962                return NULL;
 963
 964        queue_size_in_bytes = vring_size(num, vring_align);
 965        vring_init(&vring, num, queue, vring_align);
 966
 967        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 968                                   notify, callback, name);
 969        if (!vq) {
 970                vring_free_queue(vdev, queue_size_in_bytes, queue,
 971                                 dma_addr);
 972                return NULL;
 973        }
 974
 975        to_vvq(vq)->split.queue_dma_addr = dma_addr;
 976        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
 977        to_vvq(vq)->we_own_ring = true;
 978
 979        return vq;
 980}
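
/*
 * Transports do not call this directly either: the public
 * vring_create_virtqueue() wrapper picks the packed or split
 * implementation based on VIRTIO_F_RING_PACKED.  A sketch, loosely
 * modeled on a memory-mapped transport (my_notify, callback and name
 * are stand-ins for whatever the transport actually uses):
 *
 *	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev,
 *				    true, true, ctx,
 *				    my_notify, callback, name);
 *	if (!vq)
 *		return ERR_PTR(-ENOMEM);
 */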
 981
 982
 983/*
 984 * Packed ring specific functions - *_packed().
 985 */
 986
 987static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
 988                                     struct vring_desc_extra *state)
 989{
 990        u16 flags;
 991
 992        if (!vq->use_dma_api)
 993                return;
 994
 995        flags = state->flags;
 996
 997        if (flags & VRING_DESC_F_INDIRECT) {
 998                dma_unmap_single(vring_dma_dev(vq),
 999                                 state->addr, state->len,
1000                                 (flags & VRING_DESC_F_WRITE) ?
1001                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1002        } else {
1003                dma_unmap_page(vring_dma_dev(vq),
1004                               state->addr, state->len,
1005                               (flags & VRING_DESC_F_WRITE) ?
1006                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
1007        }
1008}
1009
1010static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1011                                   struct vring_packed_desc *desc)
1012{
1013        u16 flags;
1014
1015        if (!vq->use_dma_api)
1016                return;
1017
1018        flags = le16_to_cpu(desc->flags);
1019
1020        if (flags & VRING_DESC_F_INDIRECT) {
1021                dma_unmap_single(vring_dma_dev(vq),
1022                                 le64_to_cpu(desc->addr),
1023                                 le32_to_cpu(desc->len),
1024                                 (flags & VRING_DESC_F_WRITE) ?
1025                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
1026        } else {
1027                dma_unmap_page(vring_dma_dev(vq),
1028                               le64_to_cpu(desc->addr),
1029                               le32_to_cpu(desc->len),
1030                               (flags & VRING_DESC_F_WRITE) ?
1031                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
1032        }
1033}
1034
1035static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1036                                                       gfp_t gfp)
1037{
1038        struct vring_packed_desc *desc;
1039
1040        /*
1041         * We require lowmem mappings for the descriptors because
1042         * otherwise virt_to_phys will give us bogus addresses in the
1043         * virtqueue.
1044         */
1045        gfp &= ~__GFP_HIGHMEM;
1046
1047        desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1048
1049        return desc;
1050}
1051
1052static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1053                                         struct scatterlist *sgs[],
1054                                         unsigned int total_sg,
1055                                         unsigned int out_sgs,
1056                                         unsigned int in_sgs,
1057                                         void *data,
1058                                         gfp_t gfp)
1059{
1060        struct vring_packed_desc *desc;
1061        struct scatterlist *sg;
1062        unsigned int i, n, err_idx;
1063        u16 head, id;
1064        dma_addr_t addr;
1065
1066        head = vq->packed.next_avail_idx;
1067        desc = alloc_indirect_packed(total_sg, gfp);
1068        if (!desc)
1069                return -ENOMEM;
1070
1071        if (unlikely(vq->vq.num_free < 1)) {
1072                pr_debug("Can't add buf len 1 - avail = 0\n");
1073                kfree(desc);
1074                END_USE(vq);
1075                return -ENOSPC;
1076        }
1077
1078        i = 0;
1079        id = vq->free_head;
1080        BUG_ON(id == vq->packed.vring.num);
1081
1082        for (n = 0; n < out_sgs + in_sgs; n++) {
1083                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1084                        addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1085                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1086                        if (vring_mapping_error(vq, addr))
1087                                goto unmap_release;
1088
1089                        desc[i].flags = cpu_to_le16(n < out_sgs ?
1090                                                0 : VRING_DESC_F_WRITE);
1091                        desc[i].addr = cpu_to_le64(addr);
1092                        desc[i].len = cpu_to_le32(sg->length);
1093                        i++;
1094                }
1095        }
1096
1097        /* Now that the indirect table is filled in, map it. */
1098        addr = vring_map_single(vq, desc,
1099                        total_sg * sizeof(struct vring_packed_desc),
1100                        DMA_TO_DEVICE);
1101        if (vring_mapping_error(vq, addr))
1102                goto unmap_release;
1103
1104        vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1105        vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1106                                sizeof(struct vring_packed_desc));
1107        vq->packed.vring.desc[head].id = cpu_to_le16(id);
1108
1109        if (vq->use_dma_api) {
1110                vq->packed.desc_extra[id].addr = addr;
1111                vq->packed.desc_extra[id].len = total_sg *
1112                                sizeof(struct vring_packed_desc);
1113                vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1114                                                  vq->packed.avail_used_flags;
1115        }
1116
1117        /*
1118         * A driver MUST NOT make the first descriptor in the list
1119         * available before all subsequent descriptors comprising
1120         * the list are made available.
1121         */
1122        virtio_wmb(vq->weak_barriers);
1123        vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1124                                                vq->packed.avail_used_flags);
1125
1126        /* We're using some buffers from the free list. */
1127        vq->vq.num_free -= 1;
1128
1129        /* Update free pointer */
1130        n = head + 1;
1131        if (n >= vq->packed.vring.num) {
1132                n = 0;
1133                vq->packed.avail_wrap_counter ^= 1;
1134                vq->packed.avail_used_flags ^=
1135                                1 << VRING_PACKED_DESC_F_AVAIL |
1136                                1 << VRING_PACKED_DESC_F_USED;
1137        }
1138        vq->packed.next_avail_idx = n;
1139        vq->free_head = vq->packed.desc_extra[id].next;
1140
1141        /* Store token and indirect buffer state. */
1142        vq->packed.desc_state[id].num = 1;
1143        vq->packed.desc_state[id].data = data;
1144        vq->packed.desc_state[id].indir_desc = desc;
1145        vq->packed.desc_state[id].last = id;
1146
1147        vq->num_added += 1;
1148
1149        pr_debug("Added buffer head %i to %p\n", head, vq);
1150        END_USE(vq);
1151
1152        return 0;
1153
1154unmap_release:
1155        err_idx = i;
1156
1157        for (i = 0; i < err_idx; i++)
1158                vring_unmap_desc_packed(vq, &desc[i]);
1159
1160        kfree(desc);
1161
1162        END_USE(vq);
1163        return -ENOMEM;
1164}
1165
1166static inline int virtqueue_add_packed(struct virtqueue *_vq,
1167                                       struct scatterlist *sgs[],
1168                                       unsigned int total_sg,
1169                                       unsigned int out_sgs,
1170                                       unsigned int in_sgs,
1171                                       void *data,
1172                                       void *ctx,
1173                                       gfp_t gfp)
1174{
1175        struct vring_virtqueue *vq = to_vvq(_vq);
1176        struct vring_packed_desc *desc;
1177        struct scatterlist *sg;
1178        unsigned int i, n, c, descs_used, err_idx;
1179        __le16 head_flags, flags;
1180        u16 head, id, prev, curr, avail_used_flags;
1181        int err;
1182
1183        START_USE(vq);
1184
1185        BUG_ON(data == NULL);
1186        BUG_ON(ctx && vq->indirect);
1187
1188        if (unlikely(vq->broken)) {
1189                END_USE(vq);
1190                return -EIO;
1191        }
1192
1193        LAST_ADD_TIME_UPDATE(vq);
1194
1195        BUG_ON(total_sg == 0);
1196
1197        if (virtqueue_use_indirect(_vq, total_sg)) {
1198                err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1199                                                    in_sgs, data, gfp);
1200                if (err != -ENOMEM) {
1201                        END_USE(vq);
1202                        return err;
1203                }
1204
1205                /* fall back on direct */
1206        }
1207
1208        head = vq->packed.next_avail_idx;
1209        avail_used_flags = vq->packed.avail_used_flags;
1210
1211        WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1212
1213        desc = vq->packed.vring.desc;
1214        i = head;
1215        descs_used = total_sg;
1216
1217        if (unlikely(vq->vq.num_free < descs_used)) {
1218                pr_debug("Can't add buf len %i - avail = %i\n",
1219                         descs_used, vq->vq.num_free);
1220                END_USE(vq);
1221                return -ENOSPC;
1222        }
1223
1224        id = vq->free_head;
1225        BUG_ON(id == vq->packed.vring.num);
1226
1227        curr = id;
1228        c = 0;
1229        for (n = 0; n < out_sgs + in_sgs; n++) {
1230                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1231                        dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1232                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1233                        if (vring_mapping_error(vq, addr))
1234                                goto unmap_release;
1235
1236                        flags = cpu_to_le16(vq->packed.avail_used_flags |
1237                                    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1238                                    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1239                        if (i == head)
1240                                head_flags = flags;
1241                        else
1242                                desc[i].flags = flags;
1243
1244                        desc[i].addr = cpu_to_le64(addr);
1245                        desc[i].len = cpu_to_le32(sg->length);
1246                        desc[i].id = cpu_to_le16(id);
1247
1248                        if (unlikely(vq->use_dma_api)) {
1249                                vq->packed.desc_extra[curr].addr = addr;
1250                                vq->packed.desc_extra[curr].len = sg->length;
1251                                vq->packed.desc_extra[curr].flags =
1252                                        le16_to_cpu(flags);
1253                        }
1254                        prev = curr;
1255                        curr = vq->packed.desc_extra[curr].next;
1256
1257                        if (unlikely(++i >= vq->packed.vring.num)) {
1258                                i = 0;
1259                                vq->packed.avail_used_flags ^=
1260                                        1 << VRING_PACKED_DESC_F_AVAIL |
1261                                        1 << VRING_PACKED_DESC_F_USED;
1262                        }
1263                }
1264        }
1265
1266        if (i < head)
1267                vq->packed.avail_wrap_counter ^= 1;
1268
1269        /* We're using some buffers from the free list. */
1270        vq->vq.num_free -= descs_used;
1271
1272        /* Update free pointer */
1273        vq->packed.next_avail_idx = i;
1274        vq->free_head = curr;
1275
1276        /* Store token. */
1277        vq->packed.desc_state[id].num = descs_used;
1278        vq->packed.desc_state[id].data = data;
1279        vq->packed.desc_state[id].indir_desc = ctx;
1280        vq->packed.desc_state[id].last = prev;
1281
1282        /*
1283         * A driver MUST NOT make the first descriptor in the list
1284         * available before all subsequent descriptors comprising
1285         * the list are made available.
1286         */
1287        virtio_wmb(vq->weak_barriers);
1288        vq->packed.vring.desc[head].flags = head_flags;
1289        vq->num_added += descs_used;
1290
1291        pr_debug("Added buffer head %i to %p\n", head, vq);
1292        END_USE(vq);
1293
1294        return 0;
1295
1296unmap_release:
1297        err_idx = i;
1298        i = head;
1299        curr = vq->free_head;
1300
1301        vq->packed.avail_used_flags = avail_used_flags;
1302
1303        for (n = 0; n < total_sg; n++) {
1304                if (i == err_idx)
1305                        break;
1306                vring_unmap_state_packed(vq,
1307                                         &vq->packed.desc_extra[curr]);
1308                curr = vq->packed.desc_extra[curr].next;
1309                i++;
1310                if (i >= vq->packed.vring.num)
1311                        i = 0;
1312        }
1313
1314        END_USE(vq);
1315        return -EIO;
1316}
1317
1318static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1319{
1320        struct vring_virtqueue *vq = to_vvq(_vq);
1321        u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1322        bool needs_kick;
1323        union {
1324                struct {
1325                        __le16 off_wrap;
1326                        __le16 flags;
1327                };
1328                u32 u32;
1329        } snapshot;
1330
1331        START_USE(vq);
1332
1333        /*
1334         * We need to expose the new flags value before checking notification
1335         * suppressions.
1336         */
1337        virtio_mb(vq->weak_barriers);
1338
1339        old = vq->packed.next_avail_idx - vq->num_added;
1340        new = vq->packed.next_avail_idx;
1341        vq->num_added = 0;
1342
1343        snapshot.u32 = *(u32 *)vq->packed.vring.device;
1344        flags = le16_to_cpu(snapshot.flags);
1345
1346        LAST_ADD_TIME_CHECK(vq);
1347        LAST_ADD_TIME_INVALID(vq);
1348
1349        if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1350                needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1351                goto out;
1352        }
1353
1354        off_wrap = le16_to_cpu(snapshot.off_wrap);
1355
1356        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1357        event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1358        if (wrap_counter != vq->packed.avail_wrap_counter)
1359                event_idx -= vq->packed.vring.num;
1360
1361        needs_kick = vring_need_event(event_idx, new, old);
1362out:
1363        END_USE(vq);
1364        return needs_kick;
1365}
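
/*
 * Worked example of the off_wrap decoding above: with
 * VRING_PACKED_EVENT_F_WRAP_CTR == 15, an off_wrap of 0x8005 means
 * wrap_counter == 1 and event_idx == 5.  If that wrap counter does not
 * match our avail_wrap_counter, the event index refers to the previous
 * lap of the ring, hence the "event_idx -= vq->packed.vring.num"
 * adjustment before calling vring_need_event().
 */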
1366
1367static void detach_buf_packed(struct vring_virtqueue *vq,
1368                              unsigned int id, void **ctx)
1369{
1370        struct vring_desc_state_packed *state = NULL;
1371        struct vring_packed_desc *desc;
1372        unsigned int i, curr;
1373
1374        state = &vq->packed.desc_state[id];
1375
1376        /* Clear data ptr. */
1377        state->data = NULL;
1378
1379        vq->packed.desc_extra[state->last].next = vq->free_head;
1380        vq->free_head = id;
1381        vq->vq.num_free += state->num;
1382
1383        if (unlikely(vq->use_dma_api)) {
1384                curr = id;
1385                for (i = 0; i < state->num; i++) {
1386                        vring_unmap_state_packed(vq,
1387                                &vq->packed.desc_extra[curr]);
1388                        curr = vq->packed.desc_extra[curr].next;
1389                }
1390        }
1391
1392        if (vq->indirect) {
1393                u32 len;
1394
1395                /* Free the indirect table, if any, now that it's unmapped. */
1396                desc = state->indir_desc;
1397                if (!desc)
1398                        return;
1399
1400                if (vq->use_dma_api) {
1401                        len = vq->packed.desc_extra[id].len;
1402                        for (i = 0; i < len / sizeof(struct vring_packed_desc);
1403                                        i++)
1404                                vring_unmap_desc_packed(vq, &desc[i]);
1405                }
1406                kfree(desc);
1407                state->indir_desc = NULL;
1408        } else if (ctx) {
1409                *ctx = state->indir_desc;
1410        }
1411}
1412
1413static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1414                                       u16 idx, bool used_wrap_counter)
1415{
1416        bool avail, used;
1417        u16 flags;
1418
1419        flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1420        avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1421        used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1422
1423        return avail == used && used == used_wrap_counter;
1424}
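
/*
 * Worked example: with used_wrap_counter == 1, flags with
 * AVAIL == USED == 1 mean the device has written the descriptor back on
 * this lap (used), while AVAIL == 1, USED == 0 mean it is still owned
 * by the device.  Once last_used_idx wraps past vring.num, the caller
 * flips used_wrap_counter and the test inverts accordingly.
 */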
1425
1426static inline bool more_used_packed(const struct vring_virtqueue *vq)
1427{
1428        return is_used_desc_packed(vq, vq->last_used_idx,
1429                        vq->packed.used_wrap_counter);
1430}
1431
1432static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1433                                          unsigned int *len,
1434                                          void **ctx)
1435{
1436        struct vring_virtqueue *vq = to_vvq(_vq);
1437        u16 last_used, id;
1438        void *ret;
1439
1440        START_USE(vq);
1441
1442        if (unlikely(vq->broken)) {
1443                END_USE(vq);
1444                return NULL;
1445        }
1446
1447        if (!more_used_packed(vq)) {
1448                pr_debug("No more buffers in queue\n");
1449                END_USE(vq);
1450                return NULL;
1451        }
1452
1453        /* Only get used elements after they have been exposed by host. */
1454        virtio_rmb(vq->weak_barriers);
1455
1456        last_used = vq->last_used_idx;
1457        id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1458        *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1459
1460        if (unlikely(id >= vq->packed.vring.num)) {
1461                BAD_RING(vq, "id %u out of range\n", id);
1462                return NULL;
1463        }
1464        if (unlikely(!vq->packed.desc_state[id].data)) {
1465                BAD_RING(vq, "id %u is not a head!\n", id);
1466                return NULL;
1467        }
1468
1469        /* detach_buf_packed clears data, so grab it now. */
1470        ret = vq->packed.desc_state[id].data;
1471        detach_buf_packed(vq, id, ctx);
1472
1473        vq->last_used_idx += vq->packed.desc_state[id].num;
1474        if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1475                vq->last_used_idx -= vq->packed.vring.num;
1476                vq->packed.used_wrap_counter ^= 1;
1477        }
1478
1479        /*
1480         * If we expect an interrupt for the next entry, tell host
1481         * by writing event index and flush out the write before
1482         * the read in the next get_buf call.
1483         */
1484        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1485                virtio_store_mb(vq->weak_barriers,
1486                                &vq->packed.vring.driver->off_wrap,
1487                                cpu_to_le16(vq->last_used_idx |
1488                                        (vq->packed.used_wrap_counter <<
1489                                         VRING_PACKED_EVENT_F_WRAP_CTR)));
1490
1491        LAST_ADD_TIME_INVALID(vq);
1492
1493        END_USE(vq);
1494        return ret;
1495}
1496
1497static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1498{
1499        struct vring_virtqueue *vq = to_vvq(_vq);
1500
1501        if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1502                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1503                vq->packed.vring.driver->flags =
1504                        cpu_to_le16(vq->packed.event_flags_shadow);
1505        }
1506}
1507
1508static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1509{
1510        struct vring_virtqueue *vq = to_vvq(_vq);
1511
1512        START_USE(vq);
1513
1514        /*
1515         * We optimistically turn back on interrupts, then check if there was
1516         * more to do.
1517         */
1518
1519        if (vq->event) {
1520                vq->packed.vring.driver->off_wrap =
1521                        cpu_to_le16(vq->last_used_idx |
1522                                (vq->packed.used_wrap_counter <<
1523                                 VRING_PACKED_EVENT_F_WRAP_CTR));
1524                /*
1525                 * We need to update the event offset and event wrap
1526                 * counter before updating the event flags.
1527                 */
1528                virtio_wmb(vq->weak_barriers);
1529        }
1530
1531        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1532                vq->packed.event_flags_shadow = vq->event ?
1533                                VRING_PACKED_EVENT_FLAG_DESC :
1534                                VRING_PACKED_EVENT_FLAG_ENABLE;
1535                vq->packed.vring.driver->flags =
1536                                cpu_to_le16(vq->packed.event_flags_shadow);
1537        }
1538
1539        END_USE(vq);
1540        return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1541                        VRING_PACKED_EVENT_F_WRAP_CTR);
1542}
1543
1544static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1545{
1546        struct vring_virtqueue *vq = to_vvq(_vq);
1547        bool wrap_counter;
1548        u16 used_idx;
1549
1550        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1551        used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1552
1553        return is_used_desc_packed(vq, used_idx, wrap_counter);
1554}
1555
1556static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1557{
1558        struct vring_virtqueue *vq = to_vvq(_vq);
1559        u16 used_idx, wrap_counter;
1560        u16 bufs;
1561
1562        START_USE(vq);
1563
1564        /*
1565         * We optimistically turn back on interrupts, then check if there was
1566         * more to do.
1567         */
1568
1569        if (vq->event) {
1570                /* TODO: tune this threshold */
1571                bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1572                wrap_counter = vq->packed.used_wrap_counter;
1573
1574                used_idx = vq->last_used_idx + bufs;
1575                if (used_idx >= vq->packed.vring.num) {
1576                        used_idx -= vq->packed.vring.num;
1577                        wrap_counter ^= 1;
1578                }
1579
1580                vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1581                        (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1582
1583                /*
1584                 * We need to update the event offset and event wrap
1585                 * counter before updating the event flags.
1586                 */
1587                virtio_wmb(vq->weak_barriers);
1588        }
1589
1590        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1591                vq->packed.event_flags_shadow = vq->event ?
1592                                VRING_PACKED_EVENT_FLAG_DESC :
1593                                VRING_PACKED_EVENT_FLAG_ENABLE;
1594                vq->packed.vring.driver->flags =
1595                                cpu_to_le16(vq->packed.event_flags_shadow);
1596        }
1597
1598        /*
1599         * We need to update the event suppression structure
1600         * before re-checking for more used buffers.
1601         */
1602        virtio_mb(vq->weak_barriers);
1603
1604        if (is_used_desc_packed(vq,
1605                                vq->last_used_idx,
1606                                vq->packed.used_wrap_counter)) {
1607                END_USE(vq);
1608                return false;
1609        }
1610
1611        END_USE(vq);
1612        return true;
1613}
1614
1615static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1616{
1617        struct vring_virtqueue *vq = to_vvq(_vq);
1618        unsigned int i;
1619        void *buf;
1620
1621        START_USE(vq);
1622
1623        for (i = 0; i < vq->packed.vring.num; i++) {
1624                if (!vq->packed.desc_state[i].data)
1625                        continue;
1626                /* detach_buf clears data, so grab it now. */
1627                buf = vq->packed.desc_state[i].data;
1628                detach_buf_packed(vq, i, NULL);
1629                END_USE(vq);
1630                return buf;
1631        }
1632        /* That should have freed everything. */
1633        BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1634
1635        END_USE(vq);
1636        return NULL;
1637}
1638
1639static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1640                                                       unsigned int num)
1641{
1642        struct vring_desc_extra *desc_extra;
1643        unsigned int i;
1644
1645        desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1646                                   GFP_KERNEL);
1647        if (!desc_extra)
1648                return NULL;
1649
1650        memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1651
1652        for (i = 0; i < num - 1; i++)
1653                desc_extra[i].next = i + 1;
1654
1655        return desc_extra;
1656}
1657
1658static struct virtqueue *vring_create_virtqueue_packed(
1659        unsigned int index,
1660        unsigned int num,
1661        unsigned int vring_align,
1662        struct virtio_device *vdev,
1663        bool weak_barriers,
1664        bool may_reduce_num,
1665        bool context,
1666        bool (*notify)(struct virtqueue *),
1667        void (*callback)(struct virtqueue *),
1668        const char *name)
1669{
1670        struct vring_virtqueue *vq;
1671        struct vring_packed_desc *ring;
1672        struct vring_packed_desc_event *driver, *device;
1673        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1674        size_t ring_size_in_bytes, event_size_in_bytes;
1675
1676        ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1677
1678        ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1679                                 &ring_dma_addr,
1680                                 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1681        if (!ring)
1682                goto err_ring;
1683
1684        event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1685
1686        driver = vring_alloc_queue(vdev, event_size_in_bytes,
1687                                   &driver_event_dma_addr,
1688                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1689        if (!driver)
1690                goto err_driver;
1691
1692        device = vring_alloc_queue(vdev, event_size_in_bytes,
1693                                   &device_event_dma_addr,
1694                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1695        if (!device)
1696                goto err_device;
1697
1698        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1699        if (!vq)
1700                goto err_vq;
1701
1702        vq->vq.callback = callback;
1703        vq->vq.vdev = vdev;
1704        vq->vq.name = name;
1705        vq->vq.num_free = num;
1706        vq->vq.index = index;
1707        vq->we_own_ring = true;
1708        vq->notify = notify;
1709        vq->weak_barriers = weak_barriers;
1710        vq->broken = false;
1711        vq->last_used_idx = 0;
1712        vq->event_triggered = false;
1713        vq->num_added = 0;
1714        vq->packed_ring = true;
1715        vq->use_dma_api = vring_use_dma_api(vdev);
1716#ifdef DEBUG
1717        vq->in_use = false;
1718        vq->last_add_time_valid = false;
1719#endif
1720
1721        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1722                !context;
1723        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1724
1725        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1726                vq->weak_barriers = false;
1727
1728        vq->packed.ring_dma_addr = ring_dma_addr;
1729        vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1730        vq->packed.device_event_dma_addr = device_event_dma_addr;
1731
1732        vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1733        vq->packed.event_size_in_bytes = event_size_in_bytes;
1734
1735        vq->packed.vring.num = num;
1736        vq->packed.vring.desc = ring;
1737        vq->packed.vring.driver = driver;
1738        vq->packed.vring.device = device;
1739
1740        vq->packed.next_avail_idx = 0;
1741        vq->packed.avail_wrap_counter = 1;
1742        vq->packed.used_wrap_counter = 1;
1743        vq->packed.event_flags_shadow = 0;
1744        vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1745
1746        vq->packed.desc_state = kmalloc_array(num,
1747                        sizeof(struct vring_desc_state_packed),
1748                        GFP_KERNEL);
1749        if (!vq->packed.desc_state)
1750                goto err_desc_state;
1751
1752        memset(vq->packed.desc_state, 0,
1753                num * sizeof(struct vring_desc_state_packed));
1754
1755        /* Put everything in free lists. */
1756        vq->free_head = 0;
1757
1758        vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1759        if (!vq->packed.desc_extra)
1760                goto err_desc_extra;
1761
1762        /* No callback?  Tell other side not to bother us. */
1763        if (!callback) {
1764                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1765                vq->packed.vring.driver->flags =
1766                        cpu_to_le16(vq->packed.event_flags_shadow);
1767        }
1768
1769        spin_lock(&vdev->vqs_list_lock);
1770        list_add_tail(&vq->vq.list, &vdev->vqs);
1771        spin_unlock(&vdev->vqs_list_lock);
1772        return &vq->vq;
1773
1774err_desc_extra:
1775        kfree(vq->packed.desc_state);
1776err_desc_state:
1777        kfree(vq);
1778err_vq:
1779        vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1780err_device:
1781        vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1782err_driver:
1783        vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1784err_ring:
1785        return NULL;
1786}
1787
1788
1789/*
1790 * Generic functions and exported symbols.
1791 */
1792
1793static inline int virtqueue_add(struct virtqueue *_vq,
1794                                struct scatterlist *sgs[],
1795                                unsigned int total_sg,
1796                                unsigned int out_sgs,
1797                                unsigned int in_sgs,
1798                                void *data,
1799                                void *ctx,
1800                                gfp_t gfp)
1801{
1802        struct vring_virtqueue *vq = to_vvq(_vq);
1803
1804        return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1805                                        out_sgs, in_sgs, data, ctx, gfp) :
1806                                 virtqueue_add_split(_vq, sgs, total_sg,
1807                                        out_sgs, in_sgs, data, ctx, gfp);
1808}
1809
1810/**
1811 * virtqueue_add_sgs - expose buffers to other end
1812 * @_vq: the struct virtqueue we're talking about.
1813 * @sgs: array of terminated scatterlists.
1814 * @out_sgs: the number of scatterlists readable by other side
1815 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1816 * @data: the token identifying the buffer.
1817 * @gfp: how to do memory allocations (if necessary).
1818 *
1819 * Caller must ensure we don't call this with other virtqueue operations
1820 * at the same time (except where noted).
1821 *
1822 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1823 */
1824int virtqueue_add_sgs(struct virtqueue *_vq,
1825                      struct scatterlist *sgs[],
1826                      unsigned int out_sgs,
1827                      unsigned int in_sgs,
1828                      void *data,
1829                      gfp_t gfp)
1830{
1831        unsigned int i, total_sg = 0;
1832
1833        /* Count them first. */
1834        for (i = 0; i < out_sgs + in_sgs; i++) {
1835                struct scatterlist *sg;
1836
1837                for (sg = sgs[i]; sg; sg = sg_next(sg))
1838                        total_sg++;
1839        }
1840        return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1841                             data, NULL, gfp);
1842}
1843EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
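
/*
 * Usage sketch (illustrative only; "vq", "req" and its fields are
 * hypothetical driver state, not defined in this file).  A request with a
 * device-readable header followed by a device-writable status byte might be
 * queued as:
 *
 *      struct scatterlist hdr, status;
 *      struct scatterlist *sgs[] = { &hdr, &status };
 *
 *      sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *      sg_init_one(&status, &req->status, sizeof(req->status));
 *      if (virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC) == 0)
 *              virtqueue_kick(vq);
 */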
1844
1845/**
1846 * virtqueue_add_outbuf - expose output buffers to other end
1847 * @vq: the struct virtqueue we're talking about.
1848 * @sg: scatterlist (must be well-formed and terminated!)
1849 * @num: the number of entries in @sg readable by other side
1850 * @data: the token identifying the buffer.
1851 * @gfp: how to do memory allocations (if necessary).
1852 *
1853 * Caller must ensure we don't call this with other virtqueue operations
1854 * at the same time (except where noted).
1855 *
1856 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1857 */
1858int virtqueue_add_outbuf(struct virtqueue *vq,
1859                         struct scatterlist *sg, unsigned int num,
1860                         void *data,
1861                         gfp_t gfp)
1862{
1863        return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1864}
1865EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
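
/*
 * Usage sketch (illustrative; "vq", "buf" and "len" are hypothetical driver
 * state):
 *
 *      struct scatterlist sg;
 *      int err;
 *
 *      sg_init_one(&sg, buf, len);
 *      err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 *
 * A non-zero return (typically -ENOSPC) means the ring was full and the
 * buffer was not queued.
 */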
1866
1867/**
1868 * virtqueue_add_inbuf - expose input buffers to other end
1869 * @vq: the struct virtqueue we're talking about.
1870 * @sg: scatterlist (must be well-formed and terminated!)
1871 * @num: the number of entries in @sg writable by other side
1872 * @data: the token identifying the buffer.
1873 * @gfp: how to do memory allocations (if necessary).
1874 *
1875 * Caller must ensure we don't call this with other virtqueue operations
1876 * at the same time (except where noted).
1877 *
1878 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1879 */
1880int virtqueue_add_inbuf(struct virtqueue *vq,
1881                        struct scatterlist *sg, unsigned int num,
1882                        void *data,
1883                        gfp_t gfp)
1884{
1885        return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1886}
1887EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
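
/*
 * Usage sketch (illustrative; the PAGE_SIZE buffers and "vq" are
 * hypothetical).  A receive path might keep the ring topped up with empty
 * buffers until it fills or an allocation fails:
 *
 *      struct scatterlist sg;
 *      void *buf;
 *
 *      while ((buf = kmalloc(PAGE_SIZE, GFP_ATOMIC)) != NULL) {
 *              sg_init_one(&sg, buf, PAGE_SIZE);
 *              if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC)) {
 *                      kfree(buf);
 *                      break;
 *              }
 *      }
 *      virtqueue_kick(vq);
 */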
1888
1889/**
1890 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1891 * @vq: the struct virtqueue we're talking about.
1892 * @sg: scatterlist (must be well-formed and terminated!)
1893 * @num: the number of entries in @sg writable by other side
1894 * @data: the token identifying the buffer.
1895 * @ctx: extra context for the token
1896 * @gfp: how to do memory allocations (if necessary).
1897 *
1898 * Caller must ensure we don't call this with other virtqueue operations
1899 * at the same time (except where noted).
1900 *
1901 * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1902 */
1903int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1904                        struct scatterlist *sg, unsigned int num,
1905                        void *data,
1906                        void *ctx,
1907                        gfp_t gfp)
1908{
1909        return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1910}
1911EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
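
/*
 * Usage sketch (illustrative; "vq", "buf" and "len" are hypothetical): @ctx
 * carries per-buffer metadata that virtqueue_get_buf_ctx() hands back
 * together with @data, so a driver can remember e.g. the buffer length
 * without embedding it in the buffer itself:
 *
 *      struct scatterlist sg;
 *
 *      sg_init_one(&sg, buf, len);
 *      virtqueue_add_inbuf_ctx(vq, &sg, 1, buf,
 *                              (void *)(unsigned long)len, GFP_ATOMIC);
 */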
1912
1913/**
1914 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1915 * @_vq: the struct virtqueue
1916 *
1917 * Instead of virtqueue_kick(), you can do:
1918 *      if (virtqueue_kick_prepare(vq))
1919 *              virtqueue_notify(vq);
1920 *
1921 * This is sometimes useful because virtqueue_kick_prepare() needs to be
1922 * serialized, but the actual virtqueue_notify() call does not.
1923 */
1924bool virtqueue_kick_prepare(struct virtqueue *_vq)
1925{
1926        struct vring_virtqueue *vq = to_vvq(_vq);
1927
1928        return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1929                                 virtqueue_kick_prepare_split(_vq);
1930}
1931EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
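
/*
 * Usage sketch (illustrative; "lock", "vq", "sg" and "buf" are hypothetical
 * driver state).  Splitting the kick lets the doorbell write happen outside
 * the lock that serializes the other virtqueue operations:
 *
 *      unsigned long flags;
 *      bool kick;
 *
 *      spin_lock_irqsave(&lock, flags);
 *      virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *      kick = virtqueue_kick_prepare(vq);
 *      spin_unlock_irqrestore(&lock, flags);
 *      if (kick)
 *              virtqueue_notify(vq);
 */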
1932
1933/**
1934 * virtqueue_notify - second half of split virtqueue_kick call.
1935 * @_vq: the struct virtqueue
1936 *
1937 * This does not need to be serialized.
1938 *
1939 * Returns false if host notify failed or queue is broken, otherwise true.
1940 */
1941bool virtqueue_notify(struct virtqueue *_vq)
1942{
1943        struct vring_virtqueue *vq = to_vvq(_vq);
1944
1945        if (unlikely(vq->broken))
1946                return false;
1947
1948        /* Prod other side to tell it about changes. */
1949        if (!vq->notify(_vq)) {
1950                vq->broken = true;
1951                return false;
1952        }
1953        return true;
1954}
1955EXPORT_SYMBOL_GPL(virtqueue_notify);
1956
1957/**
1958 * virtqueue_kick - update after add_buf
1959 * @vq: the struct virtqueue
1960 *
1961 * After one or more virtqueue_add_* calls, invoke this to kick
1962 * the other side.
1963 *
1964 * Caller must ensure we don't call this with other virtqueue
1965 * operations at the same time (except where noted).
1966 *
1967 * Returns false if kick failed, otherwise true.
1968 */
1969bool virtqueue_kick(struct virtqueue *vq)
1970{
1971        if (virtqueue_kick_prepare(vq))
1972                return virtqueue_notify(vq);
1973        return true;
1974}
1975EXPORT_SYMBOL_GPL(virtqueue_kick);
1976
1977/**
1978 * virtqueue_get_buf_ctx - get the next used buffer
1979 * @_vq: the struct virtqueue we're talking about.
1980 * @len: the length written into the buffer
1981 * @ctx: extra context for the token
1982 *
1983 * If the device wrote data into the buffer, @len will be set to the
1984 * amount written.  This means you don't need to clear the buffer
1985 * beforehand to ensure there's no data leakage in the case of short
1986 * writes.
1987 *
1988 * Caller must ensure we don't call this with other virtqueue
1989 * operations at the same time (except where noted).
1990 *
1991 * Returns NULL if there are no used buffers, or the "data" token
1992 * handed to virtqueue_add_*().
1993 */
1994void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1995                            void **ctx)
1996{
1997        struct vring_virtqueue *vq = to_vvq(_vq);
1998
1999        return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2000                                 virtqueue_get_buf_ctx_split(_vq, len, ctx);
2001}
2002EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2003
2004void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2005{
2006        return virtqueue_get_buf_ctx(_vq, len, NULL);
2007}
2008EXPORT_SYMBOL_GPL(virtqueue_get_buf);
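
/*
 * Usage sketch (illustrative; complete_request() is a hypothetical driver
 * function).  A completion path typically drains every used buffer in one
 * go, e.g. from the virtqueue callback:
 *
 *      unsigned int len;
 *      void *buf;
 *
 *      while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *              complete_request(buf, len);
 */
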
2009/**
2010 * virtqueue_disable_cb - disable callbacks
2011 * @_vq: the struct virtqueue we're talking about.
2012 *
2013 * Note that this is not necessarily synchronous, hence unreliable and only
2014 * useful as an optimization.
2015 *
2016 * Unlike other operations, this need not be serialized.
2017 */
2018void virtqueue_disable_cb(struct virtqueue *_vq)
2019{
2020        struct vring_virtqueue *vq = to_vvq(_vq);
2021
2022        /* If device triggered an event already it won't trigger one again:
2023         * no need to disable.
2024         */
2025        if (vq->event_triggered)
2026                return;
2027
2028        if (vq->packed_ring)
2029                virtqueue_disable_cb_packed(_vq);
2030        else
2031                virtqueue_disable_cb_split(_vq);
2032}
2033EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2034
2035/**
2036 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2037 * @_vq: the struct virtqueue we're talking about.
2038 *
2039 * This re-enables callbacks; it returns current queue state
2040 * in an opaque unsigned value. This value should be later tested by
2041 * virtqueue_poll, to detect a possible race between the driver checking for
2042 * more work, and enabling callbacks.
2043 *
2044 * Caller must ensure we don't call this with other virtqueue
2045 * operations at the same time (except where noted).
2046 */
2047unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2048{
2049        struct vring_virtqueue *vq = to_vvq(_vq);
2050
2051        if (vq->event_triggered)
2052                vq->event_triggered = false;
2053
2054        return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2055                                 virtqueue_enable_cb_prepare_split(_vq);
2056}
2057EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
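
/*
 * Usage sketch (illustrative, loosely modelled on a NAPI-style poll loop;
 * process_used_buffers() and reschedule_polling() are hypothetical):
 *
 *      unsigned int opaque;
 *
 *      process_used_buffers(vq);
 *      opaque = virtqueue_enable_cb_prepare(vq);
 *      if (virtqueue_poll(vq, opaque)) {
 *              virtqueue_disable_cb(vq);
 *              reschedule_polling();
 *      }
 */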
2058
2059/**
2060 * virtqueue_poll - query pending used buffers
2061 * @_vq: the struct virtqueue we're talking about.
2062 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2063 *
2064 * Returns "true" if there are pending used buffers in the queue.
2065 *
2066 * This does not need to be serialized.
2067 */
2068bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
2069{
2070        struct vring_virtqueue *vq = to_vvq(_vq);
2071
2072        if (unlikely(vq->broken))
2073                return false;
2074
2075        virtio_mb(vq->weak_barriers);
2076        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2077                                 virtqueue_poll_split(_vq, last_used_idx);
2078}
2079EXPORT_SYMBOL_GPL(virtqueue_poll);
2080
2081/**
2082 * virtqueue_enable_cb - restart callbacks after disable_cb.
2083 * @_vq: the struct virtqueue we're talking about.
2084 *
2085 * This re-enables callbacks; it returns "false" if there are pending
2086 * buffers in the queue, to detect a possible race between the driver
2087 * checking for more work, and enabling callbacks.
2088 *
2089 * Caller must ensure we don't call this with other virtqueue
2090 * operations at the same time (except where noted).
2091 */
2092bool virtqueue_enable_cb(struct virtqueue *_vq)
2093{
2094        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
2095
2096        return !virtqueue_poll(_vq, last_used_idx);
2097}
2098EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
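
/*
 * Usage sketch (illustrative; process_buf() is hypothetical).  Re-running
 * the drain loop whenever virtqueue_enable_cb() returns false closes the
 * race with buffers that arrived while callbacks were still disabled:
 *
 *      unsigned int len;
 *      void *buf;
 *
 *      do {
 *              virtqueue_disable_cb(vq);
 *              while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *                      process_buf(buf, len);
 *      } while (!virtqueue_enable_cb(vq));
 */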
2099
2100/**
2101 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2102 * @_vq: the struct virtqueue we're talking about.
2103 *
2104 * This re-enables callbacks but hints to the other side to delay
2105 * interrupts until most of the available buffers have been processed;
2106 * it returns "false" if there are many pending buffers in the queue,
2107 * to detect a possible race between the driver checking for more work,
2108 * and enabling callbacks.
2109 *
2110 * Caller must ensure we don't call this with other virtqueue
2111 * operations at the same time (except where noted).
2112 */
2113bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2114{
2115        struct vring_virtqueue *vq = to_vvq(_vq);
2116
2117        if (vq->event_triggered)
2118                vq->event_triggered = false;
2119
2120        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2121                                 virtqueue_enable_cb_delayed_split(_vq);
2122}
2123EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
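
/*
 * Usage sketch (illustrative; free_old_xmit() is hypothetical).  A transmit
 * path that only reclaims buffers in bulk can use the delayed variant to
 * reduce the interrupt rate, reprocessing immediately if buffers piled up
 * in the meantime:
 *
 *      free_old_xmit(vq);
 *      if (!virtqueue_enable_cb_delayed(vq))
 *              free_old_xmit(vq);
 */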
2124
2125/**
2126 * virtqueue_detach_unused_buf - detach first unused buffer
2127 * @_vq: the struct virtqueue we're talking about.
2128 *
2129 * Returns NULL or the "data" token handed to virtqueue_add_*().
2130 * This is not valid on an active queue; it is useful only for device
2131 * shutdown.
2132 */
2133void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2134{
2135        struct vring_virtqueue *vq = to_vvq(_vq);
2136
2137        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2138                                 virtqueue_detach_unused_buf_split(_vq);
2139}
2140EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
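
/*
 * Usage sketch (illustrative; free_buf() is hypothetical).  During device
 * removal, once the device has been reset and the queue is quiescent:
 *
 *      void *buf;
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *              free_buf(buf);
 */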
2141
2142static inline bool more_used(const struct vring_virtqueue *vq)
2143{
2144        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2145}
2146
2147irqreturn_t vring_interrupt(int irq, void *_vq)
2148{
2149        struct vring_virtqueue *vq = to_vvq(_vq);
2150
2151        if (!more_used(vq)) {
2152                pr_debug("virtqueue interrupt with no work for %p\n", vq);
2153                return IRQ_NONE;
2154        }
2155
2156        if (unlikely(vq->broken))
2157                return IRQ_HANDLED;
2158
2159        /* Just a hint for performance: so it's ok that this can be racy! */
2160        if (vq->event)
2161                vq->event_triggered = true;
2162
2163        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2164        if (vq->vq.callback)
2165                vq->vq.callback(&vq->vq);
2166
2167        return IRQ_HANDLED;
2168}
2169EXPORT_SYMBOL_GPL(vring_interrupt);
2170
2171/* Only available for split ring */
2172struct virtqueue *__vring_new_virtqueue(unsigned int index,
2173                                        struct vring vring,
2174                                        struct virtio_device *vdev,
2175                                        bool weak_barriers,
2176                                        bool context,
2177                                        bool (*notify)(struct virtqueue *),
2178                                        void (*callback)(struct virtqueue *),
2179                                        const char *name)
2180{
2181        struct vring_virtqueue *vq;
2182
2183        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2184                return NULL;
2185
2186        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2187        if (!vq)
2188                return NULL;
2189
2190        vq->packed_ring = false;
2191        vq->vq.callback = callback;
2192        vq->vq.vdev = vdev;
2193        vq->vq.name = name;
2194        vq->vq.num_free = vring.num;
2195        vq->vq.index = index;
2196        vq->we_own_ring = false;
2197        vq->notify = notify;
2198        vq->weak_barriers = weak_barriers;
2199        vq->broken = false;
2200        vq->last_used_idx = 0;
2201        vq->event_triggered = false;
2202        vq->num_added = 0;
2203        vq->use_dma_api = vring_use_dma_api(vdev);
2204#ifdef DEBUG
2205        vq->in_use = false;
2206        vq->last_add_time_valid = false;
2207#endif
2208
2209        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2210                !context;
2211        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2212
2213        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2214                vq->weak_barriers = false;
2215
2216        vq->split.queue_dma_addr = 0;
2217        vq->split.queue_size_in_bytes = 0;
2218
2219        vq->split.vring = vring;
2220        vq->split.avail_flags_shadow = 0;
2221        vq->split.avail_idx_shadow = 0;
2222
2223        /* No callback?  Tell other side not to bother us. */
2224        if (!callback) {
2225                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2226                if (!vq->event)
2227                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2228                                        vq->split.avail_flags_shadow);
2229        }
2230
2231        vq->split.desc_state = kmalloc_array(vring.num,
2232                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
2233        if (!vq->split.desc_state)
2234                goto err_state;
2235
2236        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
2237        if (!vq->split.desc_extra)
2238                goto err_extra;
2239
2240        /* Put everything in free lists. */
2241        vq->free_head = 0;
2242        memset(vq->split.desc_state, 0, vring.num *
2243                        sizeof(struct vring_desc_state_split));
2244
2245        spin_lock(&vdev->vqs_list_lock);
2246        list_add_tail(&vq->vq.list, &vdev->vqs);
2247        spin_unlock(&vdev->vqs_list_lock);
2248        return &vq->vq;
2249
2250err_extra:
2251        kfree(vq->split.desc_state);
2252err_state:
2253        kfree(vq);
2254        return NULL;
2255}
2256EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2257
2258struct virtqueue *vring_create_virtqueue(
2259        unsigned int index,
2260        unsigned int num,
2261        unsigned int vring_align,
2262        struct virtio_device *vdev,
2263        bool weak_barriers,
2264        bool may_reduce_num,
2265        bool context,
2266        bool (*notify)(struct virtqueue *),
2267        void (*callback)(struct virtqueue *),
2268        const char *name)
2269{
2271        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2272                return vring_create_virtqueue_packed(index, num, vring_align,
2273                                vdev, weak_barriers, may_reduce_num,
2274                                context, notify, callback, name);
2275
2276        return vring_create_virtqueue_split(index, num, vring_align,
2277                        vdev, weak_barriers, may_reduce_num,
2278                        context, notify, callback, name);
2279}
2280EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2281
2282/* Only available for split ring */
2283struct virtqueue *vring_new_virtqueue(unsigned int index,
2284                                      unsigned int num,
2285                                      unsigned int vring_align,
2286                                      struct virtio_device *vdev,
2287                                      bool weak_barriers,
2288                                      bool context,
2289                                      void *pages,
2290                                      bool (*notify)(struct virtqueue *vq),
2291                                      void (*callback)(struct virtqueue *vq),
2292                                      const char *name)
2293{
2294        struct vring vring;
2295
2296        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2297                return NULL;
2298
2299        vring_init(&vring, num, pages, vring_align);
2300        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2301                                     notify, callback, name);
2302}
2303EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2304
2305void vring_del_virtqueue(struct virtqueue *_vq)
2306{
2307        struct vring_virtqueue *vq = to_vvq(_vq);
2308
2309        spin_lock(&vq->vq.vdev->vqs_list_lock);
2310        list_del(&_vq->list);
2311        spin_unlock(&vq->vq.vdev->vqs_list_lock);
2312
2313        if (vq->we_own_ring) {
2314                if (vq->packed_ring) {
2315                        vring_free_queue(vq->vq.vdev,
2316                                         vq->packed.ring_size_in_bytes,
2317                                         vq->packed.vring.desc,
2318                                         vq->packed.ring_dma_addr);
2319
2320                        vring_free_queue(vq->vq.vdev,
2321                                         vq->packed.event_size_in_bytes,
2322                                         vq->packed.vring.driver,
2323                                         vq->packed.driver_event_dma_addr);
2324
2325                        vring_free_queue(vq->vq.vdev,
2326                                         vq->packed.event_size_in_bytes,
2327                                         vq->packed.vring.device,
2328                                         vq->packed.device_event_dma_addr);
2329
2330                        kfree(vq->packed.desc_state);
2331                        kfree(vq->packed.desc_extra);
2332                } else {
2333                        vring_free_queue(vq->vq.vdev,
2334                                         vq->split.queue_size_in_bytes,
2335                                         vq->split.vring.desc,
2336                                         vq->split.queue_dma_addr);
2337                }
2338        }
2339        if (!vq->packed_ring) {
2340                kfree(vq->split.desc_state);
2341                kfree(vq->split.desc_extra);
2342        }
2343        kfree(vq);
2344}
2345EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2346
2347/* Manipulates transport-specific feature bits. */
2348void vring_transport_features(struct virtio_device *vdev)
2349{
2350        unsigned int i;
2351
2352        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2353                switch (i) {
2354                case VIRTIO_RING_F_INDIRECT_DESC:
2355                        break;
2356                case VIRTIO_RING_F_EVENT_IDX:
2357                        break;
2358                case VIRTIO_F_VERSION_1:
2359                        break;
2360                case VIRTIO_F_ACCESS_PLATFORM:
2361                        break;
2362                case VIRTIO_F_RING_PACKED:
2363                        break;
2364                case VIRTIO_F_ORDER_PLATFORM:
2365                        break;
2366                default:
2367                        /* We don't understand this bit. */
2368                        __virtio_clear_bit(vdev, i);
2369                }
2370        }
2371}
2372EXPORT_SYMBOL_GPL(vring_transport_features);
2373
2374/**
2375 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2376 * @_vq: the struct virtqueue containing the vring of interest.
2377 *
2378 * Returns the size of the vring.  This is mainly used for boasting to
2379 * userspace.  Unlike other operations, this need not be serialized.
2380 */
2381unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2382{
2384        struct vring_virtqueue *vq = to_vvq(_vq);
2385
2386        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2387}
2388EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2389
2390bool virtqueue_is_broken(struct virtqueue *_vq)
2391{
2392        struct vring_virtqueue *vq = to_vvq(_vq);
2393
2394        return READ_ONCE(vq->broken);
2395}
2396EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2397
2398/*
2399 * This should prevent the device from being used, allowing drivers to
2400 * recover.  You may need to grab appropriate locks to flush.
2401 */
2402void virtio_break_device(struct virtio_device *dev)
2403{
2404        struct virtqueue *_vq;
2405
2406        spin_lock(&dev->vqs_list_lock);
2407        list_for_each_entry(_vq, &dev->vqs, list) {
2408                struct vring_virtqueue *vq = to_vvq(_vq);
2409
2410                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2411                WRITE_ONCE(vq->broken, true);
2412        }
2413        spin_unlock(&dev->vqs_list_lock);
2414}
2415EXPORT_SYMBOL_GPL(virtio_break_device);
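
/*
 * Usage sketch (illustrative): a transport that detects surprise removal
 * might call virtio_break_device(vdev); afterwards virtqueue_is_broken()
 * returns true for every vq and the queue operations above bail out early
 * (e.g. virtqueue_notify() returns false and virtqueue_get_buf() returns
 * NULL).
 */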
2416
2417dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2418{
2419        struct vring_virtqueue *vq = to_vvq(_vq);
2420
2421        BUG_ON(!vq->we_own_ring);
2422
2423        if (vq->packed_ring)
2424                return vq->packed.ring_dma_addr;
2425
2426        return vq->split.queue_dma_addr;
2427}
2428EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2429
2430dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2431{
2432        struct vring_virtqueue *vq = to_vvq(_vq);
2433
2434        BUG_ON(!vq->we_own_ring);
2435
2436        if (vq->packed_ring)
2437                return vq->packed.driver_event_dma_addr;
2438
2439        return vq->split.queue_dma_addr +
2440                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2441}
2442EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2443
2444dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2445{
2446        struct vring_virtqueue *vq = to_vvq(_vq);
2447
2448        BUG_ON(!vq->we_own_ring);
2449
2450        if (vq->packed_ring)
2451                return vq->packed.device_event_dma_addr;
2452
2453        return vq->split.queue_dma_addr +
2454                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2455}
2456EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2457
2458/* Only available for split ring */
2459const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2460{
2461        return &to_vvq(vq)->split.vring;
2462}
2463EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2464
2465MODULE_LICENSE("GPL");
2466