linux/drivers/virtio/virtio_ring.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* Virtio ring implementation.
   3 *
   4 *  Copyright 2007 Rusty Russell IBM Corporation
   5 */
   6#include <linux/virtio.h>
   7#include <linux/virtio_ring.h>
   8#include <linux/virtio_config.h>
   9#include <linux/device.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/hrtimer.h>
  13#include <linux/dma-mapping.h>
  14#include <linux/spinlock.h>
  15#include <xen/xen.h>
  16
  17#ifdef DEBUG
  18/* For development, we want to crash whenever the ring is screwed. */
  19#define BAD_RING(_vq, fmt, args...)                             \
  20        do {                                                    \
  21                dev_err(&(_vq)->vq.vdev->dev,                   \
  22                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  23                BUG();                                          \
  24        } while (0)
  25/* Caller is supposed to guarantee no reentry. */
  26#define START_USE(_vq)                                          \
  27        do {                                                    \
  28                if ((_vq)->in_use)                              \
  29                        panic("%s:in_use = %i\n",               \
  30                              (_vq)->vq.name, (_vq)->in_use);   \
  31                (_vq)->in_use = __LINE__;                       \
  32        } while (0)
  33#define END_USE(_vq) \
  34        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
  35#define LAST_ADD_TIME_UPDATE(_vq)                               \
  36        do {                                                    \
  37                ktime_t now = ktime_get();                      \
  38                                                                \
   39                /* No kick or get, with 0.1 seconds between?  Warn. */ \
  40                if ((_vq)->last_add_time_valid)                 \
  41                        WARN_ON(ktime_to_ms(ktime_sub(now,      \
  42                                (_vq)->last_add_time)) > 100);  \
  43                (_vq)->last_add_time = now;                     \
  44                (_vq)->last_add_time_valid = true;              \
  45        } while (0)
  46#define LAST_ADD_TIME_CHECK(_vq)                                \
  47        do {                                                    \
  48                if ((_vq)->last_add_time_valid) {               \
  49                        WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
  50                                      (_vq)->last_add_time)) > 100); \
  51                }                                               \
  52        } while (0)
  53#define LAST_ADD_TIME_INVALID(_vq)                              \
  54        ((_vq)->last_add_time_valid = false)
  55#else
  56#define BAD_RING(_vq, fmt, args...)                             \
  57        do {                                                    \
  58                dev_err(&_vq->vq.vdev->dev,                     \
  59                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  60                (_vq)->broken = true;                           \
  61        } while (0)
  62#define START_USE(vq)
  63#define END_USE(vq)
  64#define LAST_ADD_TIME_UPDATE(vq)
  65#define LAST_ADD_TIME_CHECK(vq)
  66#define LAST_ADD_TIME_INVALID(vq)
  67#endif
  68
  69struct vring_desc_state_split {
  70        void *data;                     /* Data for callback. */
  71        struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
  72};
  73
  74struct vring_desc_state_packed {
  75        void *data;                     /* Data for callback. */
  76        struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
  77        u16 num;                        /* Descriptor list length. */
  78        u16 last;                       /* The last desc state in a list. */
  79};
  80
  81struct vring_desc_extra {
  82        dma_addr_t addr;                /* Descriptor DMA addr. */
  83        u32 len;                        /* Descriptor length. */
  84        u16 flags;                      /* Descriptor flags. */
  85        u16 next;                       /* The next desc state in a list. */
  86};
  87
  88struct vring_virtqueue {
  89        struct virtqueue vq;
  90
  91        /* Is this a packed ring? */
  92        bool packed_ring;
  93
  94        /* Is DMA API used? */
  95        bool use_dma_api;
  96
  97        /* Can we use weak barriers? */
  98        bool weak_barriers;
  99
 100        /* Other side has made a mess, don't try any more. */
 101        bool broken;
 102
 103        /* Host supports indirect buffers */
 104        bool indirect;
 105
 106        /* Host publishes avail event idx */
 107        bool event;
 108
 109        /* Head of free buffer list. */
 110        unsigned int free_head;
 111        /* Number we've added since last sync. */
 112        unsigned int num_added;
 113
 114        /* Last used index we've seen. */
 115        u16 last_used_idx;
 116
 117        /* Hint for event idx: already triggered no need to disable. */
 118        bool event_triggered;
 119
 120        union {
 121                /* Available for split ring */
 122                struct {
 123                        /* Actual memory layout for this queue. */
 124                        struct vring vring;
 125
 126                        /* Last written value to avail->flags */
 127                        u16 avail_flags_shadow;
 128
 129                        /*
 130                         * Last written value to avail->idx in
 131                         * guest byte order.
 132                         */
 133                        u16 avail_idx_shadow;
 134
 135                        /* Per-descriptor state. */
 136                        struct vring_desc_state_split *desc_state;
 137                        struct vring_desc_extra *desc_extra;
 138
 139                        /* DMA address and size information */
 140                        dma_addr_t queue_dma_addr;
 141                        size_t queue_size_in_bytes;
 142                } split;
 143
 144                /* Available for packed ring */
 145                struct {
 146                        /* Actual memory layout for this queue. */
 147                        struct {
 148                                unsigned int num;
 149                                struct vring_packed_desc *desc;
 150                                struct vring_packed_desc_event *driver;
 151                                struct vring_packed_desc_event *device;
 152                        } vring;
 153
 154                        /* Driver ring wrap counter. */
 155                        bool avail_wrap_counter;
 156
 157                        /* Device ring wrap counter. */
 158                        bool used_wrap_counter;
 159
 160                        /* Avail used flags. */
 161                        u16 avail_used_flags;
 162
 163                        /* Index of the next avail descriptor. */
 164                        u16 next_avail_idx;
 165
 166                        /*
 167                         * Last written value to driver->flags in
 168                         * guest byte order.
 169                         */
 170                        u16 event_flags_shadow;
 171
 172                        /* Per-descriptor state. */
 173                        struct vring_desc_state_packed *desc_state;
 174                        struct vring_desc_extra *desc_extra;
 175
 176                        /* DMA address and size information */
 177                        dma_addr_t ring_dma_addr;
 178                        dma_addr_t driver_event_dma_addr;
 179                        dma_addr_t device_event_dma_addr;
 180                        size_t ring_size_in_bytes;
 181                        size_t event_size_in_bytes;
 182                } packed;
 183        };
 184
 185        /* How to notify other side. FIXME: commonalize hcalls! */
 186        bool (*notify)(struct virtqueue *vq);
 187
 188        /* DMA, allocation, and size information */
 189        bool we_own_ring;
 190
 191#ifdef DEBUG
 192        /* They're supposed to lock for us. */
 193        unsigned int in_use;
 194
 195        /* Figure out if their kicks are too delayed. */
 196        bool last_add_time_valid;
 197        ktime_t last_add_time;
 198#endif
 199};
 200
 201
 202/*
 203 * Helpers.
 204 */
 205
 206#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 207
 208static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
 209                                          unsigned int total_sg)
 210{
 211        struct vring_virtqueue *vq = to_vvq(_vq);
 212
 213        /*
 214         * If the host supports indirect descriptor tables, and we have multiple
 215         * buffers, then go indirect. FIXME: tune this threshold
 216         */
 217        return (vq->indirect && total_sg > 1 && vq->vq.num_free);
 218}
 219
 220/*
 221 * Modern virtio devices have feature bits to specify whether they need a
 222 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 223 *
 224 * If there, the interaction between virtio and DMA API is messy.
 225 *
 226 * On most systems with virtio, physical addresses match bus addresses,
 227 * and it doesn't particularly matter whether we use the DMA API.
 228 *
 229 * On some systems, including Xen and any system with a physical device
 230 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 231 * for virtio DMA to work at all.
 232 *
 233 * On other systems, including SPARC and PPC64, virtio-pci devices are
 234 * enumerated as though they are behind an IOMMU, but the virtio host
 235 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 236 * there or somehow map everything as the identity.
 237 *
 238 * For the time being, we preserve historic behavior and bypass the DMA
 239 * API.
 240 *
 241 * TODO: install a per-device DMA ops structure that does the right thing
 242 * taking into account all the above quirks, and use the DMA API
 243 * unconditionally on data path.
 244 */
 245
 246static bool vring_use_dma_api(struct virtio_device *vdev)
 247{
 248        if (!virtio_has_dma_quirk(vdev))
 249                return true;
 250
 251        /* Otherwise, we are left to guess. */
 252        /*
  253         * In theory, it's possible to have a buggy QEMU-supplied
 254         * emulated Q35 IOMMU and Xen enabled at the same time.  On
 255         * such a configuration, virtio has never worked and will
 256         * not work without an even larger kludge.  Instead, enable
 257         * the DMA API if we're a Xen guest, which at least allows
 258         * all of the sensible Xen configurations to work correctly.
 259         */
 260        if (xen_domain())
 261                return true;
 262
 263        return false;
 264}
 265
 266size_t virtio_max_dma_size(struct virtio_device *vdev)
 267{
 268        size_t max_segment_size = SIZE_MAX;
 269
 270        if (vring_use_dma_api(vdev))
 271                max_segment_size = dma_max_mapping_size(vdev->dev.parent);
 272
 273        return max_segment_size;
 274}
 275EXPORT_SYMBOL_GPL(virtio_max_dma_size);
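
/*
 * Illustrative sketch (not part of the original file): a driver would
 * typically clamp its per-segment limit with virtio_max_dma_size() before
 * configuring upper-layer limits.  The helper below is hypothetical; only
 * virtio_max_dma_size() itself is real (exported above).
 */
static inline u32 example_segment_limit(struct virtio_device *vdev)
{
        size_t max_seg = virtio_max_dma_size(vdev);

        /* Don't promise more than a 32-bit descriptor length can carry. */
        return max_seg > U32_MAX ? U32_MAX : (u32)max_seg;
}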
 276
 277static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 278                              dma_addr_t *dma_handle, gfp_t flag)
 279{
 280        if (vring_use_dma_api(vdev)) {
 281                return dma_alloc_coherent(vdev->dev.parent, size,
 282                                          dma_handle, flag);
 283        } else {
 284                void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
 285
 286                if (queue) {
 287                        phys_addr_t phys_addr = virt_to_phys(queue);
 288                        *dma_handle = (dma_addr_t)phys_addr;
 289
 290                        /*
  291                         * Sanity check: make sure we didn't truncate
 292                         * the address.  The only arches I can find that
 293                         * have 64-bit phys_addr_t but 32-bit dma_addr_t
 294                         * are certain non-highmem MIPS and x86
 295                         * configurations, but these configurations
 296                         * should never allocate physical pages above 32
 297                         * bits, so this is fine.  Just in case, throw a
 298                         * warning and abort if we end up with an
 299                         * unrepresentable address.
 300                         */
 301                        if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
 302                                free_pages_exact(queue, PAGE_ALIGN(size));
 303                                return NULL;
 304                        }
 305                }
 306                return queue;
 307        }
 308}
 309
 310static void vring_free_queue(struct virtio_device *vdev, size_t size,
 311                             void *queue, dma_addr_t dma_handle)
 312{
 313        if (vring_use_dma_api(vdev))
 314                dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
 315        else
 316                free_pages_exact(queue, PAGE_ALIGN(size));
 317}
 318
 319/*
 320 * The DMA ops on various arches are rather gnarly right now, and
 321 * making all of the arch DMA ops work on the vring device itself
 322 * is a mess.  For now, we use the parent device for DMA ops.
 323 */
 324static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 325{
 326        return vq->vq.vdev->dev.parent;
 327}
 328
 329/* Map one sg entry. */
 330static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
 331                                   struct scatterlist *sg,
 332                                   enum dma_data_direction direction)
 333{
 334        if (!vq->use_dma_api)
 335                return (dma_addr_t)sg_phys(sg);
 336
 337        /*
 338         * We can't use dma_map_sg, because we don't use scatterlists in
 339         * the way it expects (we don't guarantee that the scatterlist
 340         * will exist for the lifetime of the mapping).
 341         */
 342        return dma_map_page(vring_dma_dev(vq),
 343                            sg_page(sg), sg->offset, sg->length,
 344                            direction);
 345}
 346
 347static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 348                                   void *cpu_addr, size_t size,
 349                                   enum dma_data_direction direction)
 350{
 351        if (!vq->use_dma_api)
 352                return (dma_addr_t)virt_to_phys(cpu_addr);
 353
 354        return dma_map_single(vring_dma_dev(vq),
 355                              cpu_addr, size, direction);
 356}
 357
 358static int vring_mapping_error(const struct vring_virtqueue *vq,
 359                               dma_addr_t addr)
 360{
 361        if (!vq->use_dma_api)
 362                return 0;
 363
 364        return dma_mapping_error(vring_dma_dev(vq), addr);
 365}
 366
 367
 368/*
 369 * Split ring specific functions - *_split().
 370 */
 371
 372static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 373                                           struct vring_desc *desc)
 374{
 375        u16 flags;
 376
 377        if (!vq->use_dma_api)
 378                return;
 379
 380        flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 381
 382        dma_unmap_page(vring_dma_dev(vq),
 383                       virtio64_to_cpu(vq->vq.vdev, desc->addr),
 384                       virtio32_to_cpu(vq->vq.vdev, desc->len),
 385                       (flags & VRING_DESC_F_WRITE) ?
 386                       DMA_FROM_DEVICE : DMA_TO_DEVICE);
 387}
 388
 389static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 390                                          unsigned int i)
 391{
 392        struct vring_desc_extra *extra = vq->split.desc_extra;
 393        u16 flags;
 394
 395        if (!vq->use_dma_api)
 396                goto out;
 397
 398        flags = extra[i].flags;
 399
 400        if (flags & VRING_DESC_F_INDIRECT) {
 401                dma_unmap_single(vring_dma_dev(vq),
 402                                 extra[i].addr,
 403                                 extra[i].len,
 404                                 (flags & VRING_DESC_F_WRITE) ?
 405                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 406        } else {
 407                dma_unmap_page(vring_dma_dev(vq),
 408                               extra[i].addr,
 409                               extra[i].len,
 410                               (flags & VRING_DESC_F_WRITE) ?
 411                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 412        }
 413
 414out:
 415        return extra[i].next;
 416}
 417
 418static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 419                                               unsigned int total_sg,
 420                                               gfp_t gfp)
 421{
 422        struct vring_desc *desc;
 423        unsigned int i;
 424
 425        /*
 426         * We require lowmem mappings for the descriptors because
 427         * otherwise virt_to_phys will give us bogus addresses in the
 428         * virtqueue.
 429         */
 430        gfp &= ~__GFP_HIGHMEM;
 431
 432        desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
 433        if (!desc)
 434                return NULL;
 435
 436        for (i = 0; i < total_sg; i++)
 437                desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
 438        return desc;
 439}
 440
 441static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 442                                                    struct vring_desc *desc,
 443                                                    unsigned int i,
 444                                                    dma_addr_t addr,
 445                                                    unsigned int len,
 446                                                    u16 flags,
 447                                                    bool indirect)
 448{
 449        struct vring_virtqueue *vring = to_vvq(vq);
 450        struct vring_desc_extra *extra = vring->split.desc_extra;
 451        u16 next;
 452
 453        desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
 454        desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
 455        desc[i].len = cpu_to_virtio32(vq->vdev, len);
 456
 457        if (!indirect) {
 458                next = extra[i].next;
 459                desc[i].next = cpu_to_virtio16(vq->vdev, next);
 460
 461                extra[i].addr = addr;
 462                extra[i].len = len;
 463                extra[i].flags = flags;
 464        } else
 465                next = virtio16_to_cpu(vq->vdev, desc[i].next);
 466
 467        return next;
 468}
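
/*
 * Illustrative sketch (not part of the original file): linking a two-entry
 * direct chain with the helper above.  In the real path virtqueue_add_split()
 * below drives this loop and strips VRING_DESC_F_NEXT from the final entry
 * after the fact; here the last entry simply never sets it.  All addresses
 * and lengths are hypothetical.
 */
static inline void example_add_two_direct(struct virtqueue *vq,
                                          struct vring_desc *desc,
                                          unsigned int head,
                                          dma_addr_t a0, unsigned int l0,
                                          dma_addr_t a1, unsigned int l1)
{
        unsigned int i = head;

        /* First entry: chained, so VRING_DESC_F_NEXT is set. */
        i = virtqueue_add_desc_split(vq, desc, i, a0, l0,
                                     VRING_DESC_F_NEXT, false);
        /* Last entry: no NEXT flag, the chain ends here. */
        virtqueue_add_desc_split(vq, desc, i, a1, l1, 0, false);
}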
 469
 470static inline int virtqueue_add_split(struct virtqueue *_vq,
 471                                      struct scatterlist *sgs[],
 472                                      unsigned int total_sg,
 473                                      unsigned int out_sgs,
 474                                      unsigned int in_sgs,
 475                                      void *data,
 476                                      void *ctx,
 477                                      gfp_t gfp)
 478{
 479        struct vring_virtqueue *vq = to_vvq(_vq);
 480        struct scatterlist *sg;
 481        struct vring_desc *desc;
 482        unsigned int i, n, avail, descs_used, prev, err_idx;
 483        int head;
 484        bool indirect;
 485
 486        START_USE(vq);
 487
 488        BUG_ON(data == NULL);
 489        BUG_ON(ctx && vq->indirect);
 490
 491        if (unlikely(vq->broken)) {
 492                END_USE(vq);
 493                return -EIO;
 494        }
 495
 496        LAST_ADD_TIME_UPDATE(vq);
 497
 498        BUG_ON(total_sg == 0);
 499
 500        head = vq->free_head;
 501
 502        if (virtqueue_use_indirect(_vq, total_sg))
 503                desc = alloc_indirect_split(_vq, total_sg, gfp);
 504        else {
 505                desc = NULL;
 506                WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 507        }
 508
 509        if (desc) {
 510                /* Use a single buffer which doesn't continue */
 511                indirect = true;
 512                /* Set up rest to use this indirect table. */
 513                i = 0;
 514                descs_used = 1;
 515        } else {
 516                indirect = false;
 517                desc = vq->split.vring.desc;
 518                i = head;
 519                descs_used = total_sg;
 520        }
 521
 522        if (vq->vq.num_free < descs_used) {
 523                pr_debug("Can't add buf len %i - avail = %i\n",
 524                         descs_used, vq->vq.num_free);
 525                /* FIXME: for historical reasons, we force a notify here if
 526                 * there are outgoing parts to the buffer.  Presumably the
 527                 * host should service the ring ASAP. */
 528                if (out_sgs)
 529                        vq->notify(&vq->vq);
 530                if (indirect)
 531                        kfree(desc);
 532                END_USE(vq);
 533                return -ENOSPC;
 534        }
 535
 536        for (n = 0; n < out_sgs; n++) {
 537                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 538                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 539                        if (vring_mapping_error(vq, addr))
 540                                goto unmap_release;
 541
 542                        prev = i;
 543                        /* Note that we trust indirect descriptor
  544                         * table since it uses streaming DMA mapping.
 545                         */
 546                        i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
 547                                                     VRING_DESC_F_NEXT,
 548                                                     indirect);
 549                }
 550        }
 551        for (; n < (out_sgs + in_sgs); n++) {
 552                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 553                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
 554                        if (vring_mapping_error(vq, addr))
 555                                goto unmap_release;
 556
 557                        prev = i;
 558                        /* Note that we trust indirect descriptor
  559                         * table since it uses streaming DMA mapping.
 560                         */
 561                        i = virtqueue_add_desc_split(_vq, desc, i, addr,
 562                                                     sg->length,
 563                                                     VRING_DESC_F_NEXT |
 564                                                     VRING_DESC_F_WRITE,
 565                                                     indirect);
 566                }
 567        }
 568        /* Last one doesn't continue. */
 569        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 570        if (!indirect && vq->use_dma_api)
 571                vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
 572                        ~VRING_DESC_F_NEXT;
 573
 574        if (indirect) {
 575                /* Now that the indirect table is filled in, map it. */
 576                dma_addr_t addr = vring_map_single(
 577                        vq, desc, total_sg * sizeof(struct vring_desc),
 578                        DMA_TO_DEVICE);
 579                if (vring_mapping_error(vq, addr))
 580                        goto unmap_release;
 581
 582                virtqueue_add_desc_split(_vq, vq->split.vring.desc,
 583                                         head, addr,
 584                                         total_sg * sizeof(struct vring_desc),
 585                                         VRING_DESC_F_INDIRECT,
 586                                         false);
 587        }
 588
 589        /* We're using some buffers from the free list. */
 590        vq->vq.num_free -= descs_used;
 591
 592        /* Update free pointer */
 593        if (indirect)
 594                vq->free_head = vq->split.desc_extra[head].next;
 595        else
 596                vq->free_head = i;
 597
 598        /* Store token and indirect buffer state. */
 599        vq->split.desc_state[head].data = data;
 600        if (indirect)
 601                vq->split.desc_state[head].indir_desc = desc;
 602        else
 603                vq->split.desc_state[head].indir_desc = ctx;
 604
 605        /* Put entry in available array (but don't update avail->idx until they
 606         * do sync). */
 607        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
 608        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 609
 610        /* Descriptors and available array need to be set before we expose the
 611         * new available array entries. */
 612        virtio_wmb(vq->weak_barriers);
 613        vq->split.avail_idx_shadow++;
 614        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 615                                                vq->split.avail_idx_shadow);
 616        vq->num_added++;
 617
 618        pr_debug("Added buffer head %i to %p\n", head, vq);
 619        END_USE(vq);
 620
 621        /* This is very unlikely, but theoretically possible.  Kick
 622         * just in case. */
 623        if (unlikely(vq->num_added == (1 << 16) - 1))
 624                virtqueue_kick(_vq);
 625
 626        return 0;
 627
 628unmap_release:
 629        err_idx = i;
 630
 631        if (indirect)
 632                i = 0;
 633        else
 634                i = head;
 635
 636        for (n = 0; n < total_sg; n++) {
 637                if (i == err_idx)
 638                        break;
 639                if (indirect) {
 640                        vring_unmap_one_split_indirect(vq, &desc[i]);
 641                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 642                } else
 643                        i = vring_unmap_one_split(vq, i);
 644        }
 645
 646        if (indirect)
 647                kfree(desc);
 648
 649        END_USE(vq);
 650        return -ENOMEM;
 651}
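
/*
 * Illustrative sketch (not part of the original file): how a driver feeds
 * the add path above through the public API.  One "out" buffer (request)
 * and one "in" buffer (response) are chained in a single add; the buffer
 * names and the token are hypothetical.
 */
static inline int example_driver_add(struct virtqueue *vq,
                                     void *req, unsigned int req_len,
                                     void *resp, unsigned int resp_len,
                                     void *token)
{
        struct scatterlist out, in, *sgs[2];
        int err;

        sg_init_one(&out, req, req_len);
        sg_init_one(&in, resp, resp_len);
        sgs[0] = &out;          /* read by the device */
        sgs[1] = &in;           /* written by the device */

        err = virtqueue_add_sgs(vq, sgs, 1, 1, token, GFP_ATOMIC);
        if (err)
                return err;

        if (virtqueue_kick_prepare(vq))
                virtqueue_notify(vq);
        return 0;
}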
 652
 653static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 654{
 655        struct vring_virtqueue *vq = to_vvq(_vq);
 656        u16 new, old;
 657        bool needs_kick;
 658
 659        START_USE(vq);
 660        /* We need to expose available array entries before checking avail
 661         * event. */
 662        virtio_mb(vq->weak_barriers);
 663
 664        old = vq->split.avail_idx_shadow - vq->num_added;
 665        new = vq->split.avail_idx_shadow;
 666        vq->num_added = 0;
 667
 668        LAST_ADD_TIME_CHECK(vq);
 669        LAST_ADD_TIME_INVALID(vq);
 670
 671        if (vq->event) {
 672                needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
 673                                        vring_avail_event(&vq->split.vring)),
 674                                              new, old);
 675        } else {
 676                needs_kick = !(vq->split.vring.used->flags &
 677                                        cpu_to_virtio16(_vq->vdev,
 678                                                VRING_USED_F_NO_NOTIFY));
 679        }
 680        END_USE(vq);
 681        return needs_kick;
 682}
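
/*
 * For reference, vring_need_event() (uapi/linux/virtio_ring.h) checks
 * whether the event index published by the device falls inside the window
 * of entries exposed since the last kick, restated here for illustration:
 */
static inline bool example_need_kick(u16 event_idx, u16 new_idx, u16 old)
{
        /* Same test as vring_need_event(): did we cross event_idx? */
        return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
}
/*
 * Worked example (hypothetical numbers): old = 10, new = 14 and the device
 * asked to be notified at event_idx = 12: 14 - 12 - 1 = 1 < 14 - 10 = 4,
 * so a kick is needed.  With event_idx = 20 the left-hand side wraps to a
 * large u16 value and no kick is sent.
 */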
 683
 684static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 685                             void **ctx)
 686{
 687        unsigned int i, j;
 688        __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 689
 690        /* Clear data ptr. */
 691        vq->split.desc_state[head].data = NULL;
 692
 693        /* Put back on free list: unmap first-level descriptors and find end */
 694        i = head;
 695
 696        while (vq->split.vring.desc[i].flags & nextflag) {
 697                vring_unmap_one_split(vq, i);
 698                i = vq->split.desc_extra[i].next;
 699                vq->vq.num_free++;
 700        }
 701
 702        vring_unmap_one_split(vq, i);
 703        vq->split.desc_extra[i].next = vq->free_head;
 704        vq->free_head = head;
 705
 706        /* Plus final descriptor */
 707        vq->vq.num_free++;
 708
 709        if (vq->indirect) {
 710                struct vring_desc *indir_desc =
 711                                vq->split.desc_state[head].indir_desc;
 712                u32 len;
 713
 714                /* Free the indirect table, if any, now that it's unmapped. */
 715                if (!indir_desc)
 716                        return;
 717
 718                len = vq->split.desc_extra[head].len;
 719
 720                BUG_ON(!(vq->split.desc_extra[head].flags &
 721                                VRING_DESC_F_INDIRECT));
 722                BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 723
 724                for (j = 0; j < len / sizeof(struct vring_desc); j++)
 725                        vring_unmap_one_split_indirect(vq, &indir_desc[j]);
 726
 727                kfree(indir_desc);
 728                vq->split.desc_state[head].indir_desc = NULL;
 729        } else if (ctx) {
 730                *ctx = vq->split.desc_state[head].indir_desc;
 731        }
 732}
 733
 734static inline bool more_used_split(const struct vring_virtqueue *vq)
 735{
 736        return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
 737                        vq->split.vring.used->idx);
 738}
 739
 740static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 741                                         unsigned int *len,
 742                                         void **ctx)
 743{
 744        struct vring_virtqueue *vq = to_vvq(_vq);
 745        void *ret;
 746        unsigned int i;
 747        u16 last_used;
 748
 749        START_USE(vq);
 750
 751        if (unlikely(vq->broken)) {
 752                END_USE(vq);
 753                return NULL;
 754        }
 755
 756        if (!more_used_split(vq)) {
 757                pr_debug("No more buffers in queue\n");
 758                END_USE(vq);
 759                return NULL;
 760        }
 761
 762        /* Only get used array entries after they have been exposed by host. */
 763        virtio_rmb(vq->weak_barriers);
 764
 765        last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
 766        i = virtio32_to_cpu(_vq->vdev,
 767                        vq->split.vring.used->ring[last_used].id);
 768        *len = virtio32_to_cpu(_vq->vdev,
 769                        vq->split.vring.used->ring[last_used].len);
 770
 771        if (unlikely(i >= vq->split.vring.num)) {
 772                BAD_RING(vq, "id %u out of range\n", i);
 773                return NULL;
 774        }
 775        if (unlikely(!vq->split.desc_state[i].data)) {
 776                BAD_RING(vq, "id %u is not a head!\n", i);
 777                return NULL;
 778        }
 779
 780        /* detach_buf_split clears data, so grab it now. */
 781        ret = vq->split.desc_state[i].data;
 782        detach_buf_split(vq, i, ctx);
 783        vq->last_used_idx++;
 784        /* If we expect an interrupt for the next entry, tell host
 785         * by writing event index and flush out the write before
 786         * the read in the next get_buf call. */
 787        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 788                virtio_store_mb(vq->weak_barriers,
 789                                &vring_used_event(&vq->split.vring),
 790                                cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 791
 792        LAST_ADD_TIME_INVALID(vq);
 793
 794        END_USE(vq);
 795        return ret;
 796}
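
/*
 * Illustrative sketch (not part of the original file): the canonical driver
 * completion loop on top of the get path above.  complete_request() stands
 * in for a hypothetical driver callback.
 */
static inline void example_drain_used(struct virtqueue *vq,
                                      void (*complete_request)(void *token,
                                                               unsigned int len))
{
        unsigned int len;
        void *token;

        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                complete_request(token, len);
}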
 797
 798static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 799{
 800        struct vring_virtqueue *vq = to_vvq(_vq);
 801
 802        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 803                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 804                if (vq->event)
 805                        /* TODO: this is a hack. Figure out a cleaner value to write. */
 806                        vring_used_event(&vq->split.vring) = 0x0;
 807                else
 808                        vq->split.vring.avail->flags =
 809                                cpu_to_virtio16(_vq->vdev,
 810                                                vq->split.avail_flags_shadow);
 811        }
 812}
 813
 814static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
 815{
 816        struct vring_virtqueue *vq = to_vvq(_vq);
 817        u16 last_used_idx;
 818
 819        START_USE(vq);
 820
 821        /* We optimistically turn back on interrupts, then check if there was
 822         * more to do. */
 823        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 824         * either clear the flags bit or point the event index at the next
 825         * entry. Always do both to keep code simple. */
 826        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 827                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 828                if (!vq->event)
 829                        vq->split.vring.avail->flags =
 830                                cpu_to_virtio16(_vq->vdev,
 831                                                vq->split.avail_flags_shadow);
 832        }
 833        vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
 834                        last_used_idx = vq->last_used_idx);
 835        END_USE(vq);
 836        return last_used_idx;
 837}
 838
 839static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
 840{
 841        struct vring_virtqueue *vq = to_vvq(_vq);
 842
 843        return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
 844                        vq->split.vring.used->idx);
 845}
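
/*
 * Illustrative sketch (not part of the original file): the race-free
 * re-enable pattern built on the prepare/poll split above, as used by
 * NAPI-style drivers.  process_used_buffers() is a hypothetical routine.
 */
static inline void example_poll_then_enable(struct virtqueue *vq,
                                            void (*process_used_buffers)(struct virtqueue *vq))
{
        unsigned int opaque;

        virtqueue_disable_cb(vq);
        process_used_buffers(vq);

        /* Re-enable callbacks, then close the race with a poll check. */
        opaque = virtqueue_enable_cb_prepare(vq);
        if (unlikely(virtqueue_poll(vq, opaque))) {
                virtqueue_disable_cb(vq);
                process_used_buffers(vq);
        }
}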
 846
 847static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 848{
 849        struct vring_virtqueue *vq = to_vvq(_vq);
 850        u16 bufs;
 851
 852        START_USE(vq);
 853
 854        /* We optimistically turn back on interrupts, then check if there was
 855         * more to do. */
  856        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 857         * either clear the flags bit or point the event index at the next
 858         * entry. Always update the event index to keep code simple. */
 859        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 860                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 861                if (!vq->event)
 862                        vq->split.vring.avail->flags =
 863                                cpu_to_virtio16(_vq->vdev,
 864                                                vq->split.avail_flags_shadow);
 865        }
 866        /* TODO: tune this threshold */
 867        bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 868
 869        virtio_store_mb(vq->weak_barriers,
 870                        &vring_used_event(&vq->split.vring),
 871                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 872
 873        if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
 874                                        - vq->last_used_idx) > bufs)) {
 875                END_USE(vq);
 876                return false;
 877        }
 878
 879        END_USE(vq);
 880        return true;
 881}
 882
 883static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 884{
 885        struct vring_virtqueue *vq = to_vvq(_vq);
 886        unsigned int i;
 887        void *buf;
 888
 889        START_USE(vq);
 890
 891        for (i = 0; i < vq->split.vring.num; i++) {
 892                if (!vq->split.desc_state[i].data)
 893                        continue;
 894                /* detach_buf_split clears data, so grab it now. */
 895                buf = vq->split.desc_state[i].data;
 896                detach_buf_split(vq, i, NULL);
 897                vq->split.avail_idx_shadow--;
 898                vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 899                                vq->split.avail_idx_shadow);
 900                END_USE(vq);
 901                return buf;
 902        }
 903        /* That should have freed everything. */
 904        BUG_ON(vq->vq.num_free != vq->split.vring.num);
 905
 906        END_USE(vq);
 907        return NULL;
 908}
 909
 910static struct virtqueue *vring_create_virtqueue_split(
 911        unsigned int index,
 912        unsigned int num,
 913        unsigned int vring_align,
 914        struct virtio_device *vdev,
 915        bool weak_barriers,
 916        bool may_reduce_num,
 917        bool context,
 918        bool (*notify)(struct virtqueue *),
 919        void (*callback)(struct virtqueue *),
 920        const char *name)
 921{
 922        struct virtqueue *vq;
 923        void *queue = NULL;
 924        dma_addr_t dma_addr;
 925        size_t queue_size_in_bytes;
 926        struct vring vring;
 927
 928        /* We assume num is a power of 2. */
 929        if (num & (num - 1)) {
 930                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 931                return NULL;
 932        }
 933
 934        /* TODO: allocate each queue chunk individually */
 935        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
 936                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 937                                          &dma_addr,
 938                                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 939                if (queue)
 940                        break;
 941                if (!may_reduce_num)
 942                        return NULL;
 943        }
 944
 945        if (!num)
 946                return NULL;
 947
 948        if (!queue) {
 949                /* Try to get a single page. You are my only hope! */
 950                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 951                                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
 952        }
 953        if (!queue)
 954                return NULL;
 955
 956        queue_size_in_bytes = vring_size(num, vring_align);
 957        vring_init(&vring, num, queue, vring_align);
 958
 959        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 960                                   notify, callback, name);
 961        if (!vq) {
 962                vring_free_queue(vdev, queue_size_in_bytes, queue,
 963                                 dma_addr);
 964                return NULL;
 965        }
 966
 967        to_vvq(vq)->split.queue_dma_addr = dma_addr;
 968        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
 969        to_vvq(vq)->we_own_ring = true;
 970
 971        return vq;
 972}
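
/*
 * For reference, vring_size() (uapi/linux/virtio_ring.h) gives the split
 * layout allocated above: descriptor table plus avail ring, padded to
 * vring_align, followed by the used ring.  Worked example (hypothetical
 * numbers, num = 256, vring_align = 4096):
 *
 *      desc:  256 * 16        = 4096 bytes
 *      avail: 2 * (3 + 256)   =  518 bytes  -> padded up to 8192
 *      used:  2 * 3 + 8 * 256 = 2054 bytes  -> total 10246 bytes
 *
 * The allocation loop above halves num after each failed contiguous
 * allocation while the ring is still larger than a page, and the final
 * fallback grabs a single page once it fits.
 */
static inline bool example_ring_fits_one_page(unsigned int num,
                                              unsigned long vring_align)
{
        /* Hypothetical check mirroring the loop's exit condition. */
        return vring_size(num, vring_align) <= PAGE_SIZE;
}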
 973
 974
 975/*
 976 * Packed ring specific functions - *_packed().
 977 */
 978
 979static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 980                                     struct vring_desc_extra *extra)
 981{
 982        u16 flags;
 983
 984        if (!vq->use_dma_api)
 985                return;
 986
 987        flags = extra->flags;
 988
 989        if (flags & VRING_DESC_F_INDIRECT) {
 990                dma_unmap_single(vring_dma_dev(vq),
 991                                 extra->addr, extra->len,
 992                                 (flags & VRING_DESC_F_WRITE) ?
 993                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 994        } else {
 995                dma_unmap_page(vring_dma_dev(vq),
 996                               extra->addr, extra->len,
 997                               (flags & VRING_DESC_F_WRITE) ?
 998                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 999        }
1000}
1001
1002static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1003                                   struct vring_packed_desc *desc)
1004{
1005        u16 flags;
1006
1007        if (!vq->use_dma_api)
1008                return;
1009
1010        flags = le16_to_cpu(desc->flags);
1011
1012        dma_unmap_page(vring_dma_dev(vq),
1013                       le64_to_cpu(desc->addr),
1014                       le32_to_cpu(desc->len),
1015                       (flags & VRING_DESC_F_WRITE) ?
1016                       DMA_FROM_DEVICE : DMA_TO_DEVICE);
1017}
1018
1019static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1020                                                       gfp_t gfp)
1021{
1022        struct vring_packed_desc *desc;
1023
1024        /*
1025         * We require lowmem mappings for the descriptors because
1026         * otherwise virt_to_phys will give us bogus addresses in the
1027         * virtqueue.
1028         */
1029        gfp &= ~__GFP_HIGHMEM;
1030
1031        desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1032
1033        return desc;
1034}
1035
1036static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1037                                         struct scatterlist *sgs[],
1038                                         unsigned int total_sg,
1039                                         unsigned int out_sgs,
1040                                         unsigned int in_sgs,
1041                                         void *data,
1042                                         gfp_t gfp)
1043{
1044        struct vring_packed_desc *desc;
1045        struct scatterlist *sg;
1046        unsigned int i, n, err_idx;
1047        u16 head, id;
1048        dma_addr_t addr;
1049
1050        head = vq->packed.next_avail_idx;
1051        desc = alloc_indirect_packed(total_sg, gfp);
1052        if (!desc)
1053                return -ENOMEM;
1054
1055        if (unlikely(vq->vq.num_free < 1)) {
1056                pr_debug("Can't add buf len 1 - avail = 0\n");
1057                kfree(desc);
1058                END_USE(vq);
1059                return -ENOSPC;
1060        }
1061
1062        i = 0;
1063        id = vq->free_head;
1064        BUG_ON(id == vq->packed.vring.num);
1065
1066        for (n = 0; n < out_sgs + in_sgs; n++) {
1067                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1068                        addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1069                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1070                        if (vring_mapping_error(vq, addr))
1071                                goto unmap_release;
1072
1073                        desc[i].flags = cpu_to_le16(n < out_sgs ?
1074                                                0 : VRING_DESC_F_WRITE);
1075                        desc[i].addr = cpu_to_le64(addr);
1076                        desc[i].len = cpu_to_le32(sg->length);
1077                        i++;
1078                }
1079        }
1080
1081        /* Now that the indirect table is filled in, map it. */
1082        addr = vring_map_single(vq, desc,
1083                        total_sg * sizeof(struct vring_packed_desc),
1084                        DMA_TO_DEVICE);
1085        if (vring_mapping_error(vq, addr))
1086                goto unmap_release;
1087
1088        vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1089        vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1090                                sizeof(struct vring_packed_desc));
1091        vq->packed.vring.desc[head].id = cpu_to_le16(id);
1092
1093        if (vq->use_dma_api) {
1094                vq->packed.desc_extra[id].addr = addr;
1095                vq->packed.desc_extra[id].len = total_sg *
1096                                sizeof(struct vring_packed_desc);
1097                vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1098                                                  vq->packed.avail_used_flags;
1099        }
1100
1101        /*
1102         * A driver MUST NOT make the first descriptor in the list
1103         * available before all subsequent descriptors comprising
1104         * the list are made available.
1105         */
1106        virtio_wmb(vq->weak_barriers);
1107        vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1108                                                vq->packed.avail_used_flags);
1109
1110        /* We're using some buffers from the free list. */
1111        vq->vq.num_free -= 1;
1112
1113        /* Update free pointer */
1114        n = head + 1;
1115        if (n >= vq->packed.vring.num) {
1116                n = 0;
1117                vq->packed.avail_wrap_counter ^= 1;
1118                vq->packed.avail_used_flags ^=
1119                                1 << VRING_PACKED_DESC_F_AVAIL |
1120                                1 << VRING_PACKED_DESC_F_USED;
1121        }
1122        vq->packed.next_avail_idx = n;
1123        vq->free_head = vq->packed.desc_extra[id].next;
1124
1125        /* Store token and indirect buffer state. */
1126        vq->packed.desc_state[id].num = 1;
1127        vq->packed.desc_state[id].data = data;
1128        vq->packed.desc_state[id].indir_desc = desc;
1129        vq->packed.desc_state[id].last = id;
1130
1131        vq->num_added += 1;
1132
1133        pr_debug("Added buffer head %i to %p\n", head, vq);
1134        END_USE(vq);
1135
1136        return 0;
1137
1138unmap_release:
1139        err_idx = i;
1140
1141        for (i = 0; i < err_idx; i++)
1142                vring_unmap_desc_packed(vq, &desc[i]);
1143
1144        kfree(desc);
1145
1146        END_USE(vq);
1147        return -ENOMEM;
1148}
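
/*
 * For reference, the layout produced above: a single slot of the main
 * packed ring gets VRING_DESC_F_INDIRECT and points at the side table,
 * whose entries carry only addr/len and the WRITE direction bit (the id
 * and the avail/used bits live in the main-ring slot alone):
 *
 *      ring[head]: { addr  = table DMA address,
 *                    len   = total_sg * sizeof(struct vring_packed_desc),
 *                    id    = id,
 *                    flags = INDIRECT | avail/used bits }
 *      table[0 .. total_sg - 1]: { addr, len, flags = 0 or WRITE }
 *
 * Only one main-ring slot and one id are consumed regardless of total_sg.
 */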
1149
1150static inline int virtqueue_add_packed(struct virtqueue *_vq,
1151                                       struct scatterlist *sgs[],
1152                                       unsigned int total_sg,
1153                                       unsigned int out_sgs,
1154                                       unsigned int in_sgs,
1155                                       void *data,
1156                                       void *ctx,
1157                                       gfp_t gfp)
1158{
1159        struct vring_virtqueue *vq = to_vvq(_vq);
1160        struct vring_packed_desc *desc;
1161        struct scatterlist *sg;
1162        unsigned int i, n, c, descs_used, err_idx;
1163        __le16 head_flags, flags;
1164        u16 head, id, prev, curr, avail_used_flags;
1165        int err;
1166
1167        START_USE(vq);
1168
1169        BUG_ON(data == NULL);
1170        BUG_ON(ctx && vq->indirect);
1171
1172        if (unlikely(vq->broken)) {
1173                END_USE(vq);
1174                return -EIO;
1175        }
1176
1177        LAST_ADD_TIME_UPDATE(vq);
1178
1179        BUG_ON(total_sg == 0);
1180
1181        if (virtqueue_use_indirect(_vq, total_sg)) {
1182                err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1183                                                    in_sgs, data, gfp);
1184                if (err != -ENOMEM) {
1185                        END_USE(vq);
1186                        return err;
1187                }
1188
1189                /* fall back on direct */
1190        }
1191
1192        head = vq->packed.next_avail_idx;
1193        avail_used_flags = vq->packed.avail_used_flags;
1194
1195        WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1196
1197        desc = vq->packed.vring.desc;
1198        i = head;
1199        descs_used = total_sg;
1200
1201        if (unlikely(vq->vq.num_free < descs_used)) {
1202                pr_debug("Can't add buf len %i - avail = %i\n",
1203                         descs_used, vq->vq.num_free);
1204                END_USE(vq);
1205                return -ENOSPC;
1206        }
1207
1208        id = vq->free_head;
1209        BUG_ON(id == vq->packed.vring.num);
1210
1211        curr = id;
1212        c = 0;
1213        for (n = 0; n < out_sgs + in_sgs; n++) {
1214                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1215                        dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1216                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1217                        if (vring_mapping_error(vq, addr))
1218                                goto unmap_release;
1219
1220                        flags = cpu_to_le16(vq->packed.avail_used_flags |
1221                                    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1222                                    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1223                        if (i == head)
1224                                head_flags = flags;
1225                        else
1226                                desc[i].flags = flags;
1227
1228                        desc[i].addr = cpu_to_le64(addr);
1229                        desc[i].len = cpu_to_le32(sg->length);
1230                        desc[i].id = cpu_to_le16(id);
1231
1232                        if (unlikely(vq->use_dma_api)) {
1233                                vq->packed.desc_extra[curr].addr = addr;
1234                                vq->packed.desc_extra[curr].len = sg->length;
1235                                vq->packed.desc_extra[curr].flags =
1236                                        le16_to_cpu(flags);
1237                        }
1238                        prev = curr;
1239                        curr = vq->packed.desc_extra[curr].next;
1240
1241                        if ((unlikely(++i >= vq->packed.vring.num))) {
1242                                i = 0;
1243                                vq->packed.avail_used_flags ^=
1244                                        1 << VRING_PACKED_DESC_F_AVAIL |
1245                                        1 << VRING_PACKED_DESC_F_USED;
1246                        }
1247                }
1248        }
1249
1250        if (i < head)
1251                vq->packed.avail_wrap_counter ^= 1;
1252
1253        /* We're using some buffers from the free list. */
1254        vq->vq.num_free -= descs_used;
1255
1256        /* Update free pointer */
1257        vq->packed.next_avail_idx = i;
1258        vq->free_head = curr;
1259
1260        /* Store token. */
1261        vq->packed.desc_state[id].num = descs_used;
1262        vq->packed.desc_state[id].data = data;
1263        vq->packed.desc_state[id].indir_desc = ctx;
1264        vq->packed.desc_state[id].last = prev;
1265
1266        /*
1267         * A driver MUST NOT make the first descriptor in the list
1268         * available before all subsequent descriptors comprising
1269         * the list are made available.
1270         */
1271        virtio_wmb(vq->weak_barriers);
1272        vq->packed.vring.desc[head].flags = head_flags;
1273        vq->num_added += descs_used;
1274
1275        pr_debug("Added buffer head %i to %p\n", head, vq);
1276        END_USE(vq);
1277
1278        return 0;
1279
1280unmap_release:
1281        err_idx = i;
1282        i = head;
1283        curr = vq->free_head;
1284
1285        vq->packed.avail_used_flags = avail_used_flags;
1286
1287        for (n = 0; n < total_sg; n++) {
1288                if (i == err_idx)
1289                        break;
1290                vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1291                curr = vq->packed.desc_extra[curr].next;
1292                i++;
1293                if (i >= vq->packed.vring.num)
1294                        i = 0;
1295        }
1296
1297        END_USE(vq);
1298        return -EIO;
1299}
1300
1301static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1302{
1303        struct vring_virtqueue *vq = to_vvq(_vq);
1304        u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1305        bool needs_kick;
1306        union {
1307                struct {
1308                        __le16 off_wrap;
1309                        __le16 flags;
1310                };
1311                u32 u32;
1312        } snapshot;
1313
1314        START_USE(vq);
1315
1316        /*
1317         * We need to expose the new flags value before checking notification
1318         * suppressions.
1319         */
1320        virtio_mb(vq->weak_barriers);
1321
1322        old = vq->packed.next_avail_idx - vq->num_added;
1323        new = vq->packed.next_avail_idx;
1324        vq->num_added = 0;
1325
1326        snapshot.u32 = *(u32 *)vq->packed.vring.device;
1327        flags = le16_to_cpu(snapshot.flags);
1328
1329        LAST_ADD_TIME_CHECK(vq);
1330        LAST_ADD_TIME_INVALID(vq);
1331
1332        if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1333                needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1334                goto out;
1335        }
1336
1337        off_wrap = le16_to_cpu(snapshot.off_wrap);
1338
1339        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1340        event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1341        if (wrap_counter != vq->packed.avail_wrap_counter)
1342                event_idx -= vq->packed.vring.num;
1343
1344        needs_kick = vring_need_event(event_idx, new, old);
1345out:
1346        END_USE(vq);
1347        return needs_kick;
1348}
1349
1350static void detach_buf_packed(struct vring_virtqueue *vq,
1351                              unsigned int id, void **ctx)
1352{
1353        struct vring_desc_state_packed *state = NULL;
1354        struct vring_packed_desc *desc;
1355        unsigned int i, curr;
1356
1357        state = &vq->packed.desc_state[id];
1358
1359        /* Clear data ptr. */
1360        state->data = NULL;
1361
1362        vq->packed.desc_extra[state->last].next = vq->free_head;
1363        vq->free_head = id;
1364        vq->vq.num_free += state->num;
1365
1366        if (unlikely(vq->use_dma_api)) {
1367                curr = id;
1368                for (i = 0; i < state->num; i++) {
1369                        vring_unmap_extra_packed(vq,
1370                                                 &vq->packed.desc_extra[curr]);
1371                        curr = vq->packed.desc_extra[curr].next;
1372                }
1373        }
1374
1375        if (vq->indirect) {
1376                u32 len;
1377
1378                /* Free the indirect table, if any, now that it's unmapped. */
1379                desc = state->indir_desc;
1380                if (!desc)
1381                        return;
1382
1383                if (vq->use_dma_api) {
1384                        len = vq->packed.desc_extra[id].len;
1385                        for (i = 0; i < len / sizeof(struct vring_packed_desc);
1386                                        i++)
1387                                vring_unmap_desc_packed(vq, &desc[i]);
1388                }
1389                kfree(desc);
1390                state->indir_desc = NULL;
1391        } else if (ctx) {
1392                *ctx = state->indir_desc;
1393        }
1394}
1395
1396static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1397                                       u16 idx, bool used_wrap_counter)
1398{
1399        bool avail, used;
1400        u16 flags;
1401
1402        flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1403        avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1404        used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1405
1406        return avail == used && used == used_wrap_counter;
1407}
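
/*
 * Worked example (hypothetical): with used_wrap_counter = 1, a descriptor
 * whose flags have AVAIL = 1 and USED = 1 tests as used (the device is done
 * with it), while AVAIL = 1, USED = 0 means it is still pending.  Once the
 * device wraps it writes AVAIL = 0, USED = 0, which matches again after the
 * driver flips its own counter to 0 at the end of the lap.
 */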
1408
1409static inline bool more_used_packed(const struct vring_virtqueue *vq)
1410{
1411        return is_used_desc_packed(vq, vq->last_used_idx,
1412                        vq->packed.used_wrap_counter);
1413}
1414
1415static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1416                                          unsigned int *len,
1417                                          void **ctx)
1418{
1419        struct vring_virtqueue *vq = to_vvq(_vq);
1420        u16 last_used, id;
1421        void *ret;
1422
1423        START_USE(vq);
1424
1425        if (unlikely(vq->broken)) {
1426                END_USE(vq);
1427                return NULL;
1428        }
1429
1430        if (!more_used_packed(vq)) {
1431                pr_debug("No more buffers in queue\n");
1432                END_USE(vq);
1433                return NULL;
1434        }
1435
1436        /* Only get used elements after they have been exposed by host. */
1437        virtio_rmb(vq->weak_barriers);
1438
1439        last_used = vq->last_used_idx;
1440        id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1441        *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1442
1443        if (unlikely(id >= vq->packed.vring.num)) {
1444                BAD_RING(vq, "id %u out of range\n", id);
1445                return NULL;
1446        }
1447        if (unlikely(!vq->packed.desc_state[id].data)) {
1448                BAD_RING(vq, "id %u is not a head!\n", id);
1449                return NULL;
1450        }
1451
1452        /* detach_buf_packed clears data, so grab it now. */
1453        ret = vq->packed.desc_state[id].data;
1454        detach_buf_packed(vq, id, ctx);
1455
1456        vq->last_used_idx += vq->packed.desc_state[id].num;
1457        if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1458                vq->last_used_idx -= vq->packed.vring.num;
1459                vq->packed.used_wrap_counter ^= 1;
1460        }
1461
1462        /*
1463         * If we expect an interrupt for the next entry, tell host
1464         * by writing event index and flush out the write before
1465         * the read in the next get_buf call.
1466         */
1467        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1468                virtio_store_mb(vq->weak_barriers,
1469                                &vq->packed.vring.driver->off_wrap,
1470                                cpu_to_le16(vq->last_used_idx |
1471                                        (vq->packed.used_wrap_counter <<
1472                                         VRING_PACKED_EVENT_F_WRAP_CTR)));
1473
1474        LAST_ADD_TIME_INVALID(vq);
1475
1476        END_USE(vq);
1477        return ret;
1478}
1479
1480static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1481{
1482        struct vring_virtqueue *vq = to_vvq(_vq);
1483
1484        if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1485                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1486                vq->packed.vring.driver->flags =
1487                        cpu_to_le16(vq->packed.event_flags_shadow);
1488        }
1489}
1490
1491static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1492{
1493        struct vring_virtqueue *vq = to_vvq(_vq);
1494
1495        START_USE(vq);
1496
1497        /*
1498         * We optimistically turn back on interrupts, then check if there was
1499         * more to do.
1500         */
1501
1502        if (vq->event) {
1503                vq->packed.vring.driver->off_wrap =
1504                        cpu_to_le16(vq->last_used_idx |
1505                                (vq->packed.used_wrap_counter <<
1506                                 VRING_PACKED_EVENT_F_WRAP_CTR));
1507                /*
1508                 * We need to update event offset and event wrap
1509                 * counter first before updating event flags.
1510                 */
1511                virtio_wmb(vq->weak_barriers);
1512        }
1513
1514        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1515                vq->packed.event_flags_shadow = vq->event ?
1516                                VRING_PACKED_EVENT_FLAG_DESC :
1517                                VRING_PACKED_EVENT_FLAG_ENABLE;
1518                vq->packed.vring.driver->flags =
1519                                cpu_to_le16(vq->packed.event_flags_shadow);
1520        }
1521
1522        END_USE(vq);
1523        return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1524                        VRING_PACKED_EVENT_F_WRAP_CTR);
1525}
1526
1527static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1528{
1529        struct vring_virtqueue *vq = to_vvq(_vq);
1530        bool wrap_counter;
1531        u16 used_idx;
1532
1533        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1534        used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1535
1536        return is_used_desc_packed(vq, used_idx, wrap_counter);
1537}
1538
1539static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1540{
1541        struct vring_virtqueue *vq = to_vvq(_vq);
1542        u16 used_idx, wrap_counter;
1543        u16 bufs;
1544
1545        START_USE(vq);
1546
1547        /*
1548         * We optimistically turn back on interrupts, then check if there was
1549         * more to do.
1550         */
1551
1552        if (vq->event) {
1553                /* TODO: tune this threshold */
1554                bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1555                wrap_counter = vq->packed.used_wrap_counter;
1556
1557                used_idx = vq->last_used_idx + bufs;
1558                if (used_idx >= vq->packed.vring.num) {
1559                        used_idx -= vq->packed.vring.num;
1560                        wrap_counter ^= 1;
1561                }
1562
1563                vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1564                        (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1565
1566                /*
1567                 * We need to update event offset and event wrap
1568                 * counter first before updating event flags.
1569                 */
1570                virtio_wmb(vq->weak_barriers);
1571        }
1572
1573        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1574                vq->packed.event_flags_shadow = vq->event ?
1575                                VRING_PACKED_EVENT_FLAG_DESC :
1576                                VRING_PACKED_EVENT_FLAG_ENABLE;
1577                vq->packed.vring.driver->flags =
1578                                cpu_to_le16(vq->packed.event_flags_shadow);
1579        }
1580
1581        /*
1582         * We need to update event suppression structure first
1583         * before re-checking for more used buffers.
1584         */
1585        virtio_mb(vq->weak_barriers);
1586
1587        if (is_used_desc_packed(vq,
1588                                vq->last_used_idx,
1589                                vq->packed.used_wrap_counter)) {
1590                END_USE(vq);
1591                return false;
1592        }
1593
1594        END_USE(vq);
1595        return true;
1596}
1597
1598static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1599{
1600        struct vring_virtqueue *vq = to_vvq(_vq);
1601        unsigned int i;
1602        void *buf;
1603
1604        START_USE(vq);
1605
1606        for (i = 0; i < vq->packed.vring.num; i++) {
1607                if (!vq->packed.desc_state[i].data)
1608                        continue;
1609                /* detach_buf clears data, so grab it now. */
1610                buf = vq->packed.desc_state[i].data;
1611                detach_buf_packed(vq, i, NULL);
1612                END_USE(vq);
1613                return buf;
1614        }
1615        /* That should have freed everything. */
1616        BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1617
1618        END_USE(vq);
1619        return NULL;
1620}
1621
1622static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1623                                                       unsigned int num)
1624{
1625        struct vring_desc_extra *desc_extra;
1626        unsigned int i;
1627
1628        desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1629                                   GFP_KERNEL);
1630        if (!desc_extra)
1631                return NULL;
1632
1633        memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1634
1635        for (i = 0; i < num - 1; i++)
1636                desc_extra[i].next = i + 1;
1637
1638        return desc_extra;
1639}
1640
1641static struct virtqueue *vring_create_virtqueue_packed(
1642        unsigned int index,
1643        unsigned int num,
1644        unsigned int vring_align,
1645        struct virtio_device *vdev,
1646        bool weak_barriers,
1647        bool may_reduce_num,
1648        bool context,
1649        bool (*notify)(struct virtqueue *),
1650        void (*callback)(struct virtqueue *),
1651        const char *name)
1652{
1653        struct vring_virtqueue *vq;
1654        struct vring_packed_desc *ring;
1655        struct vring_packed_desc_event *driver, *device;
1656        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1657        size_t ring_size_in_bytes, event_size_in_bytes;
1658
1659        ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1660
1661        ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1662                                 &ring_dma_addr,
1663                                 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1664        if (!ring)
1665                goto err_ring;
1666
1667        event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1668
1669        driver = vring_alloc_queue(vdev, event_size_in_bytes,
1670                                   &driver_event_dma_addr,
1671                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1672        if (!driver)
1673                goto err_driver;
1674
1675        device = vring_alloc_queue(vdev, event_size_in_bytes,
1676                                   &device_event_dma_addr,
1677                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1678        if (!device)
1679                goto err_device;
1680
1681        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1682        if (!vq)
1683                goto err_vq;
1684
1685        vq->vq.callback = callback;
1686        vq->vq.vdev = vdev;
1687        vq->vq.name = name;
1688        vq->vq.num_free = num;
1689        vq->vq.index = index;
1690        vq->we_own_ring = true;
1691        vq->notify = notify;
1692        vq->weak_barriers = weak_barriers;
1693        vq->broken = false;
1694        vq->last_used_idx = 0;
1695        vq->event_triggered = false;
1696        vq->num_added = 0;
1697        vq->packed_ring = true;
1698        vq->use_dma_api = vring_use_dma_api(vdev);
1699#ifdef DEBUG
1700        vq->in_use = false;
1701        vq->last_add_time_valid = false;
1702#endif
1703
1704        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1705                !context;
1706        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1707
1708        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1709                vq->weak_barriers = false;
1710
1711        vq->packed.ring_dma_addr = ring_dma_addr;
1712        vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1713        vq->packed.device_event_dma_addr = device_event_dma_addr;
1714
1715        vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1716        vq->packed.event_size_in_bytes = event_size_in_bytes;
1717
1718        vq->packed.vring.num = num;
1719        vq->packed.vring.desc = ring;
1720        vq->packed.vring.driver = driver;
1721        vq->packed.vring.device = device;
1722
1723        vq->packed.next_avail_idx = 0;
1724        vq->packed.avail_wrap_counter = 1;
1725        vq->packed.used_wrap_counter = 1;
1726        vq->packed.event_flags_shadow = 0;
1727        vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1728
1729        vq->packed.desc_state = kmalloc_array(num,
1730                        sizeof(struct vring_desc_state_packed),
1731                        GFP_KERNEL);
1732        if (!vq->packed.desc_state)
1733                goto err_desc_state;
1734
1735        memset(vq->packed.desc_state, 0,
1736                num * sizeof(struct vring_desc_state_packed));
1737
1738        /* Put everything in free lists. */
1739        vq->free_head = 0;
1740
1741        vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1742        if (!vq->packed.desc_extra)
1743                goto err_desc_extra;
1744
1745        /* No callback?  Tell other side not to bother us. */
1746        if (!callback) {
1747                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1748                vq->packed.vring.driver->flags =
1749                        cpu_to_le16(vq->packed.event_flags_shadow);
1750        }
1751
1752        spin_lock(&vdev->vqs_list_lock);
1753        list_add_tail(&vq->vq.list, &vdev->vqs);
1754        spin_unlock(&vdev->vqs_list_lock);
1755        return &vq->vq;
1756
1757err_desc_extra:
1758        kfree(vq->packed.desc_state);
1759err_desc_state:
1760        kfree(vq);
1761err_vq:
1762        vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1763err_device:
1764        vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1765err_driver:
1766        vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1767err_ring:
1768        return NULL;
1769}
1770
1771
1772/*
1773 * Generic functions and exported symbols.
1774 */
1775
1776static inline int virtqueue_add(struct virtqueue *_vq,
1777                                struct scatterlist *sgs[],
1778                                unsigned int total_sg,
1779                                unsigned int out_sgs,
1780                                unsigned int in_sgs,
1781                                void *data,
1782                                void *ctx,
1783                                gfp_t gfp)
1784{
1785        struct vring_virtqueue *vq = to_vvq(_vq);
1786
1787        return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1788                                        out_sgs, in_sgs, data, ctx, gfp) :
1789                                 virtqueue_add_split(_vq, sgs, total_sg,
1790                                        out_sgs, in_sgs, data, ctx, gfp);
1791}
1792
1793/**
1794 * virtqueue_add_sgs - expose buffers to other end
1795 * @_vq: the struct virtqueue we're talking about.
1796 * @sgs: array of terminated scatterlists.
1797 * @out_sgs: the number of scatterlists readable by other side
1798 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1799 * @data: the token identifying the buffer.
1800 * @gfp: how to do memory allocations (if necessary).
1801 *
1802 * Caller must ensure we don't call this with other virtqueue operations
1803 * at the same time (except where noted).
1804 *
1805 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1806 */
1807int virtqueue_add_sgs(struct virtqueue *_vq,
1808                      struct scatterlist *sgs[],
1809                      unsigned int out_sgs,
1810                      unsigned int in_sgs,
1811                      void *data,
1812                      gfp_t gfp)
1813{
1814        unsigned int i, total_sg = 0;
1815
1816        /* Count them first. */
1817        for (i = 0; i < out_sgs + in_sgs; i++) {
1818                struct scatterlist *sg;
1819
1820                for (sg = sgs[i]; sg; sg = sg_next(sg))
1821                        total_sg++;
1822        }
1823        return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1824                             data, NULL, gfp);
1825}
1826EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
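
/*
 * Minimal usage sketch (hypothetical driver code, not upstream): queue
 * one device-readable field and one device-writable status byte with
 * virtqueue_add_sgs().  The request layout and GFP choice are
 * assumptions; assumes <linux/scatterlist.h> for sg_init_one().
 */
struct example_req {
        __le32 type;                    /* read by the device */
        u8 status;                      /* written by the device */
};

static int example_queue_req(struct virtqueue *vq, struct example_req *req)
{
        struct scatterlist out_sg, in_sg, *sgs[2];

        sg_init_one(&out_sg, &req->type, sizeof(req->type));
        sg_init_one(&in_sg, &req->status, sizeof(req->status));

        /* Readable scatterlists first, then writable ones. */
        sgs[0] = &out_sg;
        sgs[1] = &in_sg;

        /* @req is the token later returned by virtqueue_get_buf(). */
        return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}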
1827
1828/**
1829 * virtqueue_add_outbuf - expose output buffers to other end
1830 * @vq: the struct virtqueue we're talking about.
1831 * @sg: scatterlist (must be well-formed and terminated!)
1832 * @num: the number of entries in @sg readable by other side
1833 * @data: the token identifying the buffer.
1834 * @gfp: how to do memory allocations (if necessary).
1835 *
1836 * Caller must ensure we don't call this with other virtqueue operations
1837 * at the same time (except where noted).
1838 *
1839 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1840 */
1841int virtqueue_add_outbuf(struct virtqueue *vq,
1842                         struct scatterlist *sg, unsigned int num,
1843                         void *data,
1844                         gfp_t gfp)
1845{
1846        return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1847}
1848EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
1849
1850/**
1851 * virtqueue_add_inbuf - expose input buffers to other end
1852 * @vq: the struct virtqueue we're talking about.
1853 * @sg: scatterlist (must be well-formed and terminated!)
1854 * @num: the number of entries in @sg writable by other side
1855 * @data: the token identifying the buffer.
1856 * @gfp: how to do memory allocations (if necessary).
1857 *
1858 * Caller must ensure we don't call this with other virtqueue operations
1859 * at the same time (except where noted).
1860 *
1861 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1862 */
1863int virtqueue_add_inbuf(struct virtqueue *vq,
1864                        struct scatterlist *sg, unsigned int num,
1865                        void *data,
1866                        gfp_t gfp)
1867{
1868        return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1869}
1870EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
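
/*
 * Minimal sketch (hypothetical, not upstream): post a single writable
 * receive buffer with virtqueue_add_inbuf() and kick the device.  The
 * PAGE_SIZE buffer and the use of the buffer pointer as the token are
 * assumptions of the example.
 */
static int example_post_rx_buf(struct virtqueue *vq)
{
        struct scatterlist sg;
        void *buf = kmalloc(PAGE_SIZE, GFP_ATOMIC);
        int err;

        if (!buf)
                return -ENOMEM;

        sg_init_one(&sg, buf, PAGE_SIZE);

        err = virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC);
        if (err) {
                kfree(buf);
                return err;
        }

        virtqueue_kick(vq);
        return 0;
}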
1871
1872/**
1873 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1874 * @vq: the struct virtqueue we're talking about.
1875 * @sg: scatterlist (must be well-formed and terminated!)
1876 * @num: the number of entries in @sg writable by other side
1877 * @data: the token identifying the buffer.
1878 * @ctx: extra context for the token
1879 * @gfp: how to do memory allocations (if necessary).
1880 *
1881 * Caller must ensure we don't call this with other virtqueue operations
1882 * at the same time (except where noted).
1883 *
1884 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1885 */
1886int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1887                        struct scatterlist *sg, unsigned int num,
1888                        void *data,
1889                        void *ctx,
1890                        gfp_t gfp)
1891{
1892        return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1893}
1894EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1895
1896/**
1897 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1898 * @_vq: the struct virtqueue
1899 *
1900 * Instead of virtqueue_kick(), you can do:
1901 *      if (virtqueue_kick_prepare(vq))
1902 *              virtqueue_notify(vq);
1903 *
1904 * This is sometimes useful because the virtqueue_kick_prepare() needs
1905 * to be serialized, but the actual virtqueue_notify() call does not.
1906 */
1907bool virtqueue_kick_prepare(struct virtqueue *_vq)
1908{
1909        struct vring_virtqueue *vq = to_vvq(_vq);
1910
1911        return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1912                                 virtqueue_kick_prepare_split(_vq);
1913}
1914EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1915
1916/**
1917 * virtqueue_notify - second half of split virtqueue_kick call.
1918 * @_vq: the struct virtqueue
1919 *
1920 * This does not need to be serialized.
1921 *
1922 * Returns false if host notify failed or queue is broken, otherwise true.
1923 */
1924bool virtqueue_notify(struct virtqueue *_vq)
1925{
1926        struct vring_virtqueue *vq = to_vvq(_vq);
1927
1928        if (unlikely(vq->broken))
1929                return false;
1930
1931        /* Prod other side to tell it about changes. */
1932        if (!vq->notify(_vq)) {
1933                vq->broken = true;
1934                return false;
1935        }
1936        return true;
1937}
1938EXPORT_SYMBOL_GPL(virtqueue_notify);
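
/*
 * Minimal sketch (hypothetical, not upstream) of the split kick pattern
 * described above: virtqueue_kick_prepare() runs under the driver's own
 * queue lock, while the potentially slow virtqueue_notify() runs after
 * the lock is dropped.  The spinlock and token are assumptions.
 */
static void example_submit_and_kick(struct virtqueue *vq, spinlock_t *lock,
                                    struct scatterlist *sg, void *token)
{
        unsigned long flags;
        bool kick = false;

        spin_lock_irqsave(lock, flags);
        if (!virtqueue_add_outbuf(vq, sg, 1, token, GFP_ATOMIC))
                kick = virtqueue_kick_prepare(vq);
        spin_unlock_irqrestore(lock, flags);

        if (kick)
                virtqueue_notify(vq);
}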
1939
1940/**
1941 * virtqueue_kick - update after add_buf
1942 * @vq: the struct virtqueue
1943 *
1944 * After one or more virtqueue_add_* calls, invoke this to kick
1945 * the other side.
1946 *
1947 * Caller must ensure we don't call this with other virtqueue
1948 * operations at the same time (except where noted).
1949 *
1950 * Returns false if kick failed, otherwise true.
1951 */
1952bool virtqueue_kick(struct virtqueue *vq)
1953{
1954        if (virtqueue_kick_prepare(vq))
1955                return virtqueue_notify(vq);
1956        return true;
1957}
1958EXPORT_SYMBOL_GPL(virtqueue_kick);
1959
1960/**
1961 * virtqueue_get_buf_ctx - get the next used buffer
1962 * @_vq: the struct virtqueue we're talking about.
1963 * @len: the length written into the buffer
1964 * @ctx: extra context for the token
1965 *
1966 * If the device wrote data into the buffer, @len will be set to the
1967 * amount written.  This means you don't need to clear the buffer
1968 * beforehand to ensure there's no data leakage in the case of short
1969 * writes.
1970 *
1971 * Caller must ensure we don't call this with other virtqueue
1972 * operations at the same time (except where noted).
1973 *
1974 * Returns NULL if there are no used buffers, or the "data" token
1975 * handed to virtqueue_add_*().
1976 */
1977void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1978                            void **ctx)
1979{
1980        struct vring_virtqueue *vq = to_vvq(_vq);
1981
1982        return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1983                                 virtqueue_get_buf_ctx_split(_vq, len, ctx);
1984}
1985EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1986
1987void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1988{
1989        return virtqueue_get_buf_ctx(_vq, len, NULL);
1990}
1991EXPORT_SYMBOL_GPL(virtqueue_get_buf);
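
/*
 * Minimal sketch (hypothetical, not upstream): drain all completed
 * buffers.  The token is whatever the driver handed to
 * virtqueue_add_*(); the completion callback is an assumption.
 */
static void example_drain_used(struct virtqueue *vq,
                               void (*complete)(void *token, unsigned int len))
{
        unsigned int len;
        void *token;

        while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                complete(token, len);
}
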
1992/**
1993 * virtqueue_disable_cb - disable callbacks
1994 * @_vq: the struct virtqueue we're talking about.
1995 *
1996 * Note that this is not necessarily synchronous, hence unreliable and only
1997 * useful as an optimization.
1998 *
1999 * Unlike other operations, this need not be serialized.
2000 */
2001void virtqueue_disable_cb(struct virtqueue *_vq)
2002{
2003        struct vring_virtqueue *vq = to_vvq(_vq);
2004
2005        /* If device triggered an event already it won't trigger one again:
2006         * no need to disable.
2007         */
2008        if (vq->event_triggered)
2009                return;
2010
2011        if (vq->packed_ring)
2012                virtqueue_disable_cb_packed(_vq);
2013        else
2014                virtqueue_disable_cb_split(_vq);
2015}
2016EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2017
2018/**
2019 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2020 * @_vq: the struct virtqueue we're talking about.
2021 *
2022 * This re-enables callbacks; it returns current queue state
2023 * in an opaque unsigned value. This value should be later tested by
2024 * virtqueue_poll, to detect a possible race between the driver checking for
2025 * more work, and enabling callbacks.
2026 *
2027 * Caller must ensure we don't call this with other virtqueue
2028 * operations at the same time (except where noted).
2029 */
2030unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2031{
2032        struct vring_virtqueue *vq = to_vvq(_vq);
2033
2034        if (vq->event_triggered)
2035                vq->event_triggered = false;
2036
2037        return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2038                                 virtqueue_enable_cb_prepare_split(_vq);
2039}
2040EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2041
2042/**
2043 * virtqueue_poll - query pending used buffers
2044 * @_vq: the struct virtqueue we're talking about.
2045 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2046 *
2047 * Returns "true" if there are pending used buffers in the queue.
2048 *
2049 * This does not need to be serialized.
2050 */
2051bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
2052{
2053        struct vring_virtqueue *vq = to_vvq(_vq);
2054
2055        if (unlikely(vq->broken))
2056                return false;
2057
2058        virtio_mb(vq->weak_barriers);
2059        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2060                                 virtqueue_poll_split(_vq, last_used_idx);
2061}
2062EXPORT_SYMBOL_GPL(virtqueue_poll);
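
/*
 * Minimal sketch (hypothetical, not upstream) of the race-free re-enable
 * pattern described above: re-arm callbacks with
 * virtqueue_enable_cb_prepare(), then hand the opaque value to
 * virtqueue_poll().  If buffers arrived in the window, disable callbacks
 * again and let the caller keep processing.
 */
static bool example_try_reenable(struct virtqueue *vq)
{
        unsigned int opaque = virtqueue_enable_cb_prepare(vq);

        if (unlikely(virtqueue_poll(vq, opaque))) {
                virtqueue_disable_cb(vq);
                return false;   /* more work pending, keep polling */
        }

        return true;            /* callbacks re-enabled, safe to stop */
}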
2063
2064/**
2065 * virtqueue_enable_cb - restart callbacks after disable_cb.
2066 * @_vq: the struct virtqueue we're talking about.
2067 *
2068 * This re-enables callbacks; it returns "false" if there are pending
2069 * buffers in the queue, to detect a possible race between the driver
2070 * checking for more work, and enabling callbacks.
2071 *
2072 * Caller must ensure we don't call this with other virtqueue
2073 * operations at the same time (except where noted).
2074 */
2075bool virtqueue_enable_cb(struct virtqueue *_vq)
2076{
2077        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
2078
2079        return !virtqueue_poll(_vq, last_used_idx);
2080}
2081EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
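
/*
 * Minimal sketch (hypothetical, not upstream): the usual consume-then-
 * re-enable loop built on virtqueue_enable_cb().  If the re-enable
 * reports pending buffers, the loop runs again instead of returning.
 */
static void example_process_all(struct virtqueue *vq,
                                void (*complete)(void *token, unsigned int len))
{
        unsigned int len;
        void *token;

        do {
                virtqueue_disable_cb(vq);
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        complete(token, len);
        } while (!virtqueue_enable_cb(vq));
}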
2082
2083/**
2084 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2085 * @_vq: the struct virtqueue we're talking about.
2086 *
2087 * This re-enables callbacks but hints to the other side to delay
2088 * interrupts until most of the available buffers have been processed;
2089 * it returns "false" if there are many pending buffers in the queue,
2090 * to detect a possible race between the driver checking for more work,
2091 * and enabling callbacks.
2092 *
2093 * Caller must ensure we don't call this with other virtqueue
2094 * operations at the same time (except where noted).
2095 */
2096bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2097{
2098        struct vring_virtqueue *vq = to_vvq(_vq);
2099
2100        if (vq->event_triggered)
2101                vq->event_triggered = false;
2102
2103        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2104                                 virtqueue_enable_cb_delayed_split(_vq);
2105}
2106EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
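
/*
 * Minimal sketch (hypothetical, not upstream): a transmit-reclaim path
 * that prefers fewer interrupts.  virtqueue_enable_cb_delayed() hints
 * that the device should wait until roughly 3/4 of the in-flight
 * buffers are used before interrupting; if it reports pending buffers,
 * reap them immediately.  Freeing tokens with kfree() is an assumption.
 */
static void example_tx_reclaim(struct virtqueue *vq)
{
        unsigned int len;
        void *token;

        do {
                while ((token = virtqueue_get_buf(vq, &len)) != NULL)
                        kfree(token);
        } while (!virtqueue_enable_cb_delayed(vq));
}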
2107
2108/**
2109 * virtqueue_detach_unused_buf - detach first unused buffer
2110 * @_vq: the struct virtqueue we're talking about.
2111 *
2112 * Returns NULL or the "data" token handed to virtqueue_add_*().
2113 * This is not valid on an active queue; it is useful only for device
2114 * shutdown.
2115 */
2116void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2117{
2118        struct vring_virtqueue *vq = to_vvq(_vq);
2119
2120        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2121                                 virtqueue_detach_unused_buf_split(_vq);
2122}
2123EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
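
/*
 * Minimal sketch (hypothetical, not upstream): reclaim buffers during
 * device teardown, after the queue has been stopped.  kfree() assumes
 * the tokens were heap allocations owned by the driver.
 */
static void example_free_unused(struct virtqueue *vq)
{
        void *token;

        while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
                kfree(token);
}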
2124
2125static inline bool more_used(const struct vring_virtqueue *vq)
2126{
2127        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2128}
2129
2130irqreturn_t vring_interrupt(int irq, void *_vq)
2131{
2132        struct vring_virtqueue *vq = to_vvq(_vq);
2133
2134        if (!more_used(vq)) {
2135                pr_debug("virtqueue interrupt with no work for %p\n", vq);
2136                return IRQ_NONE;
2137        }
2138
2139        if (unlikely(vq->broken))
2140                return IRQ_HANDLED;
2141
2142        /* Just a hint for performance: so it's ok that this can be racy! */
2143        if (vq->event)
2144                vq->event_triggered = true;
2145
2146        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2147        if (vq->vq.callback)
2148                vq->vq.callback(&vq->vq);
2149
2150        return IRQ_HANDLED;
2151}
2152EXPORT_SYMBOL_GPL(vring_interrupt);
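
/*
 * Minimal sketch (hypothetical, not upstream): how a transport might
 * wire a per-virtqueue interrupt line to vring_interrupt().  The irq
 * number, name and IRQF_SHARED flag are assumptions; assumes
 * <linux/interrupt.h>.
 */
static int example_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
        return request_irq(irq, vring_interrupt, IRQF_SHARED,
                           "example-virtqueue", vq);
}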
2153
2154/* Only available for split ring */
2155struct virtqueue *__vring_new_virtqueue(unsigned int index,
2156                                        struct vring vring,
2157                                        struct virtio_device *vdev,
2158                                        bool weak_barriers,
2159                                        bool context,
2160                                        bool (*notify)(struct virtqueue *),
2161                                        void (*callback)(struct virtqueue *),
2162                                        const char *name)
2163{
2164        struct vring_virtqueue *vq;
2165
2166        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2167                return NULL;
2168
2169        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2170        if (!vq)
2171                return NULL;
2172
2173        vq->packed_ring = false;
2174        vq->vq.callback = callback;
2175        vq->vq.vdev = vdev;
2176        vq->vq.name = name;
2177        vq->vq.num_free = vring.num;
2178        vq->vq.index = index;
2179        vq->we_own_ring = false;
2180        vq->notify = notify;
2181        vq->weak_barriers = weak_barriers;
2182        vq->broken = false;
2183        vq->last_used_idx = 0;
2184        vq->event_triggered = false;
2185        vq->num_added = 0;
2186        vq->use_dma_api = vring_use_dma_api(vdev);
2187#ifdef DEBUG
2188        vq->in_use = false;
2189        vq->last_add_time_valid = false;
2190#endif
2191
2192        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2193                !context;
2194        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2195
2196        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2197                vq->weak_barriers = false;
2198
2199        vq->split.queue_dma_addr = 0;
2200        vq->split.queue_size_in_bytes = 0;
2201
2202        vq->split.vring = vring;
2203        vq->split.avail_flags_shadow = 0;
2204        vq->split.avail_idx_shadow = 0;
2205
2206        /* No callback?  Tell other side not to bother us. */
2207        if (!callback) {
2208                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2209                if (!vq->event)
2210                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2211                                        vq->split.avail_flags_shadow);
2212        }
2213
2214        vq->split.desc_state = kmalloc_array(vring.num,
2215                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
2216        if (!vq->split.desc_state)
2217                goto err_state;
2218
2219        vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
2220        if (!vq->split.desc_extra)
2221                goto err_extra;
2222
2223        /* Put everything in free lists. */
2224        vq->free_head = 0;
2225        memset(vq->split.desc_state, 0, vring.num *
2226                        sizeof(struct vring_desc_state_split));
2227
2228        spin_lock(&vdev->vqs_list_lock);
2229        list_add_tail(&vq->vq.list, &vdev->vqs);
2230        spin_unlock(&vdev->vqs_list_lock);
2231        return &vq->vq;
2232
2233err_extra:
2234        kfree(vq->split.desc_state);
2235err_state:
2236        kfree(vq);
2237        return NULL;
2238}
2239EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2240
2241struct virtqueue *vring_create_virtqueue(
2242        unsigned int index,
2243        unsigned int num,
2244        unsigned int vring_align,
2245        struct virtio_device *vdev,
2246        bool weak_barriers,
2247        bool may_reduce_num,
2248        bool context,
2249        bool (*notify)(struct virtqueue *),
2250        void (*callback)(struct virtqueue *),
2251        const char *name)
2252{
2253
2254        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2255                return vring_create_virtqueue_packed(index, num, vring_align,
2256                                vdev, weak_barriers, may_reduce_num,
2257                                context, notify, callback, name);
2258
2259        return vring_create_virtqueue_split(index, num, vring_align,
2260                        vdev, weak_barriers, may_reduce_num,
2261                        context, notify, callback, name);
2262}
2263EXPORT_SYMBOL_GPL(vring_create_virtqueue);
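
/*
 * Minimal sketch (hypothetical, not upstream): a transport allocating a
 * queue with vring_create_virtqueue().  The notify hook, ring size and
 * alignment are assumptions; real transports derive them from the
 * device configuration.
 */
static bool example_notify(struct virtqueue *vq)
{
        /* Ring the device's doorbell here; return false on failure. */
        return true;
}

static struct virtqueue *example_setup_vq(struct virtio_device *vdev,
                                          unsigned int index,
                                          void (*callback)(struct virtqueue *),
                                          const char *name)
{
        return vring_create_virtqueue(index, 256, PAGE_SIZE, vdev,
                                      true,  /* weak_barriers */
                                      true,  /* may_reduce_num */
                                      false, /* no per-buffer context */
                                      example_notify, callback, name);
}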
2264
2265/* Only available for split ring */
2266struct virtqueue *vring_new_virtqueue(unsigned int index,
2267                                      unsigned int num,
2268                                      unsigned int vring_align,
2269                                      struct virtio_device *vdev,
2270                                      bool weak_barriers,
2271                                      bool context,
2272                                      void *pages,
2273                                      bool (*notify)(struct virtqueue *vq),
2274                                      void (*callback)(struct virtqueue *vq),
2275                                      const char *name)
2276{
2277        struct vring vring;
2278
2279        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2280                return NULL;
2281
2282        vring_init(&vring, num, pages, vring_align);
2283        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2284                                     notify, callback, name);
2285}
2286EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2287
2288void vring_del_virtqueue(struct virtqueue *_vq)
2289{
2290        struct vring_virtqueue *vq = to_vvq(_vq);
2291
2292        spin_lock(&vq->vq.vdev->vqs_list_lock);
2293        list_del(&_vq->list);
2294        spin_unlock(&vq->vq.vdev->vqs_list_lock);
2295
2296        if (vq->we_own_ring) {
2297                if (vq->packed_ring) {
2298                        vring_free_queue(vq->vq.vdev,
2299                                         vq->packed.ring_size_in_bytes,
2300                                         vq->packed.vring.desc,
2301                                         vq->packed.ring_dma_addr);
2302
2303                        vring_free_queue(vq->vq.vdev,
2304                                         vq->packed.event_size_in_bytes,
2305                                         vq->packed.vring.driver,
2306                                         vq->packed.driver_event_dma_addr);
2307
2308                        vring_free_queue(vq->vq.vdev,
2309                                         vq->packed.event_size_in_bytes,
2310                                         vq->packed.vring.device,
2311                                         vq->packed.device_event_dma_addr);
2312
2313                        kfree(vq->packed.desc_state);
2314                        kfree(vq->packed.desc_extra);
2315                } else {
2316                        vring_free_queue(vq->vq.vdev,
2317                                         vq->split.queue_size_in_bytes,
2318                                         vq->split.vring.desc,
2319                                         vq->split.queue_dma_addr);
2320                }
2321        }
2322        if (!vq->packed_ring) {
2323                kfree(vq->split.desc_state);
2324                kfree(vq->split.desc_extra);
2325        }
2326        kfree(vq);
2327}
2328EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2329
2330/* Manipulates transport-specific feature bits. */
2331void vring_transport_features(struct virtio_device *vdev)
2332{
2333        unsigned int i;
2334
2335        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2336                switch (i) {
2337                case VIRTIO_RING_F_INDIRECT_DESC:
2338                        break;
2339                case VIRTIO_RING_F_EVENT_IDX:
2340                        break;
2341                case VIRTIO_F_VERSION_1:
2342                        break;
2343                case VIRTIO_F_ACCESS_PLATFORM:
2344                        break;
2345                case VIRTIO_F_RING_PACKED:
2346                        break;
2347                case VIRTIO_F_ORDER_PLATFORM:
2348                        break;
2349                default:
2350                        /* We don't understand this bit. */
2351                        __virtio_clear_bit(vdev, i);
2352                }
2353        }
2354}
2355EXPORT_SYMBOL_GPL(vring_transport_features);
2356
2357/**
2358 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2359 * @_vq: the struct virtqueue containing the vring of interest.
2360 *
2361 * Returns the size of the vring.  This is mainly used for boasting to
2362 * userspace.  Unlike other operations, this need not be serialized.
2363 */
2364unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2365{
2366
2367        struct vring_virtqueue *vq = to_vvq(_vq);
2368
2369        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2370}
2371EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2372
2373bool virtqueue_is_broken(struct virtqueue *_vq)
2374{
2375        struct vring_virtqueue *vq = to_vvq(_vq);
2376
2377        return READ_ONCE(vq->broken);
2378}
2379EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2380
2381/*
2382 * This should prevent the device from being used, allowing drivers to
2383 * recover.  You may need to grab appropriate locks to flush.
2384 */
2385void virtio_break_device(struct virtio_device *dev)
2386{
2387        struct virtqueue *_vq;
2388
2389        spin_lock(&dev->vqs_list_lock);
2390        list_for_each_entry(_vq, &dev->vqs, list) {
2391                struct vring_virtqueue *vq = to_vvq(_vq);
2392
2393                /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2394                WRITE_ONCE(vq->broken, true);
2395        }
2396        spin_unlock(&dev->vqs_list_lock);
2397}
2398EXPORT_SYMBOL_GPL(virtio_break_device);
2399
2400dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2401{
2402        struct vring_virtqueue *vq = to_vvq(_vq);
2403
2404        BUG_ON(!vq->we_own_ring);
2405
2406        if (vq->packed_ring)
2407                return vq->packed.ring_dma_addr;
2408
2409        return vq->split.queue_dma_addr;
2410}
2411EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2412
2413dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2414{
2415        struct vring_virtqueue *vq = to_vvq(_vq);
2416
2417        BUG_ON(!vq->we_own_ring);
2418
2419        if (vq->packed_ring)
2420                return vq->packed.driver_event_dma_addr;
2421
2422        return vq->split.queue_dma_addr +
2423                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2424}
2425EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2426
2427dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2428{
2429        struct vring_virtqueue *vq = to_vvq(_vq);
2430
2431        BUG_ON(!vq->we_own_ring);
2432
2433        if (vq->packed_ring)
2434                return vq->packed.device_event_dma_addr;
2435
2436        return vq->split.queue_dma_addr +
2437                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2438}
2439EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2440
2441/* Only available for split ring */
2442const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2443{
2444        return &to_vvq(vq)->split.vring;
2445}
2446EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2447
2448MODULE_LICENSE("GPL");
2449