linux/drivers/virtio/virtio_ring.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* Virtio ring implementation.
   3 *
   4 *  Copyright 2007 Rusty Russell IBM Corporation
   5 */
   6#include <linux/virtio.h>
   7#include <linux/virtio_ring.h>
   8#include <linux/virtio_config.h>
   9#include <linux/device.h>
  10#include <linux/slab.h>
  11#include <linux/module.h>
  12#include <linux/hrtimer.h>
  13#include <linux/dma-mapping.h>
  14#include <xen/xen.h>
  15
  16#ifdef DEBUG
  17/* For development, we want to crash whenever the ring is screwed. */
  18#define BAD_RING(_vq, fmt, args...)                             \
  19        do {                                                    \
  20                dev_err(&(_vq)->vq.vdev->dev,                   \
  21                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  22                BUG();                                          \
  23        } while (0)
  24/* Caller is supposed to guarantee no reentry. */
  25#define START_USE(_vq)                                          \
  26        do {                                                    \
  27                if ((_vq)->in_use)                              \
  28                        panic("%s:in_use = %i\n",               \
  29                              (_vq)->vq.name, (_vq)->in_use);   \
  30                (_vq)->in_use = __LINE__;                       \
  31        } while (0)
  32#define END_USE(_vq) \
  33        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
  34#define LAST_ADD_TIME_UPDATE(_vq)                               \
  35        do {                                                    \
  36                ktime_t now = ktime_get();                      \
  37                                                                \
   38                /* No kick or get, with 0.1 seconds between?  Warn. */ \
  39                if ((_vq)->last_add_time_valid)                 \
  40                        WARN_ON(ktime_to_ms(ktime_sub(now,      \
  41                                (_vq)->last_add_time)) > 100);  \
  42                (_vq)->last_add_time = now;                     \
  43                (_vq)->last_add_time_valid = true;              \
  44        } while (0)
  45#define LAST_ADD_TIME_CHECK(_vq)                                \
  46        do {                                                    \
  47                if ((_vq)->last_add_time_valid) {               \
  48                        WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
  49                                      (_vq)->last_add_time)) > 100); \
  50                }                                               \
  51        } while (0)
  52#define LAST_ADD_TIME_INVALID(_vq)                              \
  53        ((_vq)->last_add_time_valid = false)
  54#else
  55#define BAD_RING(_vq, fmt, args...)                             \
  56        do {                                                    \
   57                dev_err(&(_vq)->vq.vdev->dev,                   \
  58                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  59                (_vq)->broken = true;                           \
  60        } while (0)
  61#define START_USE(vq)
  62#define END_USE(vq)
  63#define LAST_ADD_TIME_UPDATE(vq)
  64#define LAST_ADD_TIME_CHECK(vq)
  65#define LAST_ADD_TIME_INVALID(vq)
  66#endif
  67
  68struct vring_desc_state_split {
  69        void *data;                     /* Data for callback. */
  70        struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
  71};
  72
  73struct vring_desc_state_packed {
  74        void *data;                     /* Data for callback. */
  75        struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
  76        u16 num;                        /* Descriptor list length. */
  77        u16 next;                       /* The next desc state in a list. */
  78        u16 last;                       /* The last desc state in a list. */
  79};
  80
  81struct vring_desc_extra_packed {
  82        dma_addr_t addr;                /* Buffer DMA addr. */
  83        u32 len;                        /* Buffer length. */
  84        u16 flags;                      /* Descriptor flags. */
  85};
  86
  87struct vring_virtqueue {
  88        struct virtqueue vq;
  89
  90        /* Is this a packed ring? */
  91        bool packed_ring;
  92
  93        /* Is DMA API used? */
  94        bool use_dma_api;
  95
  96        /* Can we use weak barriers? */
  97        bool weak_barriers;
  98
  99        /* Other side has made a mess, don't try any more. */
 100        bool broken;
 101
 102        /* Host supports indirect buffers */
 103        bool indirect;
 104
 105        /* Host publishes avail event idx */
 106        bool event;
 107
 108        /* Head of free buffer list. */
 109        unsigned int free_head;
 110        /* Number we've added since last sync. */
 111        unsigned int num_added;
 112
 113        /* Last used index we've seen. */
 114        u16 last_used_idx;
 115
 116        union {
 117                /* Available for split ring */
 118                struct {
 119                        /* Actual memory layout for this queue. */
 120                        struct vring vring;
 121
 122                        /* Last written value to avail->flags */
 123                        u16 avail_flags_shadow;
 124
 125                        /*
 126                         * Last written value to avail->idx in
 127                         * guest byte order.
 128                         */
 129                        u16 avail_idx_shadow;
 130
 131                        /* Per-descriptor state. */
 132                        struct vring_desc_state_split *desc_state;
 133
 134                        /* DMA address and size information */
 135                        dma_addr_t queue_dma_addr;
 136                        size_t queue_size_in_bytes;
 137                } split;
 138
 139                /* Available for packed ring */
 140                struct {
 141                        /* Actual memory layout for this queue. */
 142                        struct {
 143                                unsigned int num;
 144                                struct vring_packed_desc *desc;
 145                                struct vring_packed_desc_event *driver;
 146                                struct vring_packed_desc_event *device;
 147                        } vring;
 148
 149                        /* Driver ring wrap counter. */
 150                        bool avail_wrap_counter;
 151
 152                        /* Device ring wrap counter. */
 153                        bool used_wrap_counter;
 154
 155                        /* Avail used flags. */
 156                        u16 avail_used_flags;
 157
 158                        /* Index of the next avail descriptor. */
 159                        u16 next_avail_idx;
 160
 161                        /*
 162                         * Last written value to driver->flags in
 163                         * guest byte order.
 164                         */
 165                        u16 event_flags_shadow;
 166
 167                        /* Per-descriptor state. */
 168                        struct vring_desc_state_packed *desc_state;
 169                        struct vring_desc_extra_packed *desc_extra;
 170
 171                        /* DMA address and size information */
 172                        dma_addr_t ring_dma_addr;
 173                        dma_addr_t driver_event_dma_addr;
 174                        dma_addr_t device_event_dma_addr;
 175                        size_t ring_size_in_bytes;
 176                        size_t event_size_in_bytes;
 177                } packed;
 178        };
 179
 180        /* How to notify other side. FIXME: commonalize hcalls! */
 181        bool (*notify)(struct virtqueue *vq);
 182
 183        /* DMA, allocation, and size information */
 184        bool we_own_ring;
 185
 186#ifdef DEBUG
 187        /* They're supposed to lock for us. */
 188        unsigned int in_use;
 189
 190        /* Figure out if their kicks are too delayed. */
 191        bool last_add_time_valid;
 192        ktime_t last_add_time;
 193#endif
 194};
 195
 196
 197/*
 198 * Helpers.
 199 */
 200
 201#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 202
 203static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
 204                                          unsigned int total_sg)
 205{
 206        struct vring_virtqueue *vq = to_vvq(_vq);
 207
 208        /*
 209         * If the host supports indirect descriptor tables, and we have multiple
 210         * buffers, then go indirect. FIXME: tune this threshold
 211         */
 212        return (vq->indirect && total_sg > 1 && vq->vq.num_free);
 213}
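
/*
 * Example (assuming the VIRTIO_RING_F_INDIRECT_DESC feature was
 * negotiated): a three-element scatterlist submitted through this path
 * consumes a single ring slot pointing at a separately allocated table
 * of three descriptors, instead of three chained ring slots.
 */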
 214
 215/*
 216 * Modern virtio devices have feature bits to specify whether they need a
 217 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 218 *
 219 * If there, the interaction between virtio and DMA API is messy.
 220 *
 221 * On most systems with virtio, physical addresses match bus addresses,
 222 * and it doesn't particularly matter whether we use the DMA API.
 223 *
 224 * On some systems, including Xen and any system with a physical device
 225 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 226 * for virtio DMA to work at all.
 227 *
 228 * On other systems, including SPARC and PPC64, virtio-pci devices are
 229 * enumerated as though they are behind an IOMMU, but the virtio host
 230 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 231 * there or somehow map everything as the identity.
 232 *
 233 * For the time being, we preserve historic behavior and bypass the DMA
 234 * API.
 235 *
 236 * TODO: install a per-device DMA ops structure that does the right thing
 237 * taking into account all the above quirks, and use the DMA API
 238 * unconditionally on data path.
 239 */
 240
 241static bool vring_use_dma_api(struct virtio_device *vdev)
 242{
 243        if (!virtio_has_iommu_quirk(vdev))
 244                return true;
 245
 246        /* Otherwise, we are left to guess. */
 247        /*
  248         * In theory, it's possible to have a buggy QEMU-supplied
 249         * emulated Q35 IOMMU and Xen enabled at the same time.  On
 250         * such a configuration, virtio has never worked and will
 251         * not work without an even larger kludge.  Instead, enable
 252         * the DMA API if we're a Xen guest, which at least allows
 253         * all of the sensible Xen configurations to work correctly.
 254         */
 255        if (xen_domain())
 256                return true;
 257
 258        return false;
 259}
 260
 261size_t virtio_max_dma_size(struct virtio_device *vdev)
 262{
 263        size_t max_segment_size = SIZE_MAX;
 264
 265        if (vring_use_dma_api(vdev))
 266                max_segment_size = dma_max_mapping_size(&vdev->dev);
 267
 268        return max_segment_size;
 269}
 270EXPORT_SYMBOL_GPL(virtio_max_dma_size);
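
/*
 * Illustrative use, not part of this file: a driver that builds large
 * requests can cap its segment size to what the DMA layer can map in
 * one go, roughly in the style of virtio-blk ('q' is the driver's own
 * request queue):
 *
 *      max_size = virtio_max_dma_size(vdev);
 *      blk_queue_max_segment_size(q, max_size);
 */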
 271
 272static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 273                              dma_addr_t *dma_handle, gfp_t flag)
 274{
 275        if (vring_use_dma_api(vdev)) {
 276                return dma_alloc_coherent(vdev->dev.parent, size,
 277                                          dma_handle, flag);
 278        } else {
 279                void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
 280
 281                if (queue) {
 282                        phys_addr_t phys_addr = virt_to_phys(queue);
 283                        *dma_handle = (dma_addr_t)phys_addr;
 284
 285                        /*
  286                         * Sanity check: make sure we didn't truncate
 287                         * the address.  The only arches I can find that
 288                         * have 64-bit phys_addr_t but 32-bit dma_addr_t
 289                         * are certain non-highmem MIPS and x86
 290                         * configurations, but these configurations
 291                         * should never allocate physical pages above 32
 292                         * bits, so this is fine.  Just in case, throw a
 293                         * warning and abort if we end up with an
 294                         * unrepresentable address.
 295                         */
 296                        if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
 297                                free_pages_exact(queue, PAGE_ALIGN(size));
 298                                return NULL;
 299                        }
 300                }
 301                return queue;
 302        }
 303}
 304
 305static void vring_free_queue(struct virtio_device *vdev, size_t size,
 306                             void *queue, dma_addr_t dma_handle)
 307{
 308        if (vring_use_dma_api(vdev))
 309                dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
 310        else
 311                free_pages_exact(queue, PAGE_ALIGN(size));
 312}
 313
 314/*
 315 * The DMA ops on various arches are rather gnarly right now, and
 316 * making all of the arch DMA ops work on the vring device itself
 317 * is a mess.  For now, we use the parent device for DMA ops.
 318 */
 319static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 320{
 321        return vq->vq.vdev->dev.parent;
 322}
 323
 324/* Map one sg entry. */
 325static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
 326                                   struct scatterlist *sg,
 327                                   enum dma_data_direction direction)
 328{
 329        if (!vq->use_dma_api)
 330                return (dma_addr_t)sg_phys(sg);
 331
 332        /*
 333         * We can't use dma_map_sg, because we don't use scatterlists in
 334         * the way it expects (we don't guarantee that the scatterlist
 335         * will exist for the lifetime of the mapping).
 336         */
 337        return dma_map_page(vring_dma_dev(vq),
 338                            sg_page(sg), sg->offset, sg->length,
 339                            direction);
 340}
 341
 342static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 343                                   void *cpu_addr, size_t size,
 344                                   enum dma_data_direction direction)
 345{
 346        if (!vq->use_dma_api)
 347                return (dma_addr_t)virt_to_phys(cpu_addr);
 348
 349        return dma_map_single(vring_dma_dev(vq),
 350                              cpu_addr, size, direction);
 351}
 352
 353static int vring_mapping_error(const struct vring_virtqueue *vq,
 354                               dma_addr_t addr)
 355{
 356        if (!vq->use_dma_api)
 357                return 0;
 358
 359        return dma_mapping_error(vring_dma_dev(vq), addr);
 360}
 361
 362
 363/*
 364 * Split ring specific functions - *_split().
 365 */
 366
 367static void vring_unmap_one_split(const struct vring_virtqueue *vq,
 368                                  struct vring_desc *desc)
 369{
 370        u16 flags;
 371
 372        if (!vq->use_dma_api)
 373                return;
 374
 375        flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 376
 377        if (flags & VRING_DESC_F_INDIRECT) {
 378                dma_unmap_single(vring_dma_dev(vq),
 379                                 virtio64_to_cpu(vq->vq.vdev, desc->addr),
 380                                 virtio32_to_cpu(vq->vq.vdev, desc->len),
 381                                 (flags & VRING_DESC_F_WRITE) ?
 382                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 383        } else {
 384                dma_unmap_page(vring_dma_dev(vq),
 385                               virtio64_to_cpu(vq->vq.vdev, desc->addr),
 386                               virtio32_to_cpu(vq->vq.vdev, desc->len),
 387                               (flags & VRING_DESC_F_WRITE) ?
 388                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 389        }
 390}
 391
 392static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 393                                               unsigned int total_sg,
 394                                               gfp_t gfp)
 395{
 396        struct vring_desc *desc;
 397        unsigned int i;
 398
 399        /*
 400         * We require lowmem mappings for the descriptors because
 401         * otherwise virt_to_phys will give us bogus addresses in the
 402         * virtqueue.
 403         */
 404        gfp &= ~__GFP_HIGHMEM;
 405
 406        desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
 407        if (!desc)
 408                return NULL;
 409
 410        for (i = 0; i < total_sg; i++)
 411                desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
 412        return desc;
 413}
 414
 415static inline int virtqueue_add_split(struct virtqueue *_vq,
 416                                      struct scatterlist *sgs[],
 417                                      unsigned int total_sg,
 418                                      unsigned int out_sgs,
 419                                      unsigned int in_sgs,
 420                                      void *data,
 421                                      void *ctx,
 422                                      gfp_t gfp)
 423{
 424        struct vring_virtqueue *vq = to_vvq(_vq);
 425        struct scatterlist *sg;
 426        struct vring_desc *desc;
 427        unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 428        int head;
 429        bool indirect;
 430
 431        START_USE(vq);
 432
 433        BUG_ON(data == NULL);
 434        BUG_ON(ctx && vq->indirect);
 435
 436        if (unlikely(vq->broken)) {
 437                END_USE(vq);
 438                return -EIO;
 439        }
 440
 441        LAST_ADD_TIME_UPDATE(vq);
 442
 443        BUG_ON(total_sg == 0);
 444
 445        head = vq->free_head;
 446
 447        if (virtqueue_use_indirect(_vq, total_sg))
 448                desc = alloc_indirect_split(_vq, total_sg, gfp);
 449        else {
 450                desc = NULL;
 451                WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 452        }
 453
 454        if (desc) {
 455                /* Use a single buffer which doesn't continue */
 456                indirect = true;
 457                /* Set up rest to use this indirect table. */
 458                i = 0;
 459                descs_used = 1;
 460        } else {
 461                indirect = false;
 462                desc = vq->split.vring.desc;
 463                i = head;
 464                descs_used = total_sg;
 465        }
 466
 467        if (vq->vq.num_free < descs_used) {
 468                pr_debug("Can't add buf len %i - avail = %i\n",
 469                         descs_used, vq->vq.num_free);
 470                /* FIXME: for historical reasons, we force a notify here if
 471                 * there are outgoing parts to the buffer.  Presumably the
 472                 * host should service the ring ASAP. */
 473                if (out_sgs)
 474                        vq->notify(&vq->vq);
 475                if (indirect)
 476                        kfree(desc);
 477                END_USE(vq);
 478                return -ENOSPC;
 479        }
 480
 481        for (n = 0; n < out_sgs; n++) {
 482                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 483                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 484                        if (vring_mapping_error(vq, addr))
 485                                goto unmap_release;
 486
 487                        desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
 488                        desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 489                        desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 490                        prev = i;
 491                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 492                }
 493        }
 494        for (; n < (out_sgs + in_sgs); n++) {
 495                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 496                        dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
 497                        if (vring_mapping_error(vq, addr))
 498                                goto unmap_release;
 499
 500                        desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
 501                        desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 502                        desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 503                        prev = i;
 504                        i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 505                }
 506        }
 507        /* Last one doesn't continue. */
 508        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 509
 510        if (indirect) {
 511                /* Now that the indirect table is filled in, map it. */
 512                dma_addr_t addr = vring_map_single(
 513                        vq, desc, total_sg * sizeof(struct vring_desc),
 514                        DMA_TO_DEVICE);
 515                if (vring_mapping_error(vq, addr))
 516                        goto unmap_release;
 517
 518                vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
 519                                VRING_DESC_F_INDIRECT);
 520                vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
 521                                addr);
 522
 523                vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
 524                                total_sg * sizeof(struct vring_desc));
 525        }
 526
 527        /* We're using some buffers from the free list. */
 528        vq->vq.num_free -= descs_used;
 529
 530        /* Update free pointer */
 531        if (indirect)
 532                vq->free_head = virtio16_to_cpu(_vq->vdev,
 533                                        vq->split.vring.desc[head].next);
 534        else
 535                vq->free_head = i;
 536
 537        /* Store token and indirect buffer state. */
 538        vq->split.desc_state[head].data = data;
 539        if (indirect)
 540                vq->split.desc_state[head].indir_desc = desc;
 541        else
 542                vq->split.desc_state[head].indir_desc = ctx;
 543
 544        /* Put entry in available array (but don't update avail->idx until they
 545         * do sync). */
 546        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
 547        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 548
 549        /* Descriptors and available array need to be set before we expose the
 550         * new available array entries. */
 551        virtio_wmb(vq->weak_barriers);
 552        vq->split.avail_idx_shadow++;
 553        vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 554                                                vq->split.avail_idx_shadow);
 555        vq->num_added++;
 556
 557        pr_debug("Added buffer head %i to %p\n", head, vq);
 558        END_USE(vq);
 559
 560        /* This is very unlikely, but theoretically possible.  Kick
 561         * just in case. */
 562        if (unlikely(vq->num_added == (1 << 16) - 1))
 563                virtqueue_kick(_vq);
 564
 565        return 0;
 566
 567unmap_release:
 568        err_idx = i;
 569
 570        if (indirect)
 571                i = 0;
 572        else
 573                i = head;
 574
 575        for (n = 0; n < total_sg; n++) {
 576                if (i == err_idx)
 577                        break;
 578                vring_unmap_one_split(vq, &desc[i]);
 579                i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 580        }
 581
 582        if (indirect)
 583                kfree(desc);
 584
 585        END_USE(vq);
 586        return -EIO;
 587}
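
/*
 * Illustrative sketch of how a driver reaches this function (the names
 * 'req', 'hdr' and 'status' are hypothetical): virtqueue_add_sgs() is
 * the usual entry point, here with one device-readable and one
 * device-writable element:
 *
 *      struct scatterlist hdr, status, *sgs[2];
 *
 *      sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *      sg_init_one(&status, &req->status, sizeof(req->status));
 *      sgs[0] = &hdr;
 *      sgs[1] = &status;
 *      err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 */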
 588
 589static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 590{
 591        struct vring_virtqueue *vq = to_vvq(_vq);
 592        u16 new, old;
 593        bool needs_kick;
 594
 595        START_USE(vq);
 596        /* We need to expose available array entries before checking avail
 597         * event. */
 598        virtio_mb(vq->weak_barriers);
 599
 600        old = vq->split.avail_idx_shadow - vq->num_added;
 601        new = vq->split.avail_idx_shadow;
 602        vq->num_added = 0;
 603
 604        LAST_ADD_TIME_CHECK(vq);
 605        LAST_ADD_TIME_INVALID(vq);
 606
 607        if (vq->event) {
 608                needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
 609                                        vring_avail_event(&vq->split.vring)),
 610                                              new, old);
 611        } else {
 612                needs_kick = !(vq->split.vring.used->flags &
 613                                        cpu_to_virtio16(_vq->vdev,
 614                                                VRING_USED_F_NO_NOTIFY));
 615        }
 616        END_USE(vq);
 617        return needs_kick;
 618}
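
/*
 * For reference, the vring_need_event() helper used above boils down to
 * an unsigned window check:
 *
 *      (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old)
 *
 * i.e. kick only if the event index published by the device falls among
 * the entries added since the last kick, so a batch of adds costs at
 * most one notification.
 */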
 619
 620static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 621                             void **ctx)
 622{
 623        unsigned int i, j;
 624        __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 625
 626        /* Clear data ptr. */
 627        vq->split.desc_state[head].data = NULL;
 628
 629        /* Put back on free list: unmap first-level descriptors and find end */
 630        i = head;
 631
 632        while (vq->split.vring.desc[i].flags & nextflag) {
 633                vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
 634                i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
 635                vq->vq.num_free++;
 636        }
 637
 638        vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
 639        vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
 640                                                vq->free_head);
 641        vq->free_head = head;
 642
 643        /* Plus final descriptor */
 644        vq->vq.num_free++;
 645
 646        if (vq->indirect) {
 647                struct vring_desc *indir_desc =
 648                                vq->split.desc_state[head].indir_desc;
 649                u32 len;
 650
 651                /* Free the indirect table, if any, now that it's unmapped. */
 652                if (!indir_desc)
 653                        return;
 654
 655                len = virtio32_to_cpu(vq->vq.vdev,
 656                                vq->split.vring.desc[head].len);
 657
 658                BUG_ON(!(vq->split.vring.desc[head].flags &
 659                         cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
 660                BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 661
 662                for (j = 0; j < len / sizeof(struct vring_desc); j++)
 663                        vring_unmap_one_split(vq, &indir_desc[j]);
 664
 665                kfree(indir_desc);
 666                vq->split.desc_state[head].indir_desc = NULL;
 667        } else if (ctx) {
 668                *ctx = vq->split.desc_state[head].indir_desc;
 669        }
 670}
 671
 672static inline bool more_used_split(const struct vring_virtqueue *vq)
 673{
 674        return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
 675                        vq->split.vring.used->idx);
 676}
 677
 678static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 679                                         unsigned int *len,
 680                                         void **ctx)
 681{
 682        struct vring_virtqueue *vq = to_vvq(_vq);
 683        void *ret;
 684        unsigned int i;
 685        u16 last_used;
 686
 687        START_USE(vq);
 688
 689        if (unlikely(vq->broken)) {
 690                END_USE(vq);
 691                return NULL;
 692        }
 693
 694        if (!more_used_split(vq)) {
 695                pr_debug("No more buffers in queue\n");
 696                END_USE(vq);
 697                return NULL;
 698        }
 699
 700        /* Only get used array entries after they have been exposed by host. */
 701        virtio_rmb(vq->weak_barriers);
 702
 703        last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
 704        i = virtio32_to_cpu(_vq->vdev,
 705                        vq->split.vring.used->ring[last_used].id);
 706        *len = virtio32_to_cpu(_vq->vdev,
 707                        vq->split.vring.used->ring[last_used].len);
 708
 709        if (unlikely(i >= vq->split.vring.num)) {
 710                BAD_RING(vq, "id %u out of range\n", i);
 711                return NULL;
 712        }
 713        if (unlikely(!vq->split.desc_state[i].data)) {
 714                BAD_RING(vq, "id %u is not a head!\n", i);
 715                return NULL;
 716        }
 717
 718        /* detach_buf_split clears data, so grab it now. */
 719        ret = vq->split.desc_state[i].data;
 720        detach_buf_split(vq, i, ctx);
 721        vq->last_used_idx++;
 722        /* If we expect an interrupt for the next entry, tell host
 723         * by writing event index and flush out the write before
 724         * the read in the next get_buf call. */
 725        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 726                virtio_store_mb(vq->weak_barriers,
 727                                &vring_used_event(&vq->split.vring),
 728                                cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 729
 730        LAST_ADD_TIME_INVALID(vq);
 731
 732        END_USE(vq);
 733        return ret;
 734}
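
/*
 * Illustrative sketch ('my_vq_done' and 'my_complete_request' are
 * hypothetical driver names): a virtqueue callback typically drains
 * used buffers through the public virtqueue_get_buf() wrapper, which
 * lands here for split rings:
 *
 *      static void my_vq_done(struct virtqueue *vq)
 *      {
 *              unsigned int len;
 *              void *req;
 *
 *              while ((req = virtqueue_get_buf(vq, &len)) != NULL)
 *                      my_complete_request(req, len);
 *      }
 */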
 735
 736static void virtqueue_disable_cb_split(struct virtqueue *_vq)
 737{
 738        struct vring_virtqueue *vq = to_vvq(_vq);
 739
 740        if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 741                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 742                if (!vq->event)
 743                        vq->split.vring.avail->flags =
 744                                cpu_to_virtio16(_vq->vdev,
 745                                                vq->split.avail_flags_shadow);
 746        }
 747}
 748
 749static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
 750{
 751        struct vring_virtqueue *vq = to_vvq(_vq);
 752        u16 last_used_idx;
 753
 754        START_USE(vq);
 755
 756        /* We optimistically turn back on interrupts, then check if there was
 757         * more to do. */
 758        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 759         * either clear the flags bit or point the event index at the next
 760         * entry. Always do both to keep code simple. */
 761        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 762                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 763                if (!vq->event)
 764                        vq->split.vring.avail->flags =
 765                                cpu_to_virtio16(_vq->vdev,
 766                                                vq->split.avail_flags_shadow);
 767        }
 768        vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
 769                        last_used_idx = vq->last_used_idx);
 770        END_USE(vq);
 771        return last_used_idx;
 772}
 773
 774static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
 775{
 776        struct vring_virtqueue *vq = to_vvq(_vq);
 777
 778        return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
 779                        vq->split.vring.used->idx);
 780}
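
/*
 * Sketch of how the prepare/poll pair is used through the public API by
 * drivers that defer completion work ('resume_polling' is a
 * hypothetical driver helper):
 *
 *      unsigned int opaque = virtqueue_enable_cb_prepare(vq);
 *      ... finish processing the buffers already harvested ...
 *      if (unlikely(virtqueue_poll(vq, opaque)))
 *              resume_polling(vq);
 */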
 781
 782static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 783{
 784        struct vring_virtqueue *vq = to_vvq(_vq);
 785        u16 bufs;
 786
 787        START_USE(vq);
 788
 789        /* We optimistically turn back on interrupts, then check if there was
 790         * more to do. */
  791        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 792         * either clear the flags bit or point the event index at the next
 793         * entry. Always update the event index to keep code simple. */
 794        if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
 795                vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 796                if (!vq->event)
 797                        vq->split.vring.avail->flags =
 798                                cpu_to_virtio16(_vq->vdev,
 799                                                vq->split.avail_flags_shadow);
 800        }
 801        /* TODO: tune this threshold */
 802        bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 803
 804        virtio_store_mb(vq->weak_barriers,
 805                        &vring_used_event(&vq->split.vring),
 806                        cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 807
 808        if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
 809                                        - vq->last_used_idx) > bufs)) {
 810                END_USE(vq);
 811                return false;
 812        }
 813
 814        END_USE(vq);
 815        return true;
 816}
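
/*
 * Worked example of the threshold above: with 16 buffers still in
 * flight (avail_idx_shadow - last_used_idx == 16), bufs is 12, so the
 * used event index is parked 12 entries ahead of last_used_idx and the
 * device interrupts only once roughly three quarters of the outstanding
 * buffers have been consumed.
 */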
 817
 818static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 819{
 820        struct vring_virtqueue *vq = to_vvq(_vq);
 821        unsigned int i;
 822        void *buf;
 823
 824        START_USE(vq);
 825
 826        for (i = 0; i < vq->split.vring.num; i++) {
 827                if (!vq->split.desc_state[i].data)
 828                        continue;
 829                /* detach_buf_split clears data, so grab it now. */
 830                buf = vq->split.desc_state[i].data;
 831                detach_buf_split(vq, i, NULL);
 832                vq->split.avail_idx_shadow--;
 833                vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
 834                                vq->split.avail_idx_shadow);
 835                END_USE(vq);
 836                return buf;
 837        }
 838        /* That should have freed everything. */
 839        BUG_ON(vq->vq.num_free != vq->split.vring.num);
 840
 841        END_USE(vq);
 842        return NULL;
 843}
 844
 845static struct virtqueue *vring_create_virtqueue_split(
 846        unsigned int index,
 847        unsigned int num,
 848        unsigned int vring_align,
 849        struct virtio_device *vdev,
 850        bool weak_barriers,
 851        bool may_reduce_num,
 852        bool context,
 853        bool (*notify)(struct virtqueue *),
 854        void (*callback)(struct virtqueue *),
 855        const char *name)
 856{
 857        struct virtqueue *vq;
 858        void *queue = NULL;
 859        dma_addr_t dma_addr;
 860        size_t queue_size_in_bytes;
 861        struct vring vring;
 862
 863        /* We assume num is a power of 2. */
 864        if (num & (num - 1)) {
 865                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 866                return NULL;
 867        }
 868
 869        /* TODO: allocate each queue chunk individually */
 870        for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
 871                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 872                                          &dma_addr,
 873                                          GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 874                if (queue)
 875                        break;
 876                if (!may_reduce_num)
 877                        return NULL;
 878        }
 879
 880        if (!num)
 881                return NULL;
 882
 883        if (!queue) {
 884                /* Try to get a single page. You are my only hope! */
 885                queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
 886                                          &dma_addr, GFP_KERNEL|__GFP_ZERO);
 887        }
 888        if (!queue)
 889                return NULL;
 890
 891        queue_size_in_bytes = vring_size(num, vring_align);
 892        vring_init(&vring, num, queue, vring_align);
 893
 894        vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 895                                   notify, callback, name);
 896        if (!vq) {
 897                vring_free_queue(vdev, queue_size_in_bytes, queue,
 898                                 dma_addr);
 899                return NULL;
 900        }
 901
 902        to_vvq(vq)->split.queue_dma_addr = dma_addr;
 903        to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
 904        to_vvq(vq)->we_own_ring = true;
 905
 906        return vq;
 907}
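
/*
 * Example of the sizing fallback above: a request for num = 1024 whose
 * contiguous allocation fails is retried at 512, 256, ... for as long
 * as vring_size() still exceeds PAGE_SIZE and may_reduce_num allows it,
 * before the final single-page attempt.
 */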
 908
 909
 910/*
 911 * Packed ring specific functions - *_packed().
 912 */
 913
 914static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
 915                                     struct vring_desc_extra_packed *state)
 916{
 917        u16 flags;
 918
 919        if (!vq->use_dma_api)
 920                return;
 921
 922        flags = state->flags;
 923
 924        if (flags & VRING_DESC_F_INDIRECT) {
 925                dma_unmap_single(vring_dma_dev(vq),
 926                                 state->addr, state->len,
 927                                 (flags & VRING_DESC_F_WRITE) ?
 928                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 929        } else {
 930                dma_unmap_page(vring_dma_dev(vq),
 931                               state->addr, state->len,
 932                               (flags & VRING_DESC_F_WRITE) ?
 933                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 934        }
 935}
 936
 937static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
 938                                   struct vring_packed_desc *desc)
 939{
 940        u16 flags;
 941
 942        if (!vq->use_dma_api)
 943                return;
 944
 945        flags = le16_to_cpu(desc->flags);
 946
 947        if (flags & VRING_DESC_F_INDIRECT) {
 948                dma_unmap_single(vring_dma_dev(vq),
 949                                 le64_to_cpu(desc->addr),
 950                                 le32_to_cpu(desc->len),
 951                                 (flags & VRING_DESC_F_WRITE) ?
 952                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 953        } else {
 954                dma_unmap_page(vring_dma_dev(vq),
 955                               le64_to_cpu(desc->addr),
 956                               le32_to_cpu(desc->len),
 957                               (flags & VRING_DESC_F_WRITE) ?
 958                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
 959        }
 960}
 961
 962static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
 963                                                       gfp_t gfp)
 964{
 965        struct vring_packed_desc *desc;
 966
 967        /*
 968         * We require lowmem mappings for the descriptors because
 969         * otherwise virt_to_phys will give us bogus addresses in the
 970         * virtqueue.
 971         */
 972        gfp &= ~__GFP_HIGHMEM;
 973
 974        desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
 975
 976        return desc;
 977}
 978
 979static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 980                                       struct scatterlist *sgs[],
 981                                       unsigned int total_sg,
 982                                       unsigned int out_sgs,
 983                                       unsigned int in_sgs,
 984                                       void *data,
 985                                       gfp_t gfp)
 986{
 987        struct vring_packed_desc *desc;
 988        struct scatterlist *sg;
 989        unsigned int i, n, err_idx;
 990        u16 head, id;
 991        dma_addr_t addr;
 992
 993        head = vq->packed.next_avail_idx;
 994        desc = alloc_indirect_packed(total_sg, gfp);
 995
 996        if (unlikely(vq->vq.num_free < 1)) {
 997                pr_debug("Can't add buf len 1 - avail = 0\n");
 998                kfree(desc);
 999                END_USE(vq);
1000                return -ENOSPC;
1001        }
1002
1003        i = 0;
1004        id = vq->free_head;
1005        BUG_ON(id == vq->packed.vring.num);
1006
1007        for (n = 0; n < out_sgs + in_sgs; n++) {
1008                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1009                        addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1010                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1011                        if (vring_mapping_error(vq, addr))
1012                                goto unmap_release;
1013
1014                        desc[i].flags = cpu_to_le16(n < out_sgs ?
1015                                                0 : VRING_DESC_F_WRITE);
1016                        desc[i].addr = cpu_to_le64(addr);
1017                        desc[i].len = cpu_to_le32(sg->length);
1018                        i++;
1019                }
1020        }
1021
1022        /* Now that the indirect table is filled in, map it. */
1023        addr = vring_map_single(vq, desc,
1024                        total_sg * sizeof(struct vring_packed_desc),
1025                        DMA_TO_DEVICE);
1026        if (vring_mapping_error(vq, addr))
1027                goto unmap_release;
1028
1029        vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1030        vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1031                                sizeof(struct vring_packed_desc));
1032        vq->packed.vring.desc[head].id = cpu_to_le16(id);
1033
1034        if (vq->use_dma_api) {
1035                vq->packed.desc_extra[id].addr = addr;
1036                vq->packed.desc_extra[id].len = total_sg *
1037                                sizeof(struct vring_packed_desc);
1038                vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1039                                                  vq->packed.avail_used_flags;
1040        }
1041
1042        /*
1043         * A driver MUST NOT make the first descriptor in the list
1044         * available before all subsequent descriptors comprising
1045         * the list are made available.
1046         */
1047        virtio_wmb(vq->weak_barriers);
1048        vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1049                                                vq->packed.avail_used_flags);
1050
1051        /* We're using some buffers from the free list. */
1052        vq->vq.num_free -= 1;
1053
1054        /* Update free pointer */
1055        n = head + 1;
1056        if (n >= vq->packed.vring.num) {
1057                n = 0;
1058                vq->packed.avail_wrap_counter ^= 1;
1059                vq->packed.avail_used_flags ^=
1060                                1 << VRING_PACKED_DESC_F_AVAIL |
1061                                1 << VRING_PACKED_DESC_F_USED;
1062        }
1063        vq->packed.next_avail_idx = n;
1064        vq->free_head = vq->packed.desc_state[id].next;
1065
1066        /* Store token and indirect buffer state. */
1067        vq->packed.desc_state[id].num = 1;
1068        vq->packed.desc_state[id].data = data;
1069        vq->packed.desc_state[id].indir_desc = desc;
1070        vq->packed.desc_state[id].last = id;
1071
1072        vq->num_added += 1;
1073
1074        pr_debug("Added buffer head %i to %p\n", head, vq);
1075        END_USE(vq);
1076
1077        return 0;
1078
1079unmap_release:
1080        err_idx = i;
1081
1082        for (i = 0; i < err_idx; i++)
1083                vring_unmap_desc_packed(vq, &desc[i]);
1084
1085        kfree(desc);
1086
1087        END_USE(vq);
1088        return -EIO;
1089}
1090
1091static inline int virtqueue_add_packed(struct virtqueue *_vq,
1092                                       struct scatterlist *sgs[],
1093                                       unsigned int total_sg,
1094                                       unsigned int out_sgs,
1095                                       unsigned int in_sgs,
1096                                       void *data,
1097                                       void *ctx,
1098                                       gfp_t gfp)
1099{
1100        struct vring_virtqueue *vq = to_vvq(_vq);
1101        struct vring_packed_desc *desc;
1102        struct scatterlist *sg;
1103        unsigned int i, n, c, descs_used, err_idx;
1104        __le16 uninitialized_var(head_flags), flags;
1105        u16 head, id, uninitialized_var(prev), curr, avail_used_flags;
1106
1107        START_USE(vq);
1108
1109        BUG_ON(data == NULL);
1110        BUG_ON(ctx && vq->indirect);
1111
1112        if (unlikely(vq->broken)) {
1113                END_USE(vq);
1114                return -EIO;
1115        }
1116
1117        LAST_ADD_TIME_UPDATE(vq);
1118
1119        BUG_ON(total_sg == 0);
1120
1121        if (virtqueue_use_indirect(_vq, total_sg))
1122                return virtqueue_add_indirect_packed(vq, sgs, total_sg,
1123                                out_sgs, in_sgs, data, gfp);
1124
1125        head = vq->packed.next_avail_idx;
1126        avail_used_flags = vq->packed.avail_used_flags;
1127
1128        WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1129
1130        desc = vq->packed.vring.desc;
1131        i = head;
1132        descs_used = total_sg;
1133
1134        if (unlikely(vq->vq.num_free < descs_used)) {
1135                pr_debug("Can't add buf len %i - avail = %i\n",
1136                         descs_used, vq->vq.num_free);
1137                END_USE(vq);
1138                return -ENOSPC;
1139        }
1140
1141        id = vq->free_head;
1142        BUG_ON(id == vq->packed.vring.num);
1143
1144        curr = id;
1145        c = 0;
1146        for (n = 0; n < out_sgs + in_sgs; n++) {
1147                for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1148                        dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1149                                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
1150                        if (vring_mapping_error(vq, addr))
1151                                goto unmap_release;
1152
1153                        flags = cpu_to_le16(vq->packed.avail_used_flags |
1154                                    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1155                                    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1156                        if (i == head)
1157                                head_flags = flags;
1158                        else
1159                                desc[i].flags = flags;
1160
1161                        desc[i].addr = cpu_to_le64(addr);
1162                        desc[i].len = cpu_to_le32(sg->length);
1163                        desc[i].id = cpu_to_le16(id);
1164
1165                        if (unlikely(vq->use_dma_api)) {
1166                                vq->packed.desc_extra[curr].addr = addr;
1167                                vq->packed.desc_extra[curr].len = sg->length;
1168                                vq->packed.desc_extra[curr].flags =
1169                                        le16_to_cpu(flags);
1170                        }
1171                        prev = curr;
1172                        curr = vq->packed.desc_state[curr].next;
1173
1174                        if ((unlikely(++i >= vq->packed.vring.num))) {
1175                                i = 0;
1176                                vq->packed.avail_used_flags ^=
1177                                        1 << VRING_PACKED_DESC_F_AVAIL |
1178                                        1 << VRING_PACKED_DESC_F_USED;
1179                        }
1180                }
1181        }
1182
1183        if (i < head)
1184                vq->packed.avail_wrap_counter ^= 1;
1185
1186        /* We're using some buffers from the free list. */
1187        vq->vq.num_free -= descs_used;
1188
1189        /* Update free pointer */
1190        vq->packed.next_avail_idx = i;
1191        vq->free_head = curr;
1192
1193        /* Store token. */
1194        vq->packed.desc_state[id].num = descs_used;
1195        vq->packed.desc_state[id].data = data;
1196        vq->packed.desc_state[id].indir_desc = ctx;
1197        vq->packed.desc_state[id].last = prev;
1198
1199        /*
1200         * A driver MUST NOT make the first descriptor in the list
1201         * available before all subsequent descriptors comprising
1202         * the list are made available.
1203         */
1204        virtio_wmb(vq->weak_barriers);
1205        vq->packed.vring.desc[head].flags = head_flags;
1206        vq->num_added += descs_used;
1207
1208        pr_debug("Added buffer head %i to %p\n", head, vq);
1209        END_USE(vq);
1210
1211        return 0;
1212
1213unmap_release:
1214        err_idx = i;
1215        i = head;
1216
1217        vq->packed.avail_used_flags = avail_used_flags;
1218
1219        for (n = 0; n < total_sg; n++) {
1220                if (i == err_idx)
1221                        break;
1222                vring_unmap_desc_packed(vq, &desc[i]);
1223                i++;
1224                if (i >= vq->packed.vring.num)
1225                        i = 0;
1226        }
1227
1228        END_USE(vq);
1229        return -EIO;
1230}
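
/*
 * Note on the id scheme used above: packed-ring slots are consumed in
 * ring order, but the buffer is identified by 'id', an index into
 * desc_state[] handed out from a separate free list (vq->free_head and
 * desc_state[].next), which is what lets the device complete buffers
 * out of order.
 */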
1231
1232static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1233{
1234        struct vring_virtqueue *vq = to_vvq(_vq);
1235        u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1236        bool needs_kick;
1237        union {
1238                struct {
1239                        __le16 off_wrap;
1240                        __le16 flags;
1241                };
1242                u32 u32;
1243        } snapshot;
1244
1245        START_USE(vq);
1246
1247        /*
1248         * We need to expose the new flags value before checking notification
1249         * suppressions.
1250         */
1251        virtio_mb(vq->weak_barriers);
1252
1253        old = vq->packed.next_avail_idx - vq->num_added;
1254        new = vq->packed.next_avail_idx;
1255        vq->num_added = 0;
1256
1257        snapshot.u32 = *(u32 *)vq->packed.vring.device;
1258        flags = le16_to_cpu(snapshot.flags);
1259
1260        LAST_ADD_TIME_CHECK(vq);
1261        LAST_ADD_TIME_INVALID(vq);
1262
1263        if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1264                needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1265                goto out;
1266        }
1267
1268        off_wrap = le16_to_cpu(snapshot.off_wrap);
1269
1270        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1271        event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1272        if (wrap_counter != vq->packed.avail_wrap_counter)
1273                event_idx -= vq->packed.vring.num;
1274
1275        needs_kick = vring_need_event(event_idx, new, old);
1276out:
1277        END_USE(vq);
1278        return needs_kick;
1279}
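
/*
 * The off_wrap snapshot packs the device's event index and wrap counter
 * into one 16-bit word: bit 15 (VRING_PACKED_EVENT_F_WRAP_CTR) is the
 * wrap counter, bits 0-14 the index.  E.g. index 5 with the wrap
 * counter set is 0x8005.
 */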
1280
1281static void detach_buf_packed(struct vring_virtqueue *vq,
1282                              unsigned int id, void **ctx)
1283{
1284        struct vring_desc_state_packed *state = NULL;
1285        struct vring_packed_desc *desc;
1286        unsigned int i, curr;
1287
1288        state = &vq->packed.desc_state[id];
1289
1290        /* Clear data ptr. */
1291        state->data = NULL;
1292
1293        vq->packed.desc_state[state->last].next = vq->free_head;
1294        vq->free_head = id;
1295        vq->vq.num_free += state->num;
1296
1297        if (unlikely(vq->use_dma_api)) {
1298                curr = id;
1299                for (i = 0; i < state->num; i++) {
1300                        vring_unmap_state_packed(vq,
1301                                &vq->packed.desc_extra[curr]);
1302                        curr = vq->packed.desc_state[curr].next;
1303                }
1304        }
1305
1306        if (vq->indirect) {
1307                u32 len;
1308
1309                /* Free the indirect table, if any, now that it's unmapped. */
1310                desc = state->indir_desc;
1311                if (!desc)
1312                        return;
1313
1314                if (vq->use_dma_api) {
1315                        len = vq->packed.desc_extra[id].len;
1316                        for (i = 0; i < len / sizeof(struct vring_packed_desc);
1317                                        i++)
1318                                vring_unmap_desc_packed(vq, &desc[i]);
1319                }
1320                kfree(desc);
1321                state->indir_desc = NULL;
1322        } else if (ctx) {
1323                *ctx = state->indir_desc;
1324        }
1325}
1326
1327static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1328                                       u16 idx, bool used_wrap_counter)
1329{
1330        bool avail, used;
1331        u16 flags;
1332
1333        flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1334        avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1335        used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1336
1337        return avail == used && used == used_wrap_counter;
1338}
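
/*
 * In the packed layout a descriptor is "used" once its F_AVAIL and
 * F_USED bits are equal and match the wrap counter we expect: on the
 * first pass over the ring the device reports a buffer by setting both
 * bits (counter still true), and after it wraps by clearing both.
 */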
1339
1340static inline bool more_used_packed(const struct vring_virtqueue *vq)
1341{
1342        return is_used_desc_packed(vq, vq->last_used_idx,
1343                        vq->packed.used_wrap_counter);
1344}
1345
1346static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1347                                          unsigned int *len,
1348                                          void **ctx)
1349{
1350        struct vring_virtqueue *vq = to_vvq(_vq);
1351        u16 last_used, id;
1352        void *ret;
1353
1354        START_USE(vq);
1355
1356        if (unlikely(vq->broken)) {
1357                END_USE(vq);
1358                return NULL;
1359        }
1360
1361        if (!more_used_packed(vq)) {
1362                pr_debug("No more buffers in queue\n");
1363                END_USE(vq);
1364                return NULL;
1365        }
1366
1367        /* Only get used elements after they have been exposed by host. */
1368        virtio_rmb(vq->weak_barriers);
1369
1370        last_used = vq->last_used_idx;
1371        id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1372        *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1373
1374        if (unlikely(id >= vq->packed.vring.num)) {
1375                BAD_RING(vq, "id %u out of range\n", id);
1376                return NULL;
1377        }
1378        if (unlikely(!vq->packed.desc_state[id].data)) {
1379                BAD_RING(vq, "id %u is not a head!\n", id);
1380                return NULL;
1381        }
1382
1383        /* detach_buf_packed clears data, so grab it now. */
1384        ret = vq->packed.desc_state[id].data;
1385        detach_buf_packed(vq, id, ctx);
1386
1387        vq->last_used_idx += vq->packed.desc_state[id].num;
1388        if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1389                vq->last_used_idx -= vq->packed.vring.num;
1390                vq->packed.used_wrap_counter ^= 1;
1391        }
1392
1393        /*
1394         * If we expect an interrupt for the next entry, tell host
1395         * by writing event index and flush out the write before
1396         * the read in the next get_buf call.
1397         */
1398        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1399                virtio_store_mb(vq->weak_barriers,
1400                                &vq->packed.vring.driver->off_wrap,
1401                                cpu_to_le16(vq->last_used_idx |
1402                                        (vq->packed.used_wrap_counter <<
1403                                         VRING_PACKED_EVENT_F_WRAP_CTR)));
1404
1405        LAST_ADD_TIME_INVALID(vq);
1406
1407        END_USE(vq);
1408        return ret;
1409}
1410
1411static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1412{
1413        struct vring_virtqueue *vq = to_vvq(_vq);
1414
1415        if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1416                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1417                vq->packed.vring.driver->flags =
1418                        cpu_to_le16(vq->packed.event_flags_shadow);
1419        }
1420}
1421
1422static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1423{
1424        struct vring_virtqueue *vq = to_vvq(_vq);
1425
1426        START_USE(vq);
1427
1428        /*
1429         * We optimistically turn back on interrupts, then check if there was
1430         * more to do.
1431         */
1432
1433        if (vq->event) {
1434                vq->packed.vring.driver->off_wrap =
1435                        cpu_to_le16(vq->last_used_idx |
1436                                (vq->packed.used_wrap_counter <<
1437                                 VRING_PACKED_EVENT_F_WRAP_CTR));
1438                /*
1439                 * The event offset and event wrap counter need to
1440                 * be updated before the event flags.
1441                 */
1442                virtio_wmb(vq->weak_barriers);
1443        }
1444
1445        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1446                vq->packed.event_flags_shadow = vq->event ?
1447                                VRING_PACKED_EVENT_FLAG_DESC :
1448                                VRING_PACKED_EVENT_FLAG_ENABLE;
1449                vq->packed.vring.driver->flags =
1450                                cpu_to_le16(vq->packed.event_flags_shadow);
1451        }
1452
1453        END_USE(vq);
1454        return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1455                        VRING_PACKED_EVENT_F_WRAP_CTR);
1456}
1457
1458static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1459{
1460        struct vring_virtqueue *vq = to_vvq(_vq);
1461        bool wrap_counter;
1462        u16 used_idx;
1463
1464        wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1465        used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1466
1467        return is_used_desc_packed(vq, used_idx, wrap_counter);
1468}
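
/*
 * Illustrative sketch (not taken from this file): the 16-bit off_wrap
 * value used by the packed event suppression code above packs a ring
 * index into bits 0..14 and the wrap counter into bit 15
 * (VRING_PACKED_EVENT_F_WRAP_CTR).  Hypothetical helpers showing the
 * encoding that virtqueue_enable_cb_prepare_packed() builds and
 * virtqueue_poll_packed() takes apart:
 */
static inline u16 example_pack_off_wrap(u16 used_idx, bool wrap_counter)
{
	/* used_idx is always below vring.num, which fits in 15 bits. */
	return used_idx |
	       ((u16)wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR);
}

static inline void example_unpack_off_wrap(u16 off_wrap, u16 *used_idx,
					   bool *wrap_counter)
{
	*wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	*used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
}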
1469
1470static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1471{
1472        struct vring_virtqueue *vq = to_vvq(_vq);
1473        u16 used_idx, wrap_counter;
1474        u16 bufs;
1475
1476        START_USE(vq);
1477
1478        /*
1479         * We optimistically turn back on interrupts, then check if there was
1480         * more to do.
1481         */
1482
1483        if (vq->event) {
1484                /* TODO: tune this threshold */
1485                bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1486                wrap_counter = vq->packed.used_wrap_counter;
1487
1488                used_idx = vq->last_used_idx + bufs;
1489                if (used_idx >= vq->packed.vring.num) {
1490                        used_idx -= vq->packed.vring.num;
1491                        wrap_counter ^= 1;
1492                }
1493
1494                vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1495                        (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1496
1497                /*
1498                 * The event offset and event wrap counter need to
1499                 * be updated before the event flags.
1500                 */
1501                virtio_wmb(vq->weak_barriers);
1502        } else {
1503                used_idx = vq->last_used_idx;
1504                wrap_counter = vq->packed.used_wrap_counter;
1505        }
1506
1507        if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1508                vq->packed.event_flags_shadow = vq->event ?
1509                                VRING_PACKED_EVENT_FLAG_DESC :
1510                                VRING_PACKED_EVENT_FLAG_ENABLE;
1511                vq->packed.vring.driver->flags =
1512                                cpu_to_le16(vq->packed.event_flags_shadow);
1513        }
1514
1515        /*
1516         * The event suppression structure needs to be updated
1517         * before re-checking for more used buffers.
1518         */
1519        virtio_mb(vq->weak_barriers);
1520
1521        if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1522                END_USE(vq);
1523                return false;
1524        }
1525
1526        END_USE(vq);
1527        return true;
1528}
1529
1530static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1531{
1532        struct vring_virtqueue *vq = to_vvq(_vq);
1533        unsigned int i;
1534        void *buf;
1535
1536        START_USE(vq);
1537
1538        for (i = 0; i < vq->packed.vring.num; i++) {
1539                if (!vq->packed.desc_state[i].data)
1540                        continue;
1541                /* detach_buf clears data, so grab it now. */
1542                buf = vq->packed.desc_state[i].data;
1543                detach_buf_packed(vq, i, NULL);
1544                END_USE(vq);
1545                return buf;
1546        }
1547        /* That should have freed everything. */
1548        BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1549
1550        END_USE(vq);
1551        return NULL;
1552}
1553
1554static struct virtqueue *vring_create_virtqueue_packed(
1555        unsigned int index,
1556        unsigned int num,
1557        unsigned int vring_align,
1558        struct virtio_device *vdev,
1559        bool weak_barriers,
1560        bool may_reduce_num,
1561        bool context,
1562        bool (*notify)(struct virtqueue *),
1563        void (*callback)(struct virtqueue *),
1564        const char *name)
1565{
1566        struct vring_virtqueue *vq;
1567        struct vring_packed_desc *ring;
1568        struct vring_packed_desc_event *driver, *device;
1569        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1570        size_t ring_size_in_bytes, event_size_in_bytes;
1571        unsigned int i;
1572
1573        ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1574
1575        ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1576                                 &ring_dma_addr,
1577                                 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1578        if (!ring)
1579                goto err_ring;
1580
1581        event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1582
1583        driver = vring_alloc_queue(vdev, event_size_in_bytes,
1584                                   &driver_event_dma_addr,
1585                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1586        if (!driver)
1587                goto err_driver;
1588
1589        device = vring_alloc_queue(vdev, event_size_in_bytes,
1590                                   &device_event_dma_addr,
1591                                   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1592        if (!device)
1593                goto err_device;
1594
1595        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1596        if (!vq)
1597                goto err_vq;
1598
1599        vq->vq.callback = callback;
1600        vq->vq.vdev = vdev;
1601        vq->vq.name = name;
1602        vq->vq.num_free = num;
1603        vq->vq.index = index;
1604        vq->we_own_ring = true;
1605        vq->notify = notify;
1606        vq->weak_barriers = weak_barriers;
1607        vq->broken = false;
1608        vq->last_used_idx = 0;
1609        vq->num_added = 0;
1610        vq->packed_ring = true;
1611        vq->use_dma_api = vring_use_dma_api(vdev);
1612        list_add_tail(&vq->vq.list, &vdev->vqs);
1613#ifdef DEBUG
1614        vq->in_use = false;
1615        vq->last_add_time_valid = false;
1616#endif
1617
1618        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1619                !context;
1620        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1621
1622        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1623                vq->weak_barriers = false;
1624
1625        vq->packed.ring_dma_addr = ring_dma_addr;
1626        vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1627        vq->packed.device_event_dma_addr = device_event_dma_addr;
1628
1629        vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1630        vq->packed.event_size_in_bytes = event_size_in_bytes;
1631
1632        vq->packed.vring.num = num;
1633        vq->packed.vring.desc = ring;
1634        vq->packed.vring.driver = driver;
1635        vq->packed.vring.device = device;
1636
1637        vq->packed.next_avail_idx = 0;
1638        vq->packed.avail_wrap_counter = 1;
1639        vq->packed.used_wrap_counter = 1;
1640        vq->packed.event_flags_shadow = 0;
1641        vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1642
1643        vq->packed.desc_state = kmalloc_array(num,
1644                        sizeof(struct vring_desc_state_packed),
1645                        GFP_KERNEL);
1646        if (!vq->packed.desc_state)
1647                goto err_desc_state;
1648
1649        memset(vq->packed.desc_state, 0,
1650                num * sizeof(struct vring_desc_state_packed));
1651
1652        /* Put everything in free lists. */
1653        vq->free_head = 0;
1654        for (i = 0; i < num-1; i++)
1655                vq->packed.desc_state[i].next = i + 1;
1656
1657        vq->packed.desc_extra = kmalloc_array(num,
1658                        sizeof(struct vring_desc_extra_packed),
1659                        GFP_KERNEL);
1660        if (!vq->packed.desc_extra)
1661                goto err_desc_extra;
1662
1663        memset(vq->packed.desc_extra, 0,
1664                num * sizeof(struct vring_desc_extra_packed));
1665
1666        /* No callback?  Tell other side not to bother us. */
1667        if (!callback) {
1668                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1669                vq->packed.vring.driver->flags =
1670                        cpu_to_le16(vq->packed.event_flags_shadow);
1671        }
1672
1673        return &vq->vq;
1674
1675err_desc_extra:
1676        kfree(vq->packed.desc_state);
1677err_desc_state:
1678        kfree(vq);
1679err_vq:
1680        vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1681err_device:
1682        vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1683err_driver:
1684        vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1685err_ring:
1686        return NULL;
1687}
1688
1689
1690/*
1691 * Generic functions and exported symbols.
1692 */
1693
1694static inline int virtqueue_add(struct virtqueue *_vq,
1695                                struct scatterlist *sgs[],
1696                                unsigned int total_sg,
1697                                unsigned int out_sgs,
1698                                unsigned int in_sgs,
1699                                void *data,
1700                                void *ctx,
1701                                gfp_t gfp)
1702{
1703        struct vring_virtqueue *vq = to_vvq(_vq);
1704
1705        return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1706                                        out_sgs, in_sgs, data, ctx, gfp) :
1707                                 virtqueue_add_split(_vq, sgs, total_sg,
1708                                        out_sgs, in_sgs, data, ctx, gfp);
1709}
1710
1711/**
1712 * virtqueue_add_sgs - expose buffers to other end
1713 * @_vq: the struct virtqueue we're talking about.
1714 * @sgs: array of terminated scatterlists.
1715 * @out_sgs: the number of scatterlists readable by other side
1716 * @in_sgs: the number of scatterlists which are writable (after readable ones)
1717 * @data: the token identifying the buffer.
1718 * @gfp: how to do memory allocations (if necessary).
1719 *
1720 * Caller must ensure we don't call this with other virtqueue operations
1721 * at the same time (except where noted).
1722 *
1723 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1724 */
1725int virtqueue_add_sgs(struct virtqueue *_vq,
1726                      struct scatterlist *sgs[],
1727                      unsigned int out_sgs,
1728                      unsigned int in_sgs,
1729                      void *data,
1730                      gfp_t gfp)
1731{
1732        unsigned int i, total_sg = 0;
1733
1734        /* Count them first. */
1735        for (i = 0; i < out_sgs + in_sgs; i++) {
1736                struct scatterlist *sg;
1737
1738                for (sg = sgs[i]; sg; sg = sg_next(sg))
1739                        total_sg++;
1740        }
1741        return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1742                             data, NULL, gfp);
1743}
1744EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
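
/*
 * Illustrative usage sketch for a hypothetical driver (not part of this
 * file): queueing a request with one device-readable header and one
 * device-writable status byte via virtqueue_add_sgs().  The request
 * layout and the example_* names are assumptions; 'req' is assumed to be
 * kmalloc()ed so its members may be mapped for DMA.
 */
struct example_req {
	__le32 hdr[2];		/* device-readable command header */
	u8 status;		/* device-writable completion status */
};

static int example_queue_req(struct virtqueue *vq, struct example_req *req)
{
	struct scatterlist hdr, status, *sgs[2];

	sg_init_one(&hdr, req->hdr, sizeof(req->hdr));
	sg_init_one(&status, &req->status, sizeof(req->status));
	sgs[0] = &hdr;		/* out_sgs come first ... */
	sgs[1] = &status;	/* ... followed by in_sgs */

	/* 'req' is the token later returned by virtqueue_get_buf(). */
	return virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
}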
1745
1746/**
1747 * virtqueue_add_outbuf - expose output buffers to other end
1748 * @vq: the struct virtqueue we're talking about.
1749 * @sg: scatterlist (must be well-formed and terminated!)
1750 * @num: the number of entries in @sg readable by other side
1751 * @data: the token identifying the buffer.
1752 * @gfp: how to do memory allocations (if necessary).
1753 *
1754 * Caller must ensure we don't call this with other virtqueue operations
1755 * at the same time (except where noted).
1756 *
1757 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1758 */
1759int virtqueue_add_outbuf(struct virtqueue *vq,
1760                         struct scatterlist *sg, unsigned int num,
1761                         void *data,
1762                         gfp_t gfp)
1763{
1764        return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1765}
1766EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
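
/*
 * Illustrative usage sketch for a hypothetical driver (not part of this
 * file): exposing a single device-readable buffer and kicking the device.
 * 'buf' is assumed to be kmalloc()ed, and the caller is assumed to hold
 * whatever lock serializes operations on this queue.
 */
static int example_send(struct virtqueue *vq, void *buf, unsigned int len)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, len);
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err)
		return err;	/* e.g. -ENOSPC when the ring is full */

	virtqueue_kick(vq);	/* notify the device if it wants to know */
	return 0;
}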
1767
1768/**
1769 * virtqueue_add_inbuf - expose input buffers to other end
1770 * @vq: the struct virtqueue we're talking about.
1771 * @sg: scatterlist (must be well-formed and terminated!)
1772 * @num: the number of entries in @sg writable by other side
1773 * @data: the token identifying the buffer.
1774 * @gfp: how to do memory allocations (if necessary).
1775 *
1776 * Caller must ensure we don't call this with other virtqueue operations
1777 * at the same time (except where noted).
1778 *
1779 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1780 */
1781int virtqueue_add_inbuf(struct virtqueue *vq,
1782                        struct scatterlist *sg, unsigned int num,
1783                        void *data,
1784                        gfp_t gfp)
1785{
1786        return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1787}
1788EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
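
/*
 * Illustrative usage sketch for a hypothetical driver (not part of this
 * file): topping up a receive queue with device-writable buffers until
 * the ring is full, then kicking once.  The buffer size and allocation
 * policy are assumptions.
 */
static int example_fill_rx(struct virtqueue *vq, gfp_t gfp)
{
	struct scatterlist sg;
	void *buf;
	int err;

	do {
		buf = kmalloc(PAGE_SIZE, gfp);
		if (!buf)
			return -ENOMEM;

		sg_init_one(&sg, buf, PAGE_SIZE);
		err = virtqueue_add_inbuf(vq, &sg, 1, buf, gfp);
		if (err) {
			kfree(buf);	/* -ENOSPC here just means "full" */
			break;
		}
	} while (vq->num_free);

	virtqueue_kick(vq);
	return err == -ENOSPC ? 0 : err;
}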
1789
1790/**
1791 * virtqueue_add_inbuf_ctx - expose input buffers to other end
1792 * @vq: the struct virtqueue we're talking about.
1793 * @sg: scatterlist (must be well-formed and terminated!)
1794 * @num: the number of entries in @sg writable by other side
1795 * @data: the token identifying the buffer.
1796 * @ctx: extra context for the token
1797 * @gfp: how to do memory allocations (if necessary).
1798 *
1799 * Caller must ensure we don't call this with other virtqueue operations
1800 * at the same time (except where noted).
1801 *
1802 * Returns zero or a negative error (i.e. ENOSPC, ENOMEM, EIO).
1803 */
1804int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1805                        struct scatterlist *sg, unsigned int num,
1806                        void *data,
1807                        void *ctx,
1808                        gfp_t gfp)
1809{
1810        return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1811}
1812EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
1813
1814/**
1815 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1816 * @_vq: the struct virtqueue
1817 *
1818 * Instead of virtqueue_kick(), you can do:
1819 *      if (virtqueue_kick_prepare(vq))
1820 *              virtqueue_notify(vq);
1821 *
1822 * This is sometimes useful because virtqueue_kick_prepare() needs
1823 * to be serialized, but the actual virtqueue_notify() call does not.
1824 */
1825bool virtqueue_kick_prepare(struct virtqueue *_vq)
1826{
1827        struct vring_virtqueue *vq = to_vvq(_vq);
1828
1829        return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1830                                 virtqueue_kick_prepare_split(_vq);
1831}
1832EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1833
1834/**
1835 * virtqueue_notify - second half of split virtqueue_kick call.
1836 * @_vq: the struct virtqueue
1837 *
1838 * This does not need to be serialized.
1839 *
1840 * Returns false if host notify failed or queue is broken, otherwise true.
1841 */
1842bool virtqueue_notify(struct virtqueue *_vq)
1843{
1844        struct vring_virtqueue *vq = to_vvq(_vq);
1845
1846        if (unlikely(vq->broken))
1847                return false;
1848
1849        /* Prod other side to tell it about changes. */
1850        if (!vq->notify(_vq)) {
1851                vq->broken = true;
1852                return false;
1853        }
1854        return true;
1855}
1856EXPORT_SYMBOL_GPL(virtqueue_notify);
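
/*
 * Illustrative sketch for a hypothetical driver (not part of this file):
 * the prepare/notify split lets the potentially expensive notification
 * (often a trap to the hypervisor) happen outside the lock that
 * serializes ring updates.  The lock itself is an assumption.
 */
static void example_kick_outside_lock(struct virtqueue *vq, spinlock_t *lock)
{
	bool needs_kick;

	spin_lock(lock);
	needs_kick = virtqueue_kick_prepare(vq);	/* must be serialized */
	spin_unlock(lock);

	if (needs_kick)
		virtqueue_notify(vq);			/* may run unlocked */
}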
1857
1858/**
1859 * virtqueue_kick - update after add_buf
1860 * @vq: the struct virtqueue
1861 *
1862 * After one or more virtqueue_add_* calls, invoke this to kick
1863 * the other side.
1864 *
1865 * Caller must ensure we don't call this with other virtqueue
1866 * operations at the same time (except where noted).
1867 *
1868 * Returns false if kick failed, otherwise true.
1869 */
1870bool virtqueue_kick(struct virtqueue *vq)
1871{
1872        if (virtqueue_kick_prepare(vq))
1873                return virtqueue_notify(vq);
1874        return true;
1875}
1876EXPORT_SYMBOL_GPL(virtqueue_kick);
1877
1878/**
1879 * virtqueue_get_buf - get the next used buffer
1880 * @_vq: the struct virtqueue we're talking about.
1881 * @len: the length written into the buffer
1882 * @ctx: extra context for the token
1883 *
1884 * If the device wrote data into the buffer, @len will be set to the
1885 * amount written.  This means you don't need to clear the buffer
1886 * beforehand to ensure there's no data leakage in the case of short
1887 * writes.
1888 *
1889 * Caller must ensure we don't call this with other virtqueue
1890 * operations at the same time (except where noted).
1891 *
1892 * Returns NULL if there are no used buffers, or the "data" token
1893 * handed to virtqueue_add_*().
1894 */
1895void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1896                            void **ctx)
1897{
1898        struct vring_virtqueue *vq = to_vvq(_vq);
1899
1900        return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1901                                 virtqueue_get_buf_ctx_split(_vq, len, ctx);
1902}
1903EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1904
1905void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1906{
1907        return virtqueue_get_buf_ctx(_vq, len, NULL);
1908}
1909EXPORT_SYMBOL_GPL(virtqueue_get_buf);
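
/*
 * Illustrative usage sketch for a hypothetical driver (not part of this
 * file): reaping completed buffers.  The returned pointer is the 'data'
 * token passed to virtqueue_add_*(); here it is assumed to be the
 * kmalloc()ed buffer itself.
 */
static void example_reap_used(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
		kfree(buf);	/* 'len' = bytes the device wrote, if any */
}
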
1910/**
1911 * virtqueue_disable_cb - disable callbacks
1912 * @_vq: the struct virtqueue we're talking about.
1913 *
1914 * Note that this is not necessarily synchronous, hence unreliable and only
1915 * useful as an optimization.
1916 *
1917 * Unlike other operations, this need not be serialized.
1918 */
1919void virtqueue_disable_cb(struct virtqueue *_vq)
1920{
1921        struct vring_virtqueue *vq = to_vvq(_vq);
1922
1923        if (vq->packed_ring)
1924                virtqueue_disable_cb_packed(_vq);
1925        else
1926                virtqueue_disable_cb_split(_vq);
1927}
1928EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
1929
1930/**
1931 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
1932 * @_vq: the struct virtqueue we're talking about.
1933 *
1934 * This re-enables callbacks; it returns the current queue state
1935 * in an opaque unsigned value.  Pass that value to virtqueue_poll()
1936 * later to detect a possible race between the driver checking for
1937 * more work and enabling callbacks.
1938 *
1939 * Caller must ensure we don't call this with other virtqueue
1940 * operations at the same time (except where noted).
1941 */
1942unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
1943{
1944        struct vring_virtqueue *vq = to_vvq(_vq);
1945
1946        return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
1947                                 virtqueue_enable_cb_prepare_split(_vq);
1948}
1949EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
1950
1951/**
1952 * virtqueue_poll - query pending used buffers
1953 * @_vq: the struct virtqueue we're talking about.
1954 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
1955 *
1956 * Returns "true" if there are pending used buffers in the queue.
1957 *
1958 * This does not need to be serialized.
1959 */
1960bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
1961{
1962        struct vring_virtqueue *vq = to_vvq(_vq);
1963
1964        virtio_mb(vq->weak_barriers);
1965        return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
1966                                 virtqueue_poll_split(_vq, last_used_idx);
1967}
1968EXPORT_SYMBOL_GPL(virtqueue_poll);
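
/*
 * Illustrative sketch for a hypothetical driver (not part of this file):
 * re-arming callbacks from a polling loop.  The opaque value from
 * virtqueue_enable_cb_prepare() is handed to virtqueue_poll() to detect
 * buffers that were used while interrupts were being re-enabled.
 */
static bool example_try_rearm(struct virtqueue *vq)
{
	unsigned opaque = virtqueue_enable_cb_prepare(vq);

	if (virtqueue_poll(vq, opaque)) {
		/* Lost the race: new used buffers are already pending. */
		virtqueue_disable_cb(vq);
		return false;	/* keep polling */
	}
	return true;		/* callbacks armed, safe to go idle */
}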
1969
1970/**
1971 * virtqueue_enable_cb - restart callbacks after disable_cb.
1972 * @_vq: the struct virtqueue we're talking about.
1973 *
1974 * This re-enables callbacks; it returns "false" if there are pending
1975 * buffers in the queue, to detect a possible race between the driver
1976 * checking for more work, and enabling callbacks.
1977 *
1978 * Caller must ensure we don't call this with other virtqueue
1979 * operations at the same time (except where noted).
1980 */
1981bool virtqueue_enable_cb(struct virtqueue *_vq)
1982{
1983        unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
1984
1985        return !virtqueue_poll(_vq, last_used_idx);
1986}
1987EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
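
/*
 * Illustrative sketch for a hypothetical driver (not part of this file):
 * the usual callback-driven consume loop.  A "false" return from
 * virtqueue_enable_cb() means more buffers were used in the window before
 * callbacks were re-enabled, so the loop runs again instead of relying on
 * another interrupt.
 */
static void example_vq_callback(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	do {
		virtqueue_disable_cb(vq);
		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
			kfree(buf);	/* data token from virtqueue_add_*() */
	} while (!virtqueue_enable_cb(vq));
}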
1988
1989/**
1990 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
1991 * @_vq: the struct virtqueue we're talking about.
1992 *
1993 * This re-enables callbacks but hints to the other side to delay
1994 * interrupts until most of the available buffers have been processed;
1995 * it returns "false" if there are many pending buffers in the queue,
1996 * to detect a possible race between the driver checking for more work,
1997 * and enabling callbacks.
1998 *
1999 * Caller must ensure we don't call this with other virtqueue
2000 * operations at the same time (except where noted).
2001 */
2002bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2003{
2004        struct vring_virtqueue *vq = to_vvq(_vq);
2005
2006        return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2007                                 virtqueue_enable_cb_delayed_split(_vq);
2008}
2009EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
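
/*
 * Illustrative sketch for a hypothetical driver (not part of this file):
 * interrupt mitigation on a transmit queue.  When the ring is getting
 * full, ask for a delayed interrupt; a "false" return means many
 * completions are already pending, so reclaim them immediately.  The
 * threshold of 2 free slots is an assumption.
 */
static void example_after_xmit(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	if (vq->num_free > 2)
		return;

	if (!virtqueue_enable_cb_delayed(vq)) {
		virtqueue_disable_cb(vq);
		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
			kfree(buf);	/* data token from virtqueue_add_*() */
	}
}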
2010
2011/**
2012 * virtqueue_detach_unused_buf - detach first unused buffer
2013 * @_vq: the struct virtqueue we're talking about.
2014 *
2015 * Returns NULL or the "data" token handed to virtqueue_add_*().
2016 * This is not valid on an active queue; it is useful only for device
2017 * shutdown.
2018 */
2019void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2020{
2021        struct vring_virtqueue *vq = to_vvq(_vq);
2022
2023        return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2024                                 virtqueue_detach_unused_buf_split(_vq);
2025}
2026EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
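
/*
 * Illustrative sketch for a hypothetical driver (not part of this file):
 * teardown after the device has been reset, reclaiming buffers that were
 * added but never used by the device.
 */
static void example_free_unused(struct virtqueue *vq)
{
	void *buf;

	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(buf);	/* data token from virtqueue_add_*() */
}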
2027
2028static inline bool more_used(const struct vring_virtqueue *vq)
2029{
2030        return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2031}
2032
2033irqreturn_t vring_interrupt(int irq, void *_vq)
2034{
2035        struct vring_virtqueue *vq = to_vvq(_vq);
2036
2037        if (!more_used(vq)) {
2038                pr_debug("virtqueue interrupt with no work for %p\n", vq);
2039                return IRQ_NONE;
2040        }
2041
2042        if (unlikely(vq->broken))
2043                return IRQ_HANDLED;
2044
2045        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2046        if (vq->vq.callback)
2047                vq->vq.callback(&vq->vq);
2048
2049        return IRQ_HANDLED;
2050}
2051EXPORT_SYMBOL_GPL(vring_interrupt);
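
/*
 * Illustrative sketch for a hypothetical transport (not part of this
 * file): vring_interrupt() is intended to be installed as the per-queue
 * interrupt handler, with the struct virtqueue pointer as the dev_id
 * cookie.  Assumes <linux/interrupt.h>; the irq number and name are
 * placeholders.
 */
static int example_request_vq_irq(unsigned int irq, struct virtqueue *vq)
{
	return request_irq(irq, vring_interrupt, 0, "example-vq", vq);
}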
2052
2053/* Only available for split ring */
2054struct virtqueue *__vring_new_virtqueue(unsigned int index,
2055                                        struct vring vring,
2056                                        struct virtio_device *vdev,
2057                                        bool weak_barriers,
2058                                        bool context,
2059                                        bool (*notify)(struct virtqueue *),
2060                                        void (*callback)(struct virtqueue *),
2061                                        const char *name)
2062{
2063        unsigned int i;
2064        struct vring_virtqueue *vq;
2065
2066        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2067                return NULL;
2068
2069        vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2070        if (!vq)
2071                return NULL;
2072
2073        vq->packed_ring = false;
2074        vq->vq.callback = callback;
2075        vq->vq.vdev = vdev;
2076        vq->vq.name = name;
2077        vq->vq.num_free = vring.num;
2078        vq->vq.index = index;
2079        vq->we_own_ring = false;
2080        vq->notify = notify;
2081        vq->weak_barriers = weak_barriers;
2082        vq->broken = false;
2083        vq->last_used_idx = 0;
2084        vq->num_added = 0;
2085        vq->use_dma_api = vring_use_dma_api(vdev);
2086        list_add_tail(&vq->vq.list, &vdev->vqs);
2087#ifdef DEBUG
2088        vq->in_use = false;
2089        vq->last_add_time_valid = false;
2090#endif
2091
2092        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2093                !context;
2094        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2095
2096        if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2097                vq->weak_barriers = false;
2098
2099        vq->split.queue_dma_addr = 0;
2100        vq->split.queue_size_in_bytes = 0;
2101
2102        vq->split.vring = vring;
2103        vq->split.avail_flags_shadow = 0;
2104        vq->split.avail_idx_shadow = 0;
2105
2106        /* No callback?  Tell other side not to bother us. */
2107        if (!callback) {
2108                vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2109                if (!vq->event)
2110                        vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2111                                        vq->split.avail_flags_shadow);
2112        }
2113
2114        vq->split.desc_state = kmalloc_array(vring.num,
2115                        sizeof(struct vring_desc_state_split), GFP_KERNEL);
2116        if (!vq->split.desc_state) {
2117                kfree(vq);
2118                return NULL;
2119        }
2120
2121        /* Put everything in free lists. */
2122        vq->free_head = 0;
2123        for (i = 0; i < vring.num-1; i++)
2124                vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
2125        memset(vq->split.desc_state, 0, vring.num *
2126                        sizeof(struct vring_desc_state_split));
2127
2128        return &vq->vq;
2129}
2130EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2131
2132struct virtqueue *vring_create_virtqueue(
2133        unsigned int index,
2134        unsigned int num,
2135        unsigned int vring_align,
2136        struct virtio_device *vdev,
2137        bool weak_barriers,
2138        bool may_reduce_num,
2139        bool context,
2140        bool (*notify)(struct virtqueue *),
2141        void (*callback)(struct virtqueue *),
2142        const char *name)
2143{
2144
2145        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2146                return vring_create_virtqueue_packed(index, num, vring_align,
2147                                vdev, weak_barriers, may_reduce_num,
2148                                context, notify, callback, name);
2149
2150        return vring_create_virtqueue_split(index, num, vring_align,
2151                        vdev, weak_barriers, may_reduce_num,
2152                        context, notify, callback, name);
2153}
2154EXPORT_SYMBOL_GPL(vring_create_virtqueue);
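
/*
 * Illustrative sketch for a hypothetical transport (not part of this
 * file): creating a queue with vring_create_virtqueue().  The notify op
 * would normally write to a device-specific doorbell; here it is a stub,
 * and the ring size, alignment and flag choices are assumptions.
 */
static bool example_notify(struct virtqueue *vq)
{
	/* Poke the device's notification register using vq->index here. */
	return true;	/* returning false marks the queue broken */
}

static struct virtqueue *example_setup_vq(struct virtio_device *vdev,
					  unsigned int index,
					  void (*callback)(struct virtqueue *),
					  const char *name)
{
	return vring_create_virtqueue(index, 128, PAGE_SIZE, vdev,
				      true,	/* weak_barriers */
				      true,	/* may_reduce_num */
				      false,	/* no per-buffer ctx */
				      example_notify, callback, name);
}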
2155
2156/* Only available for split ring */
2157struct virtqueue *vring_new_virtqueue(unsigned int index,
2158                                      unsigned int num,
2159                                      unsigned int vring_align,
2160                                      struct virtio_device *vdev,
2161                                      bool weak_barriers,
2162                                      bool context,
2163                                      void *pages,
2164                                      bool (*notify)(struct virtqueue *vq),
2165                                      void (*callback)(struct virtqueue *vq),
2166                                      const char *name)
2167{
2168        struct vring vring;
2169
2170        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2171                return NULL;
2172
2173        vring_init(&vring, num, pages, vring_align);
2174        return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2175                                     notify, callback, name);
2176}
2177EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2178
2179void vring_del_virtqueue(struct virtqueue *_vq)
2180{
2181        struct vring_virtqueue *vq = to_vvq(_vq);
2182
2183        if (vq->we_own_ring) {
2184                if (vq->packed_ring) {
2185                        vring_free_queue(vq->vq.vdev,
2186                                         vq->packed.ring_size_in_bytes,
2187                                         vq->packed.vring.desc,
2188                                         vq->packed.ring_dma_addr);
2189
2190                        vring_free_queue(vq->vq.vdev,
2191                                         vq->packed.event_size_in_bytes,
2192                                         vq->packed.vring.driver,
2193                                         vq->packed.driver_event_dma_addr);
2194
2195                        vring_free_queue(vq->vq.vdev,
2196                                         vq->packed.event_size_in_bytes,
2197                                         vq->packed.vring.device,
2198                                         vq->packed.device_event_dma_addr);
2199
2200                        kfree(vq->packed.desc_state);
2201                        kfree(vq->packed.desc_extra);
2202                } else {
2203                        vring_free_queue(vq->vq.vdev,
2204                                         vq->split.queue_size_in_bytes,
2205                                         vq->split.vring.desc,
2206                                         vq->split.queue_dma_addr);
2207
2208                        kfree(vq->split.desc_state);
2209                }
2210        }
2211        list_del(&_vq->list);
2212        kfree(vq);
2213}
2214EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2215
2216/* Manipulates transport-specific feature bits. */
2217void vring_transport_features(struct virtio_device *vdev)
2218{
2219        unsigned int i;
2220
2221        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2222                switch (i) {
2223                case VIRTIO_RING_F_INDIRECT_DESC:
2224                        break;
2225                case VIRTIO_RING_F_EVENT_IDX:
2226                        break;
2227                case VIRTIO_F_VERSION_1:
2228                        break;
2229                case VIRTIO_F_IOMMU_PLATFORM:
2230                        break;
2231                case VIRTIO_F_RING_PACKED:
2232                        break;
2233                case VIRTIO_F_ORDER_PLATFORM:
2234                        break;
2235                default:
2236                        /* We don't understand this bit. */
2237                        __virtio_clear_bit(vdev, i);
2238                }
2239        }
2240}
2241EXPORT_SYMBOL_GPL(vring_transport_features);
2242
2243/**
2244 * virtqueue_get_vring_size - return the size of the virtqueue's vring
2245 * @_vq: the struct virtqueue containing the vring of interest.
2246 *
2247 * Returns the size of the vring.  This is mainly used for boasting to
2248 * userspace.  Unlike other operations, this need not be serialized.
2249 */
2250unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2251{
2252
2253        struct vring_virtqueue *vq = to_vvq(_vq);
2254
2255        return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2256}
2257EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2258
2259bool virtqueue_is_broken(struct virtqueue *_vq)
2260{
2261        struct vring_virtqueue *vq = to_vvq(_vq);
2262
2263        return vq->broken;
2264}
2265EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2266
2267/*
2268 * This should prevent the device from being used, allowing drivers to
2269 * recover.  You may need to grab appropriate locks to flush.
2270 */
2271void virtio_break_device(struct virtio_device *dev)
2272{
2273        struct virtqueue *_vq;
2274
2275        list_for_each_entry(_vq, &dev->vqs, list) {
2276                struct vring_virtqueue *vq = to_vvq(_vq);
2277                vq->broken = true;
2278        }
2279}
2280EXPORT_SYMBOL_GPL(virtio_break_device);
2281
2282dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2283{
2284        struct vring_virtqueue *vq = to_vvq(_vq);
2285
2286        BUG_ON(!vq->we_own_ring);
2287
2288        if (vq->packed_ring)
2289                return vq->packed.ring_dma_addr;
2290
2291        return vq->split.queue_dma_addr;
2292}
2293EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2294
2295dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2296{
2297        struct vring_virtqueue *vq = to_vvq(_vq);
2298
2299        BUG_ON(!vq->we_own_ring);
2300
2301        if (vq->packed_ring)
2302                return vq->packed.driver_event_dma_addr;
2303
2304        return vq->split.queue_dma_addr +
2305                ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2306}
2307EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2308
2309dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2310{
2311        struct vring_virtqueue *vq = to_vvq(_vq);
2312
2313        BUG_ON(!vq->we_own_ring);
2314
2315        if (vq->packed_ring)
2316                return vq->packed.device_event_dma_addr;
2317
2318        return vq->split.queue_dma_addr +
2319                ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2320}
2321EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2322
2323/* Only available for split ring */
2324const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2325{
2326        return &to_vvq(vq)->split.vring;
2327}
2328EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2329
2330MODULE_LICENSE("GPL");
2331