linux/tools/virtio/ringtest/ring.c
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - where the next entry will be written.
 * Prev - the "Next" value when an event was last triggered.
 * Event - the peer requested an event after this entry is written.
 */
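/* Returns true when "event" falls in the window [prev, next) modulo 2^16,
 * i.e. the entry the peer asked to be notified about has been written since
 * the previous notification.
 */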
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

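/* A descriptor: flags carries DESC_HW while the entry is owned by the host,
 * index identifies the matching data[] slot, len and addr describe the buffer.
 */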
struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
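/* kick_index: the host asks to be kicked once the guest makes the entry at
 * this avail index available. call_index: the guest asks to be called once
 * the host completes the entry at this used index. The padding keeps each
 * index on its own cache line.
 */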
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
        void *buf; /* the descriptor is overwritten by the host, so buf can't be read back from it */
        void *data;
} *data;

struct desc *ring;
struct event *event;

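/* Producer (guest) state, padded to avoid false sharing with the host state:
 * avail_idx - next entry the guest will fill, last_used_idx - next entry to
 * check for completion, num_free - free entries remaining,
 * kicked_avail_idx - avail_idx value at the last kick.
 */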
struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

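/* Consumer (host) state: used_idx - next entry to complete,
 * called_used_idx - used_idx value at the last call.
 */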
struct host {
        /* we do not need to track last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                perror("Unable to allocate ring buffer.\n");
                exit(3);
        }
        event = malloc(sizeof *event);
        if (!event) {
                perror("Unable to allocate event buffer.\n");
                exit(3);
        }
        memset(event, 0, sizeof *event);
        guest.avail_idx = 0;
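        /* -1 ensures need_event() does not suppress the very first kick/call */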
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = malloc(ring_size * sizeof *data);
        if (!data) {
                perror("Unable to allocate data buffer.\n");
                exit(3);
        }
        memset(data, 0, ring_size * sizeof *data);
}

/* guest side */
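/* Post a buffer to the ring. Returns 0 on success, -1 if no entries are free.
 * datap is an opaque cookie that get_buf() hands back once the buffer has
 * been used by the host.
 */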
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write. With MESI-style cache coherence this helps
         * avoid putting the line in a shared state with a consumer that is
         * polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void *)buf;
        ring[head].len = len;
        /* read below might bypass write above. That is OK because it's just an
         * optimization. If this happens, we will get the cache line in a
         * shared state which is unfortunate, but probably not worth it to
         * add an explicit full barrier to avoid this.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;

        return 0;
}

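/* Reap one completed buffer. Stores the completed length and the buffer
 * pointer via lenp/bufp and returns the datap cookie from add_inbuf(), or
 * returns NULL if the next entry is still owned by the host.
 */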
void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}

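/* True if no completed entries are pending for the guest */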
bool used_empty()
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return (ring[head].flags & DESC_HW);
}

void disable_call()
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

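/* Ask the host for a call once the entry at last_used_idx is completed,
 * then recheck: returns false if a completion is already pending, so the
 * caller should not go to sleep.
 */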
bool enable_call()
{
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}

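/* Notify the host of newly available entries, but only if the host asked
 * for a kick at an index written since the last kick.
 */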
void kick_available(void)
{
        /* Flush in previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        if (!need_event(event->kick_index,
                        guest.avail_idx,
                        guest.kicked_avail_idx))
                return;

        guest.kicked_avail_idx = guest.avail_idx;
        kick();
}

/* host side */
void disable_kick()
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

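/* Ask the guest for a kick once the entry at used_idx is made available,
 * then recheck: returns false if an entry is already pending, so the caller
 * should not go to sleep.
 */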
bool enable_kick()
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}

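/* True if the guest has not made any new entries available to the host */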
bool avail_empty()
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

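/* Complete the next available entry in order: "process" it (here just by
 * decrementing len), clear DESC_HW to hand it back to the guest and advance
 * used_idx. Returns false if nothing is pending. lenp and bufp are unused by
 * this ring implementation.
 */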
bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need
         * to touch index at all. This also means we
         * can just modify the descriptor in-place.
         */
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: an alternative is to write len and flags in a single access -
         * possible on 64 bit architectures, but wmb is free on Intel anyway,
         * so there is no easy way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}

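/* Interrupt the guest about new completions, but only if the guest asked
 * for a call at an index completed since the last call.
 */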
void call_used(void)
{
        /* Flush in previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();
        if (!need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx))
                return;

        host.called_used_idx = host.used_idx;
        call();
}