linux/tools/virtio/ringtest/ring.c
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. A virtio 0.9 compatible event index is used
 * for signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - where the next entry will be written.
 * Prev - value of "Next" when the event was last triggered.
 * Event - the peer requested an event after this entry is written.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
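/* For example, with prev = 5 and next = 8, entries 5, 6 and 7 have been
 * written since the last notification; the check above is true only when
 * the peer's requested event index falls in [5, 8), and the unsigned
 * subtraction keeps the comparison correct across 16-bit index wraparound.
 */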

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
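/* The lettered barrier annotations below pair up as follows:
 * A - the guest's smp_release() before setting DESC_HW in add_inbuf() with
 *     the host's smp_acquire() after it observes DESC_HW in use_buf();
 * B - the host's smp_release() before clearing flags in use_buf() with the
 *     guest's smp_acquire() after it sees DESC_HW clear in get_buf();
 * C - the guest's smp_mb() in kick_available() with the host's smp_mb()
 *     in enable_kick();
 * D - the host's smp_mb() in call_used() with the guest's smp_mb()
 *     in enable_call().
 */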
#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80
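/* 0x80 bytes presumably covers two 64-byte cache lines, so that fields
 * written by the guest and by the host stay on separate lines even with
 * adjacent-line prefetching.
 */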

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
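/* In the struct above, kick_index is written by the host in enable_kick()
 * and read by the guest in kick_available(); call_index is written by the
 * guest in enable_call() and read by the host in call_used(). The padding
 * keeps the two indices on separate cache lines.
 */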

struct data {
        void *buf; /* descriptor is writeable, we can't get buf from there */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
        /* we do not need to track last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                perror("Unable to allocate ring buffer.\n");
                exit(3);
        }
        event = calloc(1, sizeof(*event));
        if (!event) {
                perror("Unable to allocate event buffer.\n");
                exit(3);
        }
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = calloc(ring_size, sizeof(*data));
        if (!data) {
                perror("Unable to allocate data buffer.\n");
                exit(3);
        }
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write. On MESI architectures this helps
         * avoid a shared state with the consumer that is polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void *)buf;
        ring[head].len = len;
        /* The read below might bypass the write above. That is OK because it's
         * just an optimization. If this happens, we will get the cache line in
         * a shared state, which is unfortunate, but probably not worth adding
         * an explicit full barrier to avoid.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;

        return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}

bool used_empty()
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return (ring[head].flags & DESC_HW);
}

void disable_call()
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call()
{
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}

void kick_available(void)
{
        bool need;

        /* Flush out the previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        need = need_event(event->kick_index,
                           guest.avail_idx,
                           guest.kicked_avail_idx);

        guest.kicked_avail_idx = guest.avail_idx;
        if (need)
                kick();
}

/* host side */
void disable_kick()
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_kick()
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}

bool avail_empty()
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure the length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need
         * to touch index at all. This also means we
         * can just modify the descriptor in-place.
         */
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: an alternative is to write len and flags in one access -
         * possible on 64-bit architectures, but wmb is free on Intel anyway,
         * so I have no way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}

void call_used(void)
{
        bool need;

        /* Flush out the previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();

        need = need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx);

        host.called_used_idx = host.used_idx;

        if (need)
                call();
}
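
/*
 * Illustrative sketch, not part of the upstream file: one way a caller
 * could drive the guest-side API above. The function name, the fixed
 * buffer length and the simple bail-out on a full ring are hypothetical;
 * the real driver loop lives in main.c and may differ.
 */
static inline void example_guest_iteration(void *buf, void *datap)
{
        unsigned len;
        void *rbuf;

        /* Post one buffer; kick the host only if it asked to be notified. */
        if (add_inbuf(0x80, buf, datap) == 0)
                kick_available();

        /* Reap completions, then re-arm the call before going idle. */
        for (;;) {
                while (!used_empty())
                        get_buf(&len, &rbuf);
                if (enable_call())
                        break; /* still empty: safe to wait for call() */
                /* a completion slipped in after re-arming; poll again */
        }
}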