qemu/hw/net/virtio-net.c
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* value of the IP header length field for a header without options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Interval of the timer that purges coalesced packets. This value affects
   performance significantly and should be tuned carefully: '300000' (300us)
   is the recommended value for passing the WHQL test, while '50000' can
   double netperf throughput when tso/gso/gro are 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
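
/*
 * Note: n->config_size is derived from this table at realize time (see
 * virtio_net_set_config_size(), which uses virtio_feature_get_config_size()):
 * the config space ends at the largest endof() whose feature bit is present
 * in the host features, so e.g. enabling VIRTIO_NET_F_MQ exposes the space
 * up to and including max_virtqueue_pairs.
 */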

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
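
/*
 * For illustration: virtqueues come in rx/tx pairs, with the control queue
 * last, so with two queue pairs the layout is
 *   vq 0: rx0, vq 1: tx0, vq 2: rx1, vq 3: tx1, vq 4: ctrl
 * and vq2q(3) == 1 selects queue pair 1.
 */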

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
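
/*
 * Sketch of the announce handshake, assuming VIRTIO_NET_F_GUEST_ANNOUNCE
 * and VIRTIO_NET_F_CTRL_VQ were negotiated:
 *   1. virtio_net_announce_notify() sets VIRTIO_NET_S_ANNOUNCE in the
 *      config status and notifies the guest.
 *   2. The guest sends its gratuitous ARPs/unsolicited NAs and acks with
 *      VIRTIO_NET_CTRL_ANNOUNCE_ACK on the control queue.
 *   3. virtio_net_handle_announce() (below) clears the bit and, while
 *      announce_timer.round > 0, re-arms the timer for another round.
 */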

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%u-byte MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}
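
/*
 * n->vlans is a bitmap of all 4096 VLAN ids held in 32-bit words: VLAN id
 * 'vid' lives at bit (vid & 0x1f) of word (vid >> 5), so e.g. vid 100 is
 * bit 4 of n->vlans[3]. get_vlan_table() above simply walks that bitmap.
 */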

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all features that virtio-net could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    char *id;
} FailoverId;

/**
 * Set the id of the failover primary device
 *
 * @opaque: FailoverId to setup
 * @opts: opts for device we are handling
 * @errp: returns an error if this function fails
 */
static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    FailoverId *fid = opaque;
    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
        fid->id = g_strdup(opts->id);
        return 1;
    }

    return 0;
}

/**
 * Find the primary device id for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static char *failover_find_primary_device_id(VirtIONet *n)
{
    Error *err = NULL;
    FailoverId fid;

    fid.n = n;
    if (!qemu_opts_foreach(qemu_find_opts("device"),
                           failover_set_primary, &fid, &err)) {
        return NULL;
    }
    return fid.id;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    char *id = failover_find_primary_device_id(n);

    if (!id) {
        return NULL;
    }

    return qdev_find_recursive(sysbus_get_default(), id);
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    QemuOpts *opts;
    char *id;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    id = failover_find_primary_device_id(n);
    if (!id) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure the primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }
    opts = qemu_opts_find(qemu_find_opts("device"), id);
    g_assert(opts); /* cannot be NULL because id was found using opts list */
    dev = qdev_device_add(opts, &err);
    if (err) {
        qemu_opts_del(opts);
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
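
/*
 * Illustrative command line (the names and the PCI address are examples):
 * the standby virtio-net device is created with failover=on and the
 * primary, e.g. a VFIO NIC, points back at it by id:
 *
 *   -device virtio-net-pci,netdev=hostnet0,id=net1,failover=on
 *   -device vfio-pci,host=5e:00.0,id=hostdev0,failover_pair_id=net1
 *
 * failover_add_primary() then hot-plugs the primary once the guest
 * acknowledges VIRTIO_NET_F_STANDBY (see virtio_net_set_features() below).
 */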

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            warn_report_err(err);
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

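    /*
     * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks,
     * unicast addresses first, then multicast:
     *
     *   struct virtio_net_ctrl_mac {
     *       le32 entries;
     *       u8 macs[entries][ETH_ALEN];
     *   };
     *
     * The code below reads each 'entries' count and then consumes that many
     * 6-byte addresses, flagging an overflow (treated as all-pass) whenever
     * a list does not fit into the 64-entry table.
     */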
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}

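/*
 * Wire format parsed below, per the VIRTIO 1.1 spec (the variable-length
 * tail is why parsing advances in offset/size_get steps):
 *
 *   struct virtio_net_rss_config {
 *       le32 hash_types;
 *       le16 indirection_table_mask;
 *       le16 unclassified_queue;
 *       le16 indirection_table[indirection_table_mask + 1];
 *       le16 max_tx_vq;
 *       u8 hash_key_length;
 *       u8 hash_key_data[hash_key_length];
 *   };
 *
 * For VIRTIO_NET_CTRL_MQ_HASH_CONFIG (do_rss == false) the code forces a
 * single-entry indirection table and ignores max_tx_vq.
 */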
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* Stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

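/*
 * Each control-queue request is laid out in the out-buffers as
 *
 *   struct virtio_net_ctrl_hdr { u8 class; u8 cmd; };
 *   ... command-specific payload ...
 *
 * followed by one in-buffer byte into which the device writes
 * VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */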
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queues) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
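
/*
 * Only the 16-bit header fields need swapping here: flags and gso_type are
 * single bytes, and the num_buffers field of the merged-rxbuf header is
 * byte-swapped separately where the merged header is written out.
 */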

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
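/*
 * The fixed offsets below assume an untagged Ethernet frame carrying an
 * IPv4 header with no options: bytes 12-13 are the ethertype, byte 23 is
 * the IPv4 protocol field (14 + 9), and bytes 34-35 are the UDP source
 * port (14 + 20), i.e. 67/bootps for packets sent by a DHCP server.
 */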
1471static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1472                                        uint8_t *buf, size_t size)
1473{
1474    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1475        (size > 27 && size < 1500) && /* normal sized MTU */
1476        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1477        (buf[23] == 17) && /* ip.protocol == UDP */
1478        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1479        net_checksum_calculate(buf, size, CSUM_UDP);
1480        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1481    }
1482}
1483
1484static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1485                           const void *buf, size_t size)
1486{
1487    if (n->has_vnet_hdr) {
1488        /* FIXME this cast is evil */
1489        void *wbuf = (void *)buf;
1490        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1491                                    size - n->host_hdr_len);
1492
1493        if (n->needs_vnet_hdr_swap) {
1494            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1495        }
1496        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1497    } else {
1498        struct virtio_net_hdr hdr = {
1499            .flags = 0,
1500            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1501        };
1502        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1503    }
1504}
1505
1506static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1507{
1508    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1509    static const uint8_t vlan[] = {0x81, 0x00};
1510    uint8_t *ptr = (uint8_t *)buf;
1511    int i;
1512
1513    if (n->promisc)
1514        return 1;
1515
1516    ptr += n->host_hdr_len;
1517
1518    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1519        int vid = lduw_be_p(ptr + 14) & 0xfff;
1520        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1521            return 0;
1522    }
1523
1524    if (ptr[0] & 1) { // multicast
1525        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1526            return !n->nobcast;
1527        } else if (n->nomulti) {
1528            return 0;
1529        } else if (n->allmulti || n->mac_table.multi_overflow) {
1530            return 1;
1531        }
1532
1533        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1534            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1535                return 1;
1536            }
1537        }
1538    } else { // unicast
1539        if (n->nouni) {
1540            return 0;
1541        } else if (n->alluni || n->mac_table.uni_overflow) {
1542            return 1;
1543        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1544            return 1;
1545        }
1546
1547        for (i = 0; i < n->mac_table.first_multi; i++) {
1548            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1549                return 1;
1550            }
1551        }
1552    }
1553
1554    return 0;
1555}
1556
1557static uint8_t virtio_net_get_hash_type(bool isip4,
1558                                        bool isip6,
1559                                        bool isudp,
1560                                        bool istcp,
1561                                        uint32_t types)
1562{
1563    if (isip4) {
1564        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1565            return NetPktRssIpV4Tcp;
1566        }
1567        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1568            return NetPktRssIpV4Udp;
1569        }
1570        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1571            return NetPktRssIpV4;
1572        }
1573    } else if (isip6) {
1574        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1575                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1576
1577        if (istcp && (types & mask)) {
1578            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1579                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1580        }
1581        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1582        if (isudp && (types & mask)) {
1583            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1584                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1585        }
1586        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1587        if (types & mask) {
1588            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1589                NetPktRssIpV6Ex : NetPktRssIpV6;
1590        }
1591    }
1592    return 0xff;
1593}
1594
1595static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1596                                   uint32_t hash)
1597{
1598    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1599    hdr->hash_value = hash;
1600    hdr->hash_report = report;
1601}
1602
1603static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1604                                  size_t size)
1605{
1606    VirtIONet *n = qemu_get_nic_opaque(nc);
1607    unsigned int index = nc->queue_index, new_index = index;
1608    struct NetRxPkt *pkt = n->rx_pkt;
1609    uint8_t net_hash_type;
1610    uint32_t hash;
1611    bool isip4, isip6, isudp, istcp;
1612    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1613        VIRTIO_NET_HASH_REPORT_IPv4,
1614        VIRTIO_NET_HASH_REPORT_TCPv4,
1615        VIRTIO_NET_HASH_REPORT_TCPv6,
1616        VIRTIO_NET_HASH_REPORT_IPv6,
1617        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1618        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1619        VIRTIO_NET_HASH_REPORT_UDPv4,
1620        VIRTIO_NET_HASH_REPORT_UDPv6,
1621        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1622    };
1623
1624    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1625                             size - n->host_hdr_len);
1626    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1627    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1628        istcp = isudp = false;
1629    }
1630    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1631        istcp = isudp = false;
1632    }
1633    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1634                                             n->rss_data.hash_types);
1635    if (net_hash_type > NetPktRssIpV6UdpEx) {
1636        if (n->rss_data.populate_hash) {
1637            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1638        }
1639        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1640    }
1641
1642    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1643
1644    if (n->rss_data.populate_hash) {
1645        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1646    }
1647
1648    if (n->rss_data.redirect) {
1649        new_index = hash & (n->rss_data.indirections_len - 1);
1650        new_index = n->rss_data.indirections_table[new_index];
1651    }
1652
1653    return (index == new_index) ? -1 : new_index;
1654}
1655
1656static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1657                                      size_t size, bool no_rss)
1658{
1659    VirtIONet *n = qemu_get_nic_opaque(nc);
1660    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1661    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1662    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1663    size_t lens[VIRTQUEUE_MAX_SIZE];
1664    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1665    struct virtio_net_hdr_mrg_rxbuf mhdr;
1666    unsigned mhdr_cnt = 0;
1667    size_t offset, i, guest_offset, j;
1668    ssize_t err;
1669
1670    if (!virtio_net_can_receive(nc)) {
1671        return -1;
1672    }
1673
1674    if (!no_rss && n->rss_data.enabled) {
1675        int index = virtio_net_process_rss(nc, buf, size);
1676        if (index >= 0) {
1677            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1678            return virtio_net_receive_rcu(nc2, buf, size, true);
1679        }
1680    }
1681
1682    /* hdr_len refers to the header we supply to the guest */
1683    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1684        return 0;
1685    }
1686
1687    if (!receive_filter(n, buf, size)) {
1688        return size;
        }
1689
1690    offset = i = 0;
1691
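        /*
         * Fill the packet into as many RX descriptor chains as needed.
         * The first element also receives the virtio-net header; with
         * mergeable buffers, num_buffers is patched in after the loop
         * via mhdr_sg once the final element count is known.
         */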
1692    while (offset < size) {
1693        VirtQueueElement *elem;
1694        int len, total;
1695        const struct iovec *sg;
1696
1697        total = 0;
1698
1699        if (i == VIRTQUEUE_MAX_SIZE) {
1700            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1701            err = size;
1702            goto err;
1703        }
1704
1705        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1706        if (!elem) {
1707            if (i) {
1708                virtio_error(vdev, "virtio-net unexpected empty queue: "
1709                             "i %zd mergeable %d offset %zd, size %zd, "
1710                             "guest hdr len %zd, host hdr len %zd "
1711                             "guest features 0x%" PRIx64,
1712                             i, n->mergeable_rx_bufs, offset, size,
1713                             n->guest_hdr_len, n->host_hdr_len,
1714                             vdev->guest_features);
1715            }
1716            err = -1;
1717            goto err;
1718        }
1719
1720        if (elem->in_num < 1) {
1721            virtio_error(vdev,
1722                         "virtio-net receive queue contains no in buffers");
1723            virtqueue_detach_element(q->rx_vq, elem, 0);
1724            g_free(elem);
1725            err = -1;
1726            goto err;
1727        }
1728
1729        sg = elem->in_sg;
1730        if (i == 0) {
1731            assert(offset == 0);
1732            if (n->mergeable_rx_bufs) {
1733                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1734                                    sg, elem->in_num,
1735                                    offsetof(typeof(mhdr), num_buffers),
1736                                    sizeof(mhdr.num_buffers));
1737            }
1738
1739            receive_header(n, sg, elem->in_num, buf, size);
1740            if (n->rss_data.populate_hash) {
1741                offset = sizeof(mhdr);
1742                iov_from_buf(sg, elem->in_num, offset,
1743                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1744            }
1745            offset = n->host_hdr_len;
1746            total += n->guest_hdr_len;
1747            guest_offset = n->guest_hdr_len;
1748        } else {
1749            guest_offset = 0;
1750        }
1751
1752        /* copy in the packet data */
1753        len = iov_from_buf(sg, elem->in_num, guest_offset,
1754                           buf + offset, size - offset);
1755        total += len;
1756        offset += len;
1757        /* If buffers can't be merged, at this point we
1758         * must have consumed the complete packet.
1759         * Otherwise, drop it. */
1760        if (!n->mergeable_rx_bufs && offset < size) {
1761            virtqueue_unpop(q->rx_vq, elem, total);
1762            g_free(elem);
1763            err = size;
1764            goto err;
1765        }
1766
1767        elems[i] = elem;
1768        lens[i] = total;
1769        i++;
1770    }
1771
1772    if (mhdr_cnt) {
1773        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1774        iov_from_buf(mhdr_sg, mhdr_cnt,
1775                     0,
1776                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1777    }
1778
1779    for (j = 0; j < i; j++) {
1780        /* signal other side */
1781        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1782        g_free(elems[j]);
1783    }
1784
1785    virtqueue_flush(q->rx_vq, i);
1786    virtio_notify(vdev, q->rx_vq);
1787
1788    return size;
1789
1790err:
1791    for (j = 0; j < i; j++) {
1792        g_free(elems[j]);
1793    }
1794
1795    return err;
1796}
1797
1798static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1799                                  size_t size)
1800{
1801    RCU_READ_LOCK_GUARD();
1802
1803    return virtio_net_receive_rcu(nc, buf, size, false);
1804}
1805
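    /*
     * Locate the IPv4 and TCP headers of a buffered frame and record the
     * header and payload sizes.  The TCP header length sits in the top
     * four bits of th_offset_flags, counted in 32-bit words, so
     * (flags & 0xF000) >> 12 gives words and multiplying by 4 makes the
     * combined shift >> 10: e.g. a data-offset field of 5 (no options)
     * gives 0x5000 >> 10 == 20 bytes.  (htons() here performs
     * network-to-host conversion; it is the same byte swap as ntohs().)
     */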
1806static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1807                                         const uint8_t *buf,
1808                                         VirtioNetRscUnit *unit)
1809{
1810    uint16_t ip_hdrlen;
1811    struct ip_header *ip;
1812
1813    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1814                              + sizeof(struct eth_header));
1815    unit->ip = (void *)ip;
1816    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1817    unit->ip_plen = &ip->ip_len;
1818    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1819    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1820    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1821}
1822
1823static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1824                                         const uint8_t *buf,
1825                                         VirtioNetRscUnit *unit)
1826{
1827    struct ip6_header *ip6;
1828
1829    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1830                                 + sizeof(struct eth_header));
1831    unit->ip = ip6;
1832    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1833    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1834                                        + sizeof(struct ip6_header));
1835    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1836
1837    /* The payload length fields differ between IPv4 and IPv6: the
1838       IP header itself is excluded from the payload length in IPv6 */
1839    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1840}
1841
1842static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1843                                       VirtioNetRscSeg *seg)
1844{
1845    int ret;
1846    struct virtio_net_hdr_v1 *h;
1847
1848    h = (struct virtio_net_hdr_v1 *)seg->buf;
1849    h->flags = 0;
1850    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1851
1852    if (seg->is_coalesced) {
1853        h->rsc.segments = seg->packets;
1854        h->rsc.dup_acks = seg->dup_ack;
1855        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1856        if (chain->proto == ETH_P_IP) {
1857            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1858        } else {
1859            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1860        }
1861    }
1862
1863    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1864    QTAILQ_REMOVE(&chain->buffers, seg, next);
1865    g_free(seg->buf);
1866    g_free(seg);
1867
1868    return ret;
1869}
1870
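    /*
     * Timer callback: drain every segment still cached on the chain,
     * counting failed sends in stat.purge_failed.  The timer is only
     * re-armed if buffers somehow remain queued afterwards.
     */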
1871static void virtio_net_rsc_purge(void *opq)
1872{
1873    VirtioNetRscSeg *seg, *rn;
1874    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1875
1876    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1877        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1878            chain->stat.purge_failed++;
1879            continue;
1880        }
1881    }
1882
1883    chain->stat.timer++;
1884    if (!QTAILQ_EMPTY(&chain->buffers)) {
1885        timer_mod(chain->drain_timer,
1886              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1887    }
1888}
1889
1890static void virtio_net_rsc_cleanup(VirtIONet *n)
1891{
1892    VirtioNetRscChain *chain, *rn_chain;
1893    VirtioNetRscSeg *seg, *rn_seg;
1894
1895    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1896        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1897            QTAILQ_REMOVE(&chain->buffers, seg, next);
1898            g_free(seg->buf);
1899            g_free(seg);
1900        }
1901
1902        timer_free(chain->drain_timer);
1903        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1904        g_free(chain);
1905    }
1906}
1907
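    /*
     * Cache a frame as a new coalescing candidate.  The buffer is sized
     * for the largest chain it may grow into, regardless of protocol:
     * guest header + Ethernet + IPv6 header + VIRTIO_NET_MAX_TCP_PAYLOAD.
     */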
1908static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1909                                     NetClientState *nc,
1910                                     const uint8_t *buf, size_t size)
1911{
1912    uint16_t hdr_len;
1913    VirtioNetRscSeg *seg;
1914
1915    hdr_len = chain->n->guest_hdr_len;
1916    seg = g_malloc(sizeof(VirtioNetRscSeg));
1917    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1918        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1919    memcpy(seg->buf, buf, size);
1920    seg->size = size;
1921    seg->packets = 1;
1922    seg->dup_ack = 0;
1923    seg->is_coalesced = 0;
1924    seg->nc = nc;
1925
1926    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1927    chain->stat.cache++;
1928
1929    switch (chain->proto) {
1930    case ETH_P_IP:
1931        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1932        break;
1933    case ETH_P_IPV6:
1934        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1935        break;
1936    default:
1937        g_assert_not_reached();
1938    }
1939}
1940
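    /*
     * Handle a segment whose sequence number matches the cached one
     * (typically a bare ack).  Roughly:
     *
     *     ack far ahead of the old one  -> RSC_FINAL (out of window)
     *     same ack, same window         -> RSC_FINAL (duplicate ack)
     *     same ack, different window    -> RSC_COALESCE (window update)
     *     different ack                 -> RSC_FINAL (treated as pure ack)
     */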
1941static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1942                                         VirtioNetRscSeg *seg,
1943                                         const uint8_t *buf,
1944                                         struct tcp_header *n_tcp,
1945                                         struct tcp_header *o_tcp)
1946{
1947    uint32_t nack, oack;
1948    uint16_t nwin, owin;
1949
1950    nack = htonl(n_tcp->th_ack);
1951    nwin = htons(n_tcp->th_win);
1952    oack = htonl(o_tcp->th_ack);
1953    owin = htons(o_tcp->th_win);
1954
1955    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1956        chain->stat.ack_out_of_win++;
1957        return RSC_FINAL;
1958    } else if (nack == oack) {
1959        /* duplicated ack or window probe */
1960        if (nwin == owin) {
1961            /* duplicated ack: bump the dup-ack count (the WHQL test allows up to 1) */
1962            chain->stat.dup_ack++;
1963            return RSC_FINAL;
1964        } else {
1965            /* Coalesce window update */
1966            o_tcp->th_win = n_tcp->th_win;
1967            chain->stat.win_update++;
1968            return RSC_COALESCE;
1969        }
1970    } else {
1971            /* pure ack: finalize the cached segment */
1972        chain->stat.pure_ack++;
1973        return RSC_FINAL;
1974    }
1975}
1976
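    /*
     * Try to append the new segment's payload to the cached one.  The
     * in-order test relies on unsigned wraparound: e.g. with oseq == 1000
     * and a cached payload of 500 bytes, only nseq == 1500 coalesces; a
     * retransmit with nseq < oseq wraps to a huge difference and is
     * treated as out of window.
     */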
1977static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1978                                            VirtioNetRscSeg *seg,
1979                                            const uint8_t *buf,
1980                                            VirtioNetRscUnit *n_unit)
1981{
1982    void *data;
1983    uint16_t o_ip_len;
1984    uint32_t nseq, oseq;
1985    VirtioNetRscUnit *o_unit;
1986
1987    o_unit = &seg->unit;
1988    o_ip_len = htons(*o_unit->ip_plen);
1989    nseq = htonl(n_unit->tcp->th_seq);
1990    oseq = htonl(o_unit->tcp->th_seq);
1991
1992    /* out of order or retransmitted. */
1993    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1994        chain->stat.data_out_of_win++;
1995        return RSC_FINAL;
1996    }
1997
1998    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1999    if (nseq == oseq) {
2000        if ((o_unit->payload == 0) && n_unit->payload) {
2001            /* Going from no payload to payload: the normal case, not a dup ack */
2002            chain->stat.data_after_pure_ack++;
2003            goto coalesce;
2004        } else {
2005            return virtio_net_rsc_handle_ack(chain, seg, buf,
2006                                             n_unit->tcp, o_unit->tcp);
2007        }
2008    } else if ((nseq - oseq) != o_unit->payload) {
2009        /* Not contiguous with the cached payload: out of order */
2010        chain->stat.data_out_of_order++;
2011        return RSC_FINAL;
2012    } else {
2013coalesce:
2014        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2015            chain->stat.over_size++;
2016            return RSC_FINAL;
2017        }
2018
2019        /* The data is in order; the payload length field differs between
2020           v4 and v6, so use the stored field value to compute the new length */
2021        o_unit->payload += n_unit->payload; /* update new data len */
2022
2023        /* update field in ip header */
2024        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2025
2026        /* Carry the 'PUSH' flag over: the WHQL test guide says 'PUSH' can be
2027           coalesced for Windows guests, while this may change the behavior for
2028           Linux guests (only if they use the RSC feature). */
2029        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2030
2031        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2032        o_unit->tcp->th_win = n_unit->tcp->th_win;
2033
2034        memmove(seg->buf + seg->size, data, n_unit->payload);
2035        seg->size += n_unit->payload;
2036        seg->packets++;
2037        chain->stat.coalesced++;
2038        return RSC_COALESCE;
2039    }
2040}
2041
2042static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2043                                        VirtioNetRscSeg *seg,
2044                                        const uint8_t *buf, size_t size,
2045                                        VirtioNetRscUnit *unit)
2046{
2047    struct ip_header *ip1, *ip2;
2048
2049    ip1 = (struct ip_header *)(unit->ip);
2050    ip2 = (struct ip_header *)(seg->unit.ip);
2051    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2052        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2053        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2054        chain->stat.no_match++;
2055        return RSC_NO_MATCH;
2056    }
2057
2058    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2059}
2060
2061static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2062                                        VirtioNetRscSeg *seg,
2063                                        const uint8_t *buf, size_t size,
2064                                        VirtioNetRscUnit *unit)
2065{
2066    struct ip6_header *ip1, *ip2;
2067
2068    ip1 = (struct ip6_header *)(unit->ip);
2069    ip2 = (struct ip6_header *)(seg->unit.ip);
2070    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2071        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2072        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2073        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2074            chain->stat.no_match++;
2075            return RSC_NO_MATCH;
2076    }
2077
2078    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2079}
2080
2081/* Packets with 'SYN' set should bypass coalescing; other control flags
2082 * are sent only after the flow is drained, to prevent reordering */
2083static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2084                                         struct tcp_header *tcp)
2085{
2086    uint16_t tcp_hdr;
2087    uint16_t tcp_flag;
2088
2089    tcp_flag = htons(tcp->th_offset_flags);
2090    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2091    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2092    if (tcp_flag & TH_SYN) {
2093        chain->stat.tcp_syn++;
2094        return RSC_BYPASS;
2095    }
2096
2097    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2098        chain->stat.tcp_ctrl_drain++;
2099        return RSC_FINAL;
2100    }
2101
2102    if (tcp_hdr > sizeof(struct tcp_header)) {
2103        chain->stat.tcp_all_opt++;
2104        return RSC_FINAL;
2105    }
2106
2107    return RSC_CANDIDATE;
2108}
2109
2110static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2111                                         NetClientState *nc,
2112                                         const uint8_t *buf, size_t size,
2113                                         VirtioNetRscUnit *unit)
2114{
2115    int ret;
2116    VirtioNetRscSeg *seg, *nseg;
2117
2118    if (QTAILQ_EMPTY(&chain->buffers)) {
2119        chain->stat.empty_cache++;
2120        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2121        timer_mod(chain->drain_timer,
2122              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2123        return size;
2124    }
2125
2126    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2127        if (chain->proto == ETH_P_IP) {
2128            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2129        } else {
2130            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2131        }
2132
2133        if (ret == RSC_FINAL) {
2134            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2135                /* Send failed */
2136                chain->stat.final_failed++;
2137                return 0;
2138            }
2139
2140            /* Send current packet */
2141            return virtio_net_do_receive(nc, buf, size);
2142        } else if (ret == RSC_NO_MATCH) {
2143            continue;
2144        } else {
2145            /* Coalesced: set the flag so the IPv4 checksum is recalculated */
2146            seg->is_coalesced = 1;
2147            return size;
2148        }
2149    }
2150
2151    chain->stat.no_match_cache++;
2152    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2153    return size;
2154}
2155
2156/* Drain a connection's cached data to avoid out-of-order segments */
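/*
 * The offsets describe the flow to drain: ip_start/ip_size cover the
 * source and destination addresses (e.g. for IPv4, offset 12 into the
 * IP header and VIRTIO_NET_IP4_ADDR_SIZE == 8 bytes), while tcp_port
 * points at the TCP header, whose first four bytes hold the source and
 * destination ports and are compared as one uint32_t load.
 */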
2157static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2158                                        NetClientState *nc,
2159                                        const uint8_t *buf, size_t size,
2160                                        uint16_t ip_start, uint16_t ip_size,
2161                                        uint16_t tcp_port)
2162{
2163    VirtioNetRscSeg *seg, *nseg;
2164    uint32_t ppair1, ppair2;
2165
2166    ppair1 = *(uint32_t *)(buf + tcp_port);
2167    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2168        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2169        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2170            || (ppair1 != ppair2)) {
2171            continue;
2172        }
2173        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2174            chain->stat.drain_failed++;
2175        }
2176
2177        break;
2178    }
2179
2180    return virtio_net_do_receive(nc, buf, size);
2181}
2182
2183static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2184                                            struct ip_header *ip,
2185                                            const uint8_t *buf, size_t size)
2186{
2187    uint16_t ip_len;
2188
2189    /* Not an ipv4 packet */
2190    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2191        chain->stat.ip_option++;
2192        return RSC_BYPASS;
2193    }
2194
2195    /* Don't handle packets with ip option */
2196    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2197        chain->stat.ip_option++;
2198        return RSC_BYPASS;
2199    }
2200
2201    if (ip->ip_p != IPPROTO_TCP) {
2202        chain->stat.bypass_not_tcp++;
2203        return RSC_BYPASS;
2204    }
2205
2206    /* Only handle packets with the DF bit set, i.e. non-fragmented ones */
2207    if (!(htons(ip->ip_off) & IP_DF)) {
2208        chain->stat.ip_frag++;
2209        return RSC_BYPASS;
2210    }
2211
2212    /* Don't handle packets with ecn flag */
2213    if (IPTOS_ECN(ip->ip_tos)) {
2214        chain->stat.ip_ecn++;
2215        return RSC_BYPASS;
2216    }
2217
2218    ip_len = htons(ip->ip_len);
2219    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2220        || ip_len > (size - chain->n->guest_hdr_len -
2221                     sizeof(struct eth_header))) {
2222        chain->stat.ip_hacked++;
2223        return RSC_BYPASS;
2224    }
2225
2226    return RSC_CANDIDATE;
2227}
2228
2229static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2230                                      NetClientState *nc,
2231                                      const uint8_t *buf, size_t size)
2232{
2233    int32_t ret;
2234    uint16_t hdr_len;
2235    VirtioNetRscUnit unit;
2236
2237    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2238
2239    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2240        + sizeof(struct tcp_header))) {
2241        chain->stat.bypass_not_tcp++;
2242        return virtio_net_do_receive(nc, buf, size);
2243    }
2244
2245    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2246    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2247        != RSC_CANDIDATE) {
2248        return virtio_net_do_receive(nc, buf, size);
2249    }
2250
2251    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2252    if (ret == RSC_BYPASS) {
2253        return virtio_net_do_receive(nc, buf, size);
2254    } else if (ret == RSC_FINAL) {
2255        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2256                ((hdr_len + sizeof(struct eth_header)) + 12),
2257                VIRTIO_NET_IP4_ADDR_SIZE,
2258                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2259    }
2260
2261    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2262}
2263
2264static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2265                                            struct ip6_header *ip6,
2266                                            const uint8_t *buf, size_t size)
2267{
2268    uint16_t ip_len;
2269
2270    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2271        != IP_HEADER_VERSION_6) {
2272        return RSC_BYPASS;
2273    }
2274
2275    /* Both options and protocol are checked here: any next header
           other than TCP (including extension headers) means bypass */
2276    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2277        chain->stat.bypass_not_tcp++;
2278        return RSC_BYPASS;
2279    }
2280
2281    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2282    if (ip_len < sizeof(struct tcp_header) ||
2283        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2284                  - sizeof(struct ip6_header))) {
2285        chain->stat.ip_hacked++;
2286        return RSC_BYPASS;
2287    }
2288
2289    /* Don't handle packets with ecn flag */
2290    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2291        chain->stat.ip_ecn++;
2292        return RSC_BYPASS;
2293    }
2294
2295    return RSC_CANDIDATE;
2296}
2297
2298static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2299                                      const uint8_t *buf, size_t size)
2300{
2301    int32_t ret;
2302    uint16_t hdr_len;
2303    VirtioNetRscChain *chain;
2304    VirtioNetRscUnit unit;
2305
2306    chain = (VirtioNetRscChain *)opq;
2307    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2308
2309    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2310        + sizeof(struct tcp_header))) {
2311        return virtio_net_do_receive(nc, buf, size);
2312    }
2313
2314    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2315    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2316        != RSC_CANDIDATE) {
2317        return virtio_net_do_receive(nc, buf, size);
2318    }
2319
2320    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2321    if (ret == RSC_BYPASS) {
2322        return virtio_net_do_receive(nc, buf, size);
2323    } else if (ret == RSC_FINAL) {
2324        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2325                ((hdr_len + sizeof(struct eth_header)) + 8),
2326                VIRTIO_NET_IP6_ADDR_SIZE,
2327                hdr_len + sizeof(struct eth_header)
2328                + sizeof(struct ip6_header));
2329    }
2330
2331    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2332}
2333
2334static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2335                                                      NetClientState *nc,
2336                                                      uint16_t proto)
2337{
2338    VirtioNetRscChain *chain;
2339
2340    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2341        return NULL;
2342    }
2343
2344    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2345        if (chain->proto == proto) {
2346            return chain;
2347        }
2348    }
2349
2350    chain = g_malloc(sizeof(*chain));
2351    chain->n = n;
2352    chain->proto = proto;
2353    if (proto == (uint16_t)ETH_P_IP) {
2354        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2355        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2356    } else {
2357        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2358        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2359    }
2360    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2361                                      virtio_net_rsc_purge, chain);
2362    memset(&chain->stat, 0, sizeof(chain->stat));
2363
2364    QTAILQ_INIT(&chain->buffers);
2365    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2366
2367    return chain;
2368}
2369
2370static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2371                                      const uint8_t *buf,
2372                                      size_t size)
2373{
2374    uint16_t proto;
2375    VirtioNetRscChain *chain;
2376    struct eth_header *eth;
2377    VirtIONet *n;
2378
2379    n = qemu_get_nic_opaque(nc);
2380    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2381        return virtio_net_do_receive(nc, buf, size);
2382    }
2383
2384    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2385    proto = htons(eth->h_proto);
2386
2387    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2388    if (chain) {
2389        chain->stat.received++;
2390        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2391            return virtio_net_rsc_receive4(chain, nc, buf, size);
2392        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2393            return virtio_net_rsc_receive6(chain, nc, buf, size);
2394        }
2395    }
2396    return virtio_net_do_receive(nc, buf, size);
2397}
2398
2399static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2400                                  size_t size)
2401{
2402    VirtIONet *n = qemu_get_nic_opaque(nc);
2403    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2404        return virtio_net_rsc_receive(nc, buf, size);
2405    } else {
2406        return virtio_net_do_receive(nc, buf, size);
2407    }
2408}
2409
2410static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2411
2412static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2413{
2414    VirtIONet *n = qemu_get_nic_opaque(nc);
2415    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2416    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2417
2418    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2419    virtio_notify(vdev, q->tx_vq);
2420
2421    g_free(q->async_tx.elem);
2422    q->async_tx.elem = NULL;
2423
2424    virtio_queue_set_notification(q->tx_vq, 1);
2425    virtio_net_flush_tx(q);
2426}
2427
2428/* TX */
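/*
 * Flush up to n->tx_burst packets from the TX virtqueue.  If the
 * backend cannot take a packet yet (qemu_sendv_packet_async() returns
 * 0), the element is parked in q->async_tx and -EBUSY is returned;
 * virtio_net_tx_complete() later pushes it and resumes flushing.  When
 * guest and host header layouts differ, only the part the host cares
 * about is copied out (see the iov_copy() calls below).
 */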
2429static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2430{
2431    VirtIONet *n = q->n;
2432    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2433    VirtQueueElement *elem;
2434    int32_t num_packets = 0;
2435    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2436    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2437        return num_packets;
2438    }
2439
2440    if (q->async_tx.elem) {
2441        virtio_queue_set_notification(q->tx_vq, 0);
2442        return num_packets;
2443    }
2444
2445    for (;;) {
2446        ssize_t ret;
2447        unsigned int out_num;
2448        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2449        struct virtio_net_hdr_mrg_rxbuf mhdr;
2450
2451        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2452        if (!elem) {
2453            break;
2454        }
2455
2456        out_num = elem->out_num;
2457        out_sg = elem->out_sg;
2458        if (out_num < 1) {
2459            virtio_error(vdev, "virtio-net header not in first element");
2460            virtqueue_detach_element(q->tx_vq, elem, 0);
2461            g_free(elem);
2462            return -EINVAL;
2463        }
2464
2465        if (n->has_vnet_hdr) {
2466            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2467                n->guest_hdr_len) {
2468                virtio_error(vdev, "virtio-net header incorrect");
2469                virtqueue_detach_element(q->tx_vq, elem, 0);
2470                g_free(elem);
2471                return -EINVAL;
2472            }
2473            if (n->needs_vnet_hdr_swap) {
2474                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2475                sg2[0].iov_base = &mhdr;
2476                sg2[0].iov_len = n->guest_hdr_len;
2477                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2478                                   out_sg, out_num,
2479                                   n->guest_hdr_len, -1);
2480                if (out_num == VIRTQUEUE_MAX_SIZE) {
2481                    goto drop;
2482                }
2483                out_num += 1;
2484                out_sg = sg2;
2485            }
2486        }
2487        /*
2488         * If the host wants to see the guest header as is, we can
2489         * pass it on unchanged. Otherwise, copy just the parts
2490         * the host is interested in.
2491         */
2492        assert(n->host_hdr_len <= n->guest_hdr_len);
2493        if (n->host_hdr_len != n->guest_hdr_len) {
2494            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2495                                       out_sg, out_num,
2496                                       0, n->host_hdr_len);
2497            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2498                             out_sg, out_num,
2499                             n->guest_hdr_len, -1);
2500            out_num = sg_num;
2501            out_sg = sg;
2502        }
2503
2504        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2505                                      out_sg, out_num, virtio_net_tx_complete);
2506        if (ret == 0) {
2507            virtio_queue_set_notification(q->tx_vq, 0);
2508            q->async_tx.elem = elem;
2509            return -EBUSY;
2510        }
2511
2512drop:
2513        virtqueue_push(q->tx_vq, elem, 0);
2514        virtio_notify(vdev, q->tx_vq);
2515        g_free(elem);
2516
2517        if (++num_packets >= n->tx_burst) {
2518            break;
2519        }
2520    }
2521    return num_packets;
2522}
2523
2524static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2525{
2526    VirtIONet *n = VIRTIO_NET(vdev);
2527    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2528
2529    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2530        virtio_net_drop_tx_queue_data(vdev, vq);
2531        return;
2532    }
2533
2534    /* This happens when the device was stopped but the VCPU wasn't. */
2535    if (!vdev->vm_running) {
2536        q->tx_waiting = 1;
2537        return;
2538    }
2539
2540    if (q->tx_waiting) {
2541        virtio_queue_set_notification(vq, 1);
2542        timer_del(q->tx_timer);
2543        q->tx_waiting = 0;
2544        if (virtio_net_flush_tx(q) == -EINVAL) {
2545            return;
2546        }
2547    } else {
2548        timer_mod(q->tx_timer,
2549                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2550        q->tx_waiting = 1;
2551        virtio_queue_set_notification(vq, 0);
2552    }
2553}
2554
2555static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2556{
2557    VirtIONet *n = VIRTIO_NET(vdev);
2558    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2559
2560    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2561        virtio_net_drop_tx_queue_data(vdev, vq);
2562        return;
2563    }
2564
2565    if (unlikely(q->tx_waiting)) {
2566        return;
2567    }
2568    q->tx_waiting = 1;
2569    /* This happens when the device was stopped but the VCPU wasn't. */
2570    if (!vdev->vm_running) {
2571        return;
2572    }
2573    virtio_queue_set_notification(vq, 0);
2574    qemu_bh_schedule(q->tx_bh);
2575}
2576
2577static void virtio_net_tx_timer(void *opaque)
2578{
2579    VirtIONetQueue *q = opaque;
2580    VirtIONet *n = q->n;
2581    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2582    /* This happens when the device was stopped but the BH wasn't. */
2583    if (!vdev->vm_running) {
2584        /* Make sure tx waiting is set, so we'll run when restarted. */
2585        assert(q->tx_waiting);
2586        return;
2587    }
2588
2589    q->tx_waiting = 0;
2590
2591    /* Just in case the driver is not ready any more */
2592    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2593        return;
2594    }
2595
2596    virtio_queue_set_notification(q->tx_vq, 1);
2597    virtio_net_flush_tx(q);
2598}
2599
2600static void virtio_net_tx_bh(void *opaque)
2601{
2602    VirtIONetQueue *q = opaque;
2603    VirtIONet *n = q->n;
2604    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2605    int32_t ret;
2606
2607    /* This happens when the device was stopped but the BH wasn't. */
2608    if (!vdev->vm_running) {
2609        /* Make sure tx waiting is set, so we'll run when restarted. */
2610        assert(q->tx_waiting);
2611        return;
2612    }
2613
2614    q->tx_waiting = 0;
2615
2616    /* Just in case the driver is not ready any more */
2617    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2618        return;
2619    }
2620
2621    ret = virtio_net_flush_tx(q);
2622    if (ret == -EBUSY || ret == -EINVAL) {
2623        return; /* Notification re-enable handled by tx_complete or device
2624                 * broken */
2625    }
2626
2627    /* If we flush a full burst of packets, assume there are
2628     * more coming and immediately reschedule */
2629    if (ret >= n->tx_burst) {
2630        qemu_bh_schedule(q->tx_bh);
2631        q->tx_waiting = 1;
2632        return;
2633    }
2634
2635    /* If less than a full burst, re-enable notification and flush
2636     * anything that may have come in while we weren't looking.  If
2637     * we find something, assume the guest is still active and reschedule */
2638    virtio_queue_set_notification(q->tx_vq, 1);
2639    ret = virtio_net_flush_tx(q);
2640    if (ret == -EINVAL) {
2641        return;
2642    } else if (ret > 0) {
2643        virtio_queue_set_notification(q->tx_vq, 0);
2644        qemu_bh_schedule(q->tx_bh);
2645        q->tx_waiting = 1;
2646    }
2647}
2648
2649static void virtio_net_add_queue(VirtIONet *n, int index)
2650{
2651    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2652
2653    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2654                                           virtio_net_handle_rx);
2655
2656    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2657        n->vqs[index].tx_vq =
2658            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2659                             virtio_net_handle_tx_timer);
2660        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2661                                              virtio_net_tx_timer,
2662                                              &n->vqs[index]);
2663    } else {
2664        n->vqs[index].tx_vq =
2665            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2666                             virtio_net_handle_tx_bh);
2667        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2668    }
2669
2670    n->vqs[index].tx_waiting = 0;
2671    n->vqs[index].n = n;
2672}
2673
2674static void virtio_net_del_queue(VirtIONet *n, int index)
2675{
2676    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2677    VirtIONetQueue *q = &n->vqs[index];
2678    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2679
2680    qemu_purge_queued_packets(nc);
2681
2682    virtio_del_queue(vdev, index * 2);
2683    if (q->tx_timer) {
2684        timer_free(q->tx_timer);
2685        q->tx_timer = NULL;
2686    } else {
2687        qemu_bh_delete(q->tx_bh);
2688        q->tx_bh = NULL;
2689    }
2690    q->tx_waiting = 0;
2691    virtio_del_queue(vdev, index * 2 + 1);
2692}
2693
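/*
 * Virtqueues are laid out as [rx0, tx0, rx1, tx1, ..., ctrl], so a
 * device with new_max_queues queue pairs needs 2 * new_max_queues + 1
 * vqs; e.g. growing from 1 pair (3 vqs) to 4 pairs yields 9.  The ctrl
 * vq must stay last, which is why it is deleted first and re-added
 * after the pair loops below.
 */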
2694static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2695{
2696    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2697    int old_num_queues = virtio_get_num_queues(vdev);
2698    int new_num_queues = new_max_queues * 2 + 1;
2699    int i;
2700
2701    assert(old_num_queues >= 3);
2702    assert(old_num_queues % 2 == 1);
2703
2704    if (old_num_queues == new_num_queues) {
2705        return;
2706    }
2707
2708    /*
2709     * We always need to remove and add ctrl vq if
2710     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2711     * and then we only enter one of the following two loops.
2712     */
2713    virtio_del_queue(vdev, old_num_queues - 1);
2714
2715    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2716        /* new_num_queues < old_num_queues */
2717        virtio_net_del_queue(n, i / 2);
2718    }
2719
2720    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2721        /* new_num_queues > old_num_queues */
2722        virtio_net_add_queue(n, i / 2);
2723    }
2724
2725    /* add ctrl_vq last */
2726    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2727}
2728
2729static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2730{
2731    int max = multiqueue ? n->max_queues : 1;
2732
2733    n->multiqueue = multiqueue;
2734    virtio_net_change_num_queues(n, max);
2735
2736    virtio_net_set_queues(n);
2737}
2738
2739static int virtio_net_post_load_device(void *opaque, int version_id)
2740{
2741    VirtIONet *n = opaque;
2742    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2743    int i, link_down;
2744
2745    trace_virtio_net_post_load_device();
2746    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2747                               virtio_vdev_has_feature(vdev,
2748                                                       VIRTIO_F_VERSION_1),
2749                               virtio_vdev_has_feature(vdev,
2750                                                       VIRTIO_NET_F_HASH_REPORT));
2751
2752    /* MAC_TABLE_ENTRIES may be different from the saved image */
2753    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2754        n->mac_table.in_use = 0;
2755    }
2756
2757    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2758        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2759    }
2760
2761    /*
2762     * curr_guest_offloads will later be overwritten by the
2763     * virtio_set_features_nocheck call done from virtio_load.
2764     * Here we make sure it is preserved and restored accordingly
2765     * in the virtio_net_post_load_virtio callback.
2766     */
2767    n->saved_guest_offloads = n->curr_guest_offloads;
2768
2769    virtio_net_set_queues(n);
2770
2771    /* Find the first multicast entry in the saved MAC filter */
2772    for (i = 0; i < n->mac_table.in_use; i++) {
2773        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2774            break;
2775        }
2776    }
2777    n->mac_table.first_multi = i;
2778
2779    /* nc.link_down can't be migrated, so infer link_down from the
2780     * link status bit in n->status */
2781    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2782    for (i = 0; i < n->max_queues; i++) {
2783        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2784    }
2785
2786    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2787        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2788        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2789                                  QEMU_CLOCK_VIRTUAL,
2790                                  virtio_net_announce_timer, n);
2791        if (n->announce_timer.round) {
2792            timer_mod(n->announce_timer.tm,
2793                      qemu_clock_get_ms(n->announce_timer.type));
2794        } else {
2795            qemu_announce_timer_del(&n->announce_timer, false);
2796        }
2797    }
2798
2799    if (n->rss_data.enabled) {
2800        trace_virtio_net_rss_enable(n->rss_data.hash_types,
2801                                    n->rss_data.indirections_len,
2802                                    sizeof(n->rss_data.key));
2803    } else {
2804        trace_virtio_net_rss_disable();
2805    }
2806    return 0;
2807}
2808
2809static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2810{
2811    VirtIONet *n = VIRTIO_NET(vdev);
2812    /*
2813     * The state we actually need is now in saved_guest_offloads;
2814     * see virtio_net_post_load_device for details.
2815     * Restore it and apply the desired offloads.
2816     */
2817    n->curr_guest_offloads = n->saved_guest_offloads;
2818    if (peer_has_vnet_hdr(n)) {
2819        virtio_net_apply_guest_offloads(n);
2820    }
2821
2822    return 0;
2823}
2824
2825/* tx_waiting field of a VirtIONetQueue */
2826static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2827    .name = "virtio-net-queue-tx_waiting",
2828    .fields = (VMStateField[]) {
2829        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2830        VMSTATE_END_OF_LIST()
2831   },
2832};
2833
2834static bool max_queues_gt_1(void *opaque, int version_id)
2835{
2836    return VIRTIO_NET(opaque)->max_queues > 1;
2837}
2838
2839static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2840{
2841    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2842                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2843}
2844
2845static bool mac_table_fits(void *opaque, int version_id)
2846{
2847    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2848}
2849
2850static bool mac_table_doesnt_fit(void *opaque, int version_id)
2851{
2852    return !mac_table_fits(opaque, version_id);
2853}
2854
2855/* This temporary type is shared by all the WITH_TMP methods
2856 * although only some fields are used by each.
2857 */
2858struct VirtIONetMigTmp {
2859    VirtIONet      *parent;
2860    VirtIONetQueue *vqs_1;
2861    uint16_t        curr_queues_1;
2862    uint8_t         has_ufo;
2863    uint32_t        has_vnet_hdr;
2864};
2865
2866/* The 2nd and subsequent tx_waiting flags are loaded later than
2867 * the 1st entry in the queues and only if there's more than one
2868 * entry.  We use the tmp mechanism to calculate a temporary
2869 * pointer and count and also validate the count.
2870 */
2871
2872static int virtio_net_tx_waiting_pre_save(void *opaque)
2873{
2874    struct VirtIONetMigTmp *tmp = opaque;
2875
2876    tmp->vqs_1 = tmp->parent->vqs + 1;
2877    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2878    if (tmp->parent->curr_queues == 0) {
2879        tmp->curr_queues_1 = 0;
2880    }
2881
2882    return 0;
2883}
2884
2885static int virtio_net_tx_waiting_pre_load(void *opaque)
2886{
2887    struct VirtIONetMigTmp *tmp = opaque;
2888
2889    /* Reuse the pointer setup from save */
2890    virtio_net_tx_waiting_pre_save(opaque);
2891
2892    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2893        error_report("virtio-net: curr_queues %x > max_queues %x",
2894            tmp->parent->curr_queues, tmp->parent->max_queues);
2895
2896        return -EINVAL;
2897    }
2898
2899    return 0; /* all good */
2900}
2901
2902static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2903    .name      = "virtio-net-tx_waiting",
2904    .pre_load  = virtio_net_tx_waiting_pre_load,
2905    .pre_save  = virtio_net_tx_waiting_pre_save,
2906    .fields    = (VMStateField[]) {
2907        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2908                                     curr_queues_1,
2909                                     vmstate_virtio_net_queue_tx_waiting,
2910                                     struct VirtIONetQueue),
2911        VMSTATE_END_OF_LIST()
2912    },
2913};
2914
2915/* the 'has_ufo' flag is just tested; if the incoming stream has the
2916 * flag set we need to check that we have it
2917 */
2918static int virtio_net_ufo_post_load(void *opaque, int version_id)
2919{
2920    struct VirtIONetMigTmp *tmp = opaque;
2921
2922    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2923        error_report("virtio-net: saved image requires TUN_F_UFO support");
2924        return -EINVAL;
2925    }
2926
2927    return 0;
2928}
2929
2930static int virtio_net_ufo_pre_save(void *opaque)
2931{
2932    struct VirtIONetMigTmp *tmp = opaque;
2933
2934    tmp->has_ufo = tmp->parent->has_ufo;
2935
2936    return 0;
2937}
2938
2939static const VMStateDescription vmstate_virtio_net_has_ufo = {
2940    .name      = "virtio-net-ufo",
2941    .post_load = virtio_net_ufo_post_load,
2942    .pre_save  = virtio_net_ufo_pre_save,
2943    .fields    = (VMStateField[]) {
2944        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2945        VMSTATE_END_OF_LIST()
2946    },
2947};
2948
2949/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2950 * flag set we need to check that we have it
2951 */
2952static int virtio_net_vnet_post_load(void *opaque, int version_id)
2953{
2954    struct VirtIONetMigTmp *tmp = opaque;
2955
2956    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2957        error_report("virtio-net: saved image requires vnet_hdr=on");
2958        return -EINVAL;
2959    }
2960
2961    return 0;
2962}
2963
2964static int virtio_net_vnet_pre_save(void *opaque)
2965{
2966    struct VirtIONetMigTmp *tmp = opaque;
2967
2968    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2969
2970    return 0;
2971}
2972
2973static const VMStateDescription vmstate_virtio_net_has_vnet = {
2974    .name      = "virtio-net-vnet",
2975    .post_load = virtio_net_vnet_post_load,
2976    .pre_save  = virtio_net_vnet_pre_save,
2977    .fields    = (VMStateField[]) {
2978        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2979        VMSTATE_END_OF_LIST()
2980    },
2981};
2982
2983static bool virtio_net_rss_needed(void *opaque)
2984{
2985    return VIRTIO_NET(opaque)->rss_data.enabled;
2986}
2987
2988static const VMStateDescription vmstate_virtio_net_rss = {
2989    .name      = "virtio-net-device/rss",
2990    .version_id = 1,
2991    .minimum_version_id = 1,
2992    .needed = virtio_net_rss_needed,
2993    .fields = (VMStateField[]) {
2994        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
2995        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
2996        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
2997        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
2998        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
2999        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3000        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3001                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3002        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3003                                    rss_data.indirections_len, 0,
3004                                    vmstate_info_uint16, uint16_t),
3005        VMSTATE_END_OF_LIST()
3006    },
3007};
3008
3009static const VMStateDescription vmstate_virtio_net_device = {
3010    .name = "virtio-net-device",
3011    .version_id = VIRTIO_NET_VM_VERSION,
3012    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3013    .post_load = virtio_net_post_load_device,
3014    .fields = (VMStateField[]) {
3015        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3016        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3017                               vmstate_virtio_net_queue_tx_waiting,
3018                               VirtIONetQueue),
3019        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3020        VMSTATE_UINT16(status, VirtIONet),
3021        VMSTATE_UINT8(promisc, VirtIONet),
3022        VMSTATE_UINT8(allmulti, VirtIONet),
3023        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3024
3025        /* Guarded pair: if it fits we load it, else we throw it away.
3026         * This can happen if the source has a larger MAC table; post-load
3027         * sets the overflow flags in this case.
3028         */
3029        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3030                                0, mac_table_fits, mac_table.in_use,
3031                                 ETH_ALEN),
3032        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3033                                     mac_table.in_use, ETH_ALEN),
3034
3035        /* Note: this is an array of uint32_t that has always been saved as
3036         * a raw buffer, so beware endianness; it is actually used as a
3037         * bitmap built from those uint32_t words.
3038         */
3039        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3040        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3041                         vmstate_virtio_net_has_vnet),
3042        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3043        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3044        VMSTATE_UINT8(alluni, VirtIONet),
3045        VMSTATE_UINT8(nomulti, VirtIONet),
3046        VMSTATE_UINT8(nouni, VirtIONet),
3047        VMSTATE_UINT8(nobcast, VirtIONet),
3048        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3049                         vmstate_virtio_net_has_ufo),
3050        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3051                            vmstate_info_uint16_equal, uint16_t),
3052        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3053        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3054                         vmstate_virtio_net_tx_waiting),
3055        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3056                            has_ctrl_guest_offloads),
3057        VMSTATE_END_OF_LIST()
3058   },
3059    .subsections = (const VMStateDescription * []) {
3060        &vmstate_virtio_net_rss,
3061        NULL
3062    }
3063};
3064
3065static NetClientInfo net_virtio_info = {
3066    .type = NET_CLIENT_DRIVER_NIC,
3067    .size = sizeof(NICState),
3068    .can_receive = virtio_net_can_receive,
3069    .receive = virtio_net_receive,
3070    .link_status_changed = virtio_net_set_link_status,
3071    .query_rx_filter = virtio_net_query_rxfilter,
3072    .announce = virtio_net_announce,
3073};
3074
3075static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3076{
3077    VirtIONet *n = VIRTIO_NET(vdev);
3078    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3079    assert(n->vhost_started);
3080    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3081}
3082
3083static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3084                                           bool mask)
3085{
3086    VirtIONet *n = VIRTIO_NET(vdev);
3087    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3088    assert(n->vhost_started);
3089    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3090                             vdev, idx, mask);
3091}
3092
3093static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3094{
3095    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3096
3097    n->config_size = virtio_feature_get_config_size(feature_sizes,
3098                                                    host_features);
3099}
3100
3101void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3102                                   const char *type)
3103{
3104    /*
3105     * The name may be NULL; in that case the netclient name will be
3106     * "type.x".
3107     */
3107    assert(type != NULL);
3108
3109    g_free(n->netclient_name);
3110    g_free(n->netclient_type);
3111    n->netclient_name = g_strdup(name);
3112    n->netclient_type = g_strdup(type);
3113}
3114
3115static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3116{
3117    HotplugHandler *hotplug_ctrl;
3118    PCIDevice *pci_dev;
3119    Error *err = NULL;
3120
3121    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3122    if (hotplug_ctrl) {
3123        pci_dev = PCI_DEVICE(dev);
3124        pci_dev->partially_hotplugged = true;
3125        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3126        if (err) {
3127            error_report_err(err);
3128            return false;
3129        }
3130    } else {
3131        return false;
3132    }
3133    return true;
3134}
3135
3136static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3137                                    Error **errp)
3138{
3139    Error *err = NULL;
3140    HotplugHandler *hotplug_ctrl;
3141    PCIDevice *pdev = PCI_DEVICE(dev);
3142    BusState *primary_bus;
3143
3144    if (!pdev->partially_hotplugged) {
3145        return true;
3146    }
3147    primary_bus = dev->parent_bus;
3148    if (!primary_bus) {
3149        error_setg(errp, "virtio_net: couldn't find primary bus");
3150        return false;
3151    }
3152    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3153    qatomic_set(&n->failover_primary_hidden, false);
3154    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3155    if (hotplug_ctrl) {
3156        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3157        if (err) {
3158            goto out;
3159        }
3160        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3161    }
3162
3163out:
3164    error_propagate(errp, err);
3165    return !err;
3166}
3167
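/*
 * Failover: when migration enters setup, the primary (passthrough)
 * device is hot-unplugged from the guest and hidden; if the migration
 * later fails, it is plugged back in.  The hidden/visible state lives
 * in failover_primary_hidden and is also read by the device listener
 * in failover_hide_primary_device() below.
 */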
3168static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3169{
3170    bool should_be_hidden;
3171    Error *err = NULL;
3172    DeviceState *dev = failover_find_primary_device(n);
3173
3174    if (!dev) {
3175        return;
3176    }
3177
3178    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3179
3180    if (migration_in_setup(s) && !should_be_hidden) {
3181        if (failover_unplug_primary(n, dev)) {
3182            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3183            qapi_event_send_unplug_primary(dev->id);
3184            qatomic_set(&n->failover_primary_hidden, true);
3185        } else {
3186            warn_report("couldn't unplug primary device");
3187        }
3188    } else if (migration_has_failed(s)) {
3189        /* We already unplugged the device; let's plug it back */
3190        if (!failover_replug_primary(n, dev, &err)) {
3191            if (err) {
3192                error_report_err(err);
3193            }
3194        }
3195    }
3196}
3197
3198static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3199{
3200    MigrationState *s = data;
3201    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3202    virtio_net_handle_migration_primary(n, s);
3203}
3204
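    /*
     * DeviceListener callback: hide a primary device whose
     * failover_pair_id matches this NIC for as long as
     * failover_primary_hidden is set, i.e. until the guest has
     * acknowledged VIRTIO_NET_F_STANDBY.
     */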
3205static bool failover_hide_primary_device(DeviceListener *listener,
3206                                         QemuOpts *device_opts)
3207{
3208    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3209    const char *standby_id;
3210
3211    if (!device_opts) {
3212        return false;
3213    }
3214    standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3215    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3216        return false;
3217    }
3218
3219    /* failover_primary_hidden is set during feature negotiation */
3220    return qatomic_read(&n->failover_primary_hidden);
3221}
3222
3223static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3224{
3225    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3226    VirtIONet *n = VIRTIO_NET(dev);
3227    NetClientState *nc;
3228    int i;
3229
3230    if (n->net_conf.mtu) {
3231        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3232    }
3233
3234    if (n->net_conf.duplex_str) {
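            /* length 5 includes the NUL, so this is an exact match */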
3235        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3236            n->net_conf.duplex = DUPLEX_HALF;
3237        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3238            n->net_conf.duplex = DUPLEX_FULL;
3239        } else {
3240            error_setg(errp, "'duplex' must be 'half' or 'full'");
3241            return;
3242        }
3243        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3244    } else {
3245        n->net_conf.duplex = DUPLEX_UNKNOWN;
3246    }
3247
3248    if (n->net_conf.speed < SPEED_UNKNOWN) {
3249        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3250        return;
3251    }
3252    if (n->net_conf.speed >= 0) {
3253        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3254    }
3255
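    /*
     * Failover: the primary (e.g. VFIO) device stays hidden from the
     * guest until the feature is negotiated.  The devices are paired via
     * failover_pair_id; an illustrative command line (see
     * docs/system/virtio-net-failover.rst) looks like:
     *   -device virtio-net-pci,id=net0,netdev=hostnet0,failover=on
     *   -device vfio-pci,host=...,failover_pair_id=net0
     */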
3256    if (n->failover) {
3257        n->primary_listener.hide_device = failover_hide_primary_device;
3258        qatomic_set(&n->failover_primary_hidden, true);
3259        device_listener_register(&n->primary_listener);
3260        n->migration_state.notify = virtio_net_migration_state_notifier;
3261        add_migration_state_change_notifier(&n->migration_state);
3262        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3263    }
3264
3265    virtio_net_set_config_size(n, n->host_features);
3266    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3267
3268    /*
3269     * We set a lower limit on RX queue size to what it always was.
3270     * Guests that want a smaller ring can always resize it without
3271     * help from us (using virtio 1 and up).
3272     */
3273    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3274        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3275        !is_power_of_2(n->net_conf.rx_queue_size)) {
3276        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3277                   "must be a power of 2 between %d and %d",
3278                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3279                   VIRTQUEUE_MAX_SIZE);
3280        virtio_cleanup(vdev);
3281        return;
3282    }
3283
3284    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3285        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3286        !is_power_of_2(n->net_conf.tx_queue_size)) {
3287        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3288                   "must be a power of 2 between %d and %d",
3289                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3290                   VIRTQUEUE_MAX_SIZE);
3291        virtio_cleanup(vdev);
3292        return;
3293    }
3294
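    /*
     * Each queue pair needs two virtqueues (RX and TX) plus one shared
     * control virtqueue, hence the 2 * N + 1 bound against
     * VIRTIO_QUEUE_MAX.
     */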
3295    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3296    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3297        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3298                   "must be a positive integer no greater than %d",
3299                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3300        virtio_cleanup(vdev);
3301        return;
3302    }
3303    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3304    n->curr_queues = 1;
3305    n->tx_timeout = n->net_conf.txtimer;
3306
3307    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3308                       && strcmp(n->net_conf.tx, "bh")) {
3309        warn_report("virtio-net: "
3310                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3311                    n->net_conf.tx);
3312        error_printf("Defaulting to \"bh\"\n");
3313    }
3314
3315    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3316                                    n->net_conf.tx_queue_size);
3317
3318    for (i = 0; i < n->max_queues; i++) {
3319        virtio_net_add_queue(n, i);
3320    }
3321
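    /*
     * The control virtqueue is always allocated last, after every RX/TX
     * queue pair.
     */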
3322    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3323    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3324    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3325    n->status = VIRTIO_NET_S_LINK_UP;
3326    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3327                              QEMU_CLOCK_VIRTUAL,
3328                              virtio_net_announce_timer, n);
3329    n->announce_timer.round = 0;
3330
3331    if (n->netclient_type) {
3332        /*
3333         * This happens when virtio_net_set_netclient_name has been called.
3334         */
3335        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3336                              n->netclient_type, n->netclient_name, n);
3337    } else {
3338        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3339                              object_get_typename(OBJECT(dev)), dev->id, n);
3340    }
3341
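    /*
     * virtio-net guests can receive frames shorter than the Ethernet
     * minimum, so do not pad them to 60 bytes.
     */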
3342    for (i = 0; i < n->max_queues; i++) {
3343        n->nic->ncs[i].do_not_pad = true;
3344    }
3345
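    /*
     * If the peer (e.g. a tap backend) understands the virtio-net header,
     * enable it on every subqueue and account for it in host_hdr_len.
     */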
3346    peer_test_vnet_hdr(n);
3347    if (peer_has_vnet_hdr(n)) {
3348        for (i = 0; i < n->max_queues; i++) {
3349            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3350        }
3351        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3352    } else {
3353        n->host_hdr_len = 0;
3354    }
3355
3356    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3357
3358    n->vqs[0].tx_waiting = 0;
3359    n->tx_burst = n->net_conf.txburst;
3360    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3361    n->promisc = 1; /* for compatibility */
3362
3363    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3364
3365    n->vlans = g_malloc0(MAX_VLAN >> 3);
3366
3367    nc = qemu_get_queue(n->nic);
3368    nc->rxfilter_notify_enabled = 1;
3369
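    /*
     * A vhost-vdpa backend keeps its own copy of the device config space,
     * so push the configured MAC down to it here.
     */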
3370    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3371        struct virtio_net_config netcfg = {};
3372        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3373        vhost_net_set_config(get_vhost_net(nc->peer),
3374            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3375    }
3376    QTAILQ_INIT(&n->rsc_chains);
3377    n->qdev = dev;
3378
3379    net_rx_pkt_init(&n->rx_pkt, false);
3380}
3381
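    /*
     * Tear down in roughly the reverse order of realize: stop the (vhost)
     * backend first so nothing keeps writing to guest memory, then release
     * the queues, the announce timer and the NIC itself.
     */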
3382static void virtio_net_device_unrealize(DeviceState *dev)
3383{
3384    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3385    VirtIONet *n = VIRTIO_NET(dev);
3386    int i, max_queues;
3387
3388    /* This will stop the vhost backend if appropriate. */
3389    virtio_net_set_status(vdev, 0);
3390
3391    g_free(n->netclient_name);
3392    n->netclient_name = NULL;
3393    g_free(n->netclient_type);
3394    n->netclient_type = NULL;
3395
3396    g_free(n->mac_table.macs);
3397    g_free(n->vlans);
3398
3399    if (n->failover) {
3400        device_listener_unregister(&n->primary_listener);
3401    }
3402
3403    max_queues = n->multiqueue ? n->max_queues : 1;
3404    for (i = 0; i < max_queues; i++) {
3405        virtio_net_del_queue(n, i);
3406    }
3407    /* also delete the control vq */
3408    virtio_del_queue(vdev, max_queues * 2);
3409    qemu_announce_timer_del(&n->announce_timer, false);
3410    g_free(n->vqs);
3411    qemu_del_nic(n->nic);
3412    virtio_net_rsc_cleanup(n);
3413    g_free(n->rss_data.indirections_table);
3414    net_rx_pkt_uninit(n->rx_pkt);
3415    virtio_cleanup(vdev);
3416}
3417
3418static void virtio_net_instance_init(Object *obj)
3419{
3420    VirtIONet *n = VIRTIO_NET(obj);
3421
3422    /*
3423     * The default config_size is sizeof(struct virtio_net_config).
3424     * Can be overridden with virtio_net_set_config_size.
3425     */
3426    n->config_size = sizeof(struct virtio_net_config);
3427    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3428                                  "bootindex", "/ethernet-phy@0",
3429                                  DEVICE(n));
3430}
3431
3432static int virtio_net_pre_save(void *opaque)
3433{
3434    VirtIONet *n = opaque;
3435
3436    /* At this point, the backend must be stopped; otherwise
3437     * it might keep writing to memory. */
3438    assert(!n->vhost_started);
3439
3440    return 0;
3441}
3442
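    /*
     * Tell the migration code whether the failover primary still has a
     * pending unplug event: migration waits for the guest to release the
     * primary before the device state is saved.
     */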
3443static bool primary_unplug_pending(void *opaque)
3444{
3445    DeviceState *dev = opaque;
3446    DeviceState *primary;
3447    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3448    VirtIONet *n = VIRTIO_NET(vdev);
3449
3450    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3451        return false;
3452    }
3453    primary = failover_find_primary_device(n);
3454    return primary ? primary->pending_deleted_event : false;
3455}
3456
3457static bool dev_unplug_pending(void *opaque)
3458{
3459    DeviceState *dev = opaque;
3460    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3461
3462    return vdc->primary_unplug_pending(dev);
3463}
3464
3465static const VMStateDescription vmstate_virtio_net = {
3466    .name = "virtio-net",
3467    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3468    .version_id = VIRTIO_NET_VM_VERSION,
3469    .fields = (VMStateField[]) {
3470        VMSTATE_VIRTIO_DEVICE,
3471        VMSTATE_END_OF_LIST()
3472    },
3473    .pre_save = virtio_net_pre_save,
3474    .dev_unplug_pending = dev_unplug_pending,
3475};
3476
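    /*
     * Most feature bits default to on; mq, rss, hash and guest_rsc_ext
     * are opt-in.  Each DEFINE_PROP_BIT64 entry exposes one bit of
     * host_features as a user-togglable property.
     */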
3477static Property virtio_net_properties[] = {
3478    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3479                    VIRTIO_NET_F_CSUM, true),
3480    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3481                    VIRTIO_NET_F_GUEST_CSUM, true),
3482    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3483    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3484                    VIRTIO_NET_F_GUEST_TSO4, true),
3485    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3486                    VIRTIO_NET_F_GUEST_TSO6, true),
3487    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3488                    VIRTIO_NET_F_GUEST_ECN, true),
3489    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3490                    VIRTIO_NET_F_GUEST_UFO, true),
3491    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3492                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3493    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3494                    VIRTIO_NET_F_HOST_TSO4, true),
3495    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3496                    VIRTIO_NET_F_HOST_TSO6, true),
3497    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3498                    VIRTIO_NET_F_HOST_ECN, true),
3499    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3500                    VIRTIO_NET_F_HOST_UFO, true),
3501    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3502                    VIRTIO_NET_F_MRG_RXBUF, true),
3503    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3504                    VIRTIO_NET_F_STATUS, true),
3505    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3506                    VIRTIO_NET_F_CTRL_VQ, true),
3507    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3508                    VIRTIO_NET_F_CTRL_RX, true),
3509    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3510                    VIRTIO_NET_F_CTRL_VLAN, true),
3511    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3512                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3513    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3514                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3515    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3516                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3517    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3518    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3519                    VIRTIO_NET_F_RSS, false),
3520    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3521                    VIRTIO_NET_F_HASH_REPORT, false),
3522    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3523                    VIRTIO_NET_F_RSC_EXT, false),
3524    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3525                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3526    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3527    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3528                       TX_TIMER_INTERVAL),
3529    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3530    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3531    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3532                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3533    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3534                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3535    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3536    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3537                     true),
3538    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3539    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3540    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3541    DEFINE_PROP_END_OF_LIST(),
3542};
3543
3544static void virtio_net_class_init(ObjectClass *klass, void *data)
3545{
3546    DeviceClass *dc = DEVICE_CLASS(klass);
3547    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3548
3549    device_class_set_props(dc, virtio_net_properties);
3550    dc->vmsd = &vmstate_virtio_net;
3551    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3552    vdc->realize = virtio_net_device_realize;
3553    vdc->unrealize = virtio_net_device_unrealize;
3554    vdc->get_config = virtio_net_get_config;
3555    vdc->set_config = virtio_net_set_config;
3556    vdc->get_features = virtio_net_get_features;
3557    vdc->set_features = virtio_net_set_features;
3558    vdc->bad_features = virtio_net_bad_features;
3559    vdc->reset = virtio_net_reset;
3560    vdc->set_status = virtio_net_set_status;
3561    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3562    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3563    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3564    vdc->post_load = virtio_net_post_load_virtio;
3565    vdc->vmsd = &vmstate_virtio_net_device;
3566    vdc->primary_unplug_pending = primary_unplug_pending;
3567}
3568
3569static const TypeInfo virtio_net_info = {
3570    .name = TYPE_VIRTIO_NET,
3571    .parent = TYPE_VIRTIO_DEVICE,
3572    .instance_size = sizeof(VirtIONet),
3573    .instance_init = virtio_net_instance_init,
3574    .class_init = virtio_net_class_init,
3575};
3576
3577static void virtio_register_types(void)
3578{
3579    type_register_static(&virtio_net_info);
3580}
3581
3582type_init(virtio_register_types)
3583