qemu/hw/net/virtio-net.c
<<
>>
Prefs
   1/*
   2 * Virtio Network Device
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/atomic.h"
  16#include "qemu/iov.h"
  17#include "qemu/main-loop.h"
  18#include "qemu/module.h"
  19#include "hw/virtio/virtio.h"
  20#include "net/net.h"
  21#include "net/checksum.h"
  22#include "net/tap.h"
  23#include "qemu/error-report.h"
  24#include "qemu/timer.h"
  25#include "qemu/option.h"
  26#include "qemu/option_int.h"
  27#include "qemu/config-file.h"
  28#include "qapi/qmp/qdict.h"
  29#include "hw/virtio/virtio-net.h"
  30#include "net/vhost_net.h"
  31#include "net/announce.h"
  32#include "hw/virtio/virtio-bus.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-net.h"
  35#include "hw/qdev-properties.h"
  36#include "qapi/qapi-types-migration.h"
  37#include "qapi/qapi-events-migration.h"
  38#include "hw/virtio/virtio-access.h"
  39#include "migration/misc.h"
  40#include "standard-headers/linux/ethtool.h"
  41#include "sysemu/sysemu.h"
  42#include "trace.h"
  43#include "monitor/qdev.h"
  44#include "hw/pci/pci.h"
  45#include "net_rx_pkt.h"
  46#include "hw/virtio/vhost.h"
  47
  48#define VIRTIO_NET_VM_VERSION    11
  49
  50#define MAC_TABLE_ENTRIES    64
  51#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  52
  53/* previously fixed value */
  54#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  55#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  56
  57/* for now, only allow larger queues; with virtio-1, guest can downsize */
  58#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  59#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  60
  61#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  62
  63#define VIRTIO_NET_TCP_FLAG         0x3F
  64#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  65
  66/* IPv4 max payload, 16 bits in the header */
  67#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  68#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  69
  70/* header length value in ip header without option */
  71#define VIRTIO_NET_IP4_HEADER_LENGTH 5
  72
  73#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  74#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  75
/*
 * Purge timer interval for coalesced packets. This value affects performance
 * significantly and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
  80#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  81
  82#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  83                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  84                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  85                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  86                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  87                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  88                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  89                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  90                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  91
/*
 * Table pairing feature bits with the end offset (within
 * struct virtio_net_config) of the last config field that feature uses.
 * The list is terminated by an all-zero entry.
 * NOTE(review): presumably consumed when sizing the config space for the
 * negotiated host features - confirm against the user of this table.
 */
static const VirtIOFeature feature_sizes[] = {
    /* MAC address field */
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    /* link status field */
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    /* multiqueue: maximum number of virtqueue pairs */
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    /* MTU field */
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    /* speed/duplex: duplex is the last field covered */
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    /* either RSS or hash reporting extends the config up to the hash types */
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    /* terminator */
    {}
};
 107
 108static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 109{
 110    VirtIONet *n = qemu_get_nic_opaque(nc);
 111
 112    return &n->vqs[nc->queue_index];
 113}
 114
 115static int vq2q(int queue_index)
 116{
 117    return queue_index / 2;
 118}
 119
 120/* TODO
 121 * - we could suppress RX interrupt if we were so inclined.
 122 */
 123
 124static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 125{
 126    VirtIONet *n = VIRTIO_NET(vdev);
 127    struct virtio_net_config netcfg;
 128    NetClientState *nc = qemu_get_queue(n->nic);
 129    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 130
 131    int ret = 0;
 132    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 133    virtio_stw_p(vdev, &netcfg.status, n->status);
 134    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
 135    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 136    memcpy(netcfg.mac, n->mac, ETH_ALEN);
 137    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 138    netcfg.duplex = n->net_conf.duplex;
 139    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 140    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 141                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 142                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 143    virtio_stl_p(vdev, &netcfg.supported_hash_types,
 144                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
 145    memcpy(config, &netcfg, n->config_size);
 146
 147    /*
 148     * Is this VDPA? No peer means not VDPA: there's no way to
 149     * disconnect/reconnect a VDPA peer.
 150     */
 151    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 152        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 153                                   n->config_size);
 154        if (ret != -1) {
 155            /*
 156             * Some NIC/kernel combinations present 0 as the mac address.  As
 157             * that is not a legal address, try to proceed with the
 158             * address from the QEMU command line in the hope that the
 159             * address has been configured correctly elsewhere - just not
 160             * reported by the device.
 161             */
 162            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 163                info_report("Zero hardware mac address detected. Ignoring.");
 164                memcpy(netcfg.mac, n->mac, ETH_ALEN);
 165            }
 166            memcpy(config, &netcfg, n->config_size);
 167        }
 168    }
 169}
 170
 171static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 172{
 173    VirtIONet *n = VIRTIO_NET(vdev);
 174    struct virtio_net_config netcfg = {};
 175    NetClientState *nc = qemu_get_queue(n->nic);
 176
 177    memcpy(&netcfg, config, n->config_size);
 178
 179    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 180        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 181        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 182        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 183        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 184    }
 185
 186    /*
 187     * Is this VDPA? No peer means not VDPA: there's no way to
 188     * disconnect/reconnect a VDPA peer.
 189     */
 190    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 191        vhost_net_set_config(get_vhost_net(nc->peer),
 192                             (uint8_t *)&netcfg, 0, n->config_size,
 193                             VHOST_SET_CONFIG_TYPE_MASTER);
 194      }
 195}
 196
 197static bool virtio_net_started(VirtIONet *n, uint8_t status)
 198{
 199    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 200    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 201        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 202}
 203
 204static void virtio_net_announce_notify(VirtIONet *net)
 205{
 206    VirtIODevice *vdev = VIRTIO_DEVICE(net);
 207    trace_virtio_net_announce_notify();
 208
 209    net->status |= VIRTIO_NET_S_ANNOUNCE;
 210    virtio_notify_config(vdev);
 211}
 212
 213static void virtio_net_announce_timer(void *opaque)
 214{
 215    VirtIONet *n = opaque;
 216    trace_virtio_net_announce_timer(n->announce_timer.round);
 217
 218    n->announce_timer.round--;
 219    virtio_net_announce_notify(n);
 220}
 221
 222static void virtio_net_announce(NetClientState *nc)
 223{
 224    VirtIONet *n = qemu_get_nic_opaque(nc);
 225    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 226
 227    /*
 228     * Make sure the virtio migration announcement timer isn't running
 229     * If it is, let it trigger announcement so that we do not cause
 230     * confusion.
 231     */
 232    if (n->announce_timer.round) {
 233        return;
 234    }
 235
 236    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 237        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 238            virtio_net_announce_notify(n);
 239    }
 240}
 241
 242static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 243{
 244    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 245    NetClientState *nc = qemu_get_queue(n->nic);
 246    int queues = n->multiqueue ? n->max_queues : 1;
 247
 248    if (!get_vhost_net(nc->peer)) {
 249        return;
 250    }
 251
 252    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 253        !!n->vhost_started) {
 254        return;
 255    }
 256    if (!n->vhost_started) {
 257        int r, i;
 258
 259        if (n->needs_vnet_hdr_swap) {
 260            error_report("backend does not support %s vnet headers; "
 261                         "falling back on userspace virtio",
 262                         virtio_is_big_endian(vdev) ? "BE" : "LE");
 263            return;
 264        }
 265
 266        /* Any packets outstanding? Purge them to avoid touching rings
 267         * when vhost is running.
 268         */
 269        for (i = 0;  i < queues; i++) {
 270            NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 271
 272            /* Purge both directions: TX and RX. */
 273            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 274            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 275        }
 276
 277        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 278            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 279            if (r < 0) {
 280                error_report("%uBytes MTU not supported by the backend",
 281                             n->net_conf.mtu);
 282
 283                return;
 284            }
 285        }
 286
 287        n->vhost_started = 1;
 288        r = vhost_net_start(vdev, n->nic->ncs, queues);
 289        if (r < 0) {
 290            error_report("unable to start vhost net: %d: "
 291                         "falling back on userspace virtio", -r);
 292            n->vhost_started = 0;
 293        }
 294    } else {
 295        vhost_net_stop(vdev, n->nic->ncs, queues);
 296        n->vhost_started = 0;
 297    }
 298}
 299
 300static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 301                                          NetClientState *peer,
 302                                          bool enable)
 303{
 304    if (virtio_is_big_endian(vdev)) {
 305        return qemu_set_vnet_be(peer, enable);
 306    } else {
 307        return qemu_set_vnet_le(peer, enable);
 308    }
 309}
 310
 311static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 312                                       int queues, bool enable)
 313{
 314    int i;
 315
 316    for (i = 0; i < queues; i++) {
 317        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 318            enable) {
 319            while (--i >= 0) {
 320                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 321            }
 322
 323            return true;
 324        }
 325    }
 326
 327    return false;
 328}
 329
 330static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 331{
 332    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 333    int queues = n->multiqueue ? n->max_queues : 1;
 334
 335    if (virtio_net_started(n, status)) {
 336        /* Before using the device, we tell the network backend about the
 337         * endianness to use when parsing vnet headers. If the backend
 338         * can't do it, we fallback onto fixing the headers in the core
 339         * virtio-net code.
 340         */
 341        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 342                                                            queues, true);
 343    } else if (virtio_net_started(n, vdev->status)) {
 344        /* After using the device, we need to reset the network backend to
 345         * the default (guest native endianness), otherwise the guest may
 346         * lose network connectivity if it is rebooted into a different
 347         * endianness.
 348         */
 349        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
 350    }
 351}
 352
 353static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 354{
 355    unsigned int dropped = virtqueue_drop_all(vq);
 356    if (dropped) {
 357        virtio_notify(vdev, vq);
 358    }
 359}
 360
 361static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 362{
 363    VirtIONet *n = VIRTIO_NET(vdev);
 364    VirtIONetQueue *q;
 365    int i;
 366    uint8_t queue_status;
 367
 368    virtio_net_vnet_endian_status(n, status);
 369    virtio_net_vhost_status(n, status);
 370
 371    for (i = 0; i < n->max_queues; i++) {
 372        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 373        bool queue_started;
 374        q = &n->vqs[i];
 375
 376        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
 377            queue_status = 0;
 378        } else {
 379            queue_status = status;
 380        }
 381        queue_started =
 382            virtio_net_started(n, queue_status) && !n->vhost_started;
 383
 384        if (queue_started) {
 385            qemu_flush_queued_packets(ncs);
 386        }
 387
 388        if (!q->tx_waiting) {
 389            continue;
 390        }
 391
 392        if (queue_started) {
 393            if (q->tx_timer) {
 394                timer_mod(q->tx_timer,
 395                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 396            } else {
 397                qemu_bh_schedule(q->tx_bh);
 398            }
 399        } else {
 400            if (q->tx_timer) {
 401                timer_del(q->tx_timer);
 402            } else {
 403                qemu_bh_cancel(q->tx_bh);
 404            }
 405            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 406                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 407                vdev->vm_running) {
 408                /* if tx is waiting we are likely have some packets in tx queue
 409                 * and disabled notification */
 410                q->tx_waiting = 0;
 411                virtio_queue_set_notification(q->tx_vq, 1);
 412                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 413            }
 414        }
 415    }
 416}
 417
 418static void virtio_net_set_link_status(NetClientState *nc)
 419{
 420    VirtIONet *n = qemu_get_nic_opaque(nc);
 421    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 422    uint16_t old_status = n->status;
 423
 424    if (nc->link_down)
 425        n->status &= ~VIRTIO_NET_S_LINK_UP;
 426    else
 427        n->status |= VIRTIO_NET_S_LINK_UP;
 428
 429    if (n->status != old_status)
 430        virtio_notify_config(vdev);
 431
 432    virtio_net_set_status(vdev, vdev->status);
 433}
 434
 435static void rxfilter_notify(NetClientState *nc)
 436{
 437    VirtIONet *n = qemu_get_nic_opaque(nc);
 438
 439    if (nc->rxfilter_notify_enabled) {
 440        char *path = object_get_canonical_path(OBJECT(n->qdev));
 441        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 442                                              n->netclient_name, path);
 443        g_free(path);
 444
 445        /* disable event notification to avoid events flooding */
 446        nc->rxfilter_notify_enabled = 0;
 447    }
 448}
 449
 450static intList *get_vlan_table(VirtIONet *n)
 451{
 452    intList *list;
 453    int i, j;
 454
 455    list = NULL;
 456    for (i = 0; i < MAX_VLAN >> 5; i++) {
 457        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 458            if (n->vlans[i] & (1U << j)) {
 459                QAPI_LIST_PREPEND(list, (i << 5) + j);
 460            }
 461        }
 462    }
 463
 464    return list;
 465}
 466
 467static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 468{
 469    VirtIONet *n = qemu_get_nic_opaque(nc);
 470    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 471    RxFilterInfo *info;
 472    strList *str_list;
 473    int i;
 474
 475    info = g_malloc0(sizeof(*info));
 476    info->name = g_strdup(nc->name);
 477    info->promiscuous = n->promisc;
 478
 479    if (n->nouni) {
 480        info->unicast = RX_STATE_NONE;
 481    } else if (n->alluni) {
 482        info->unicast = RX_STATE_ALL;
 483    } else {
 484        info->unicast = RX_STATE_NORMAL;
 485    }
 486
 487    if (n->nomulti) {
 488        info->multicast = RX_STATE_NONE;
 489    } else if (n->allmulti) {
 490        info->multicast = RX_STATE_ALL;
 491    } else {
 492        info->multicast = RX_STATE_NORMAL;
 493    }
 494
 495    info->broadcast_allowed = n->nobcast;
 496    info->multicast_overflow = n->mac_table.multi_overflow;
 497    info->unicast_overflow = n->mac_table.uni_overflow;
 498
 499    info->main_mac = qemu_mac_strdup_printf(n->mac);
 500
 501    str_list = NULL;
 502    for (i = 0; i < n->mac_table.first_multi; i++) {
 503        QAPI_LIST_PREPEND(str_list,
 504                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 505    }
 506    info->unicast_table = str_list;
 507
 508    str_list = NULL;
 509    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 510        QAPI_LIST_PREPEND(str_list,
 511                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 512    }
 513    info->multicast_table = str_list;
 514    info->vlan_table = get_vlan_table(n);
 515
 516    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 517        info->vlan = RX_STATE_ALL;
 518    } else if (!info->vlan_table) {
 519        info->vlan = RX_STATE_NONE;
 520    } else {
 521        info->vlan = RX_STATE_NORMAL;
 522    }
 523
 524    /* enable event notification after query */
 525    nc->rxfilter_notify_enabled = 1;
 526
 527    return info;
 528}
 529
 530static void virtio_net_reset(VirtIODevice *vdev)
 531{
 532    VirtIONet *n = VIRTIO_NET(vdev);
 533    int i;
 534
 535    /* Reset back to compatibility mode */
 536    n->promisc = 1;
 537    n->allmulti = 0;
 538    n->alluni = 0;
 539    n->nomulti = 0;
 540    n->nouni = 0;
 541    n->nobcast = 0;
 542    /* multiqueue is disabled by default */
 543    n->curr_queues = 1;
 544    timer_del(n->announce_timer.tm);
 545    n->announce_timer.round = 0;
 546    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 547
 548    /* Flush any MAC and VLAN filter table state */
 549    n->mac_table.in_use = 0;
 550    n->mac_table.first_multi = 0;
 551    n->mac_table.multi_overflow = 0;
 552    n->mac_table.uni_overflow = 0;
 553    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 554    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 555    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 556    memset(n->vlans, 0, MAX_VLAN >> 3);
 557
 558    /* Flush any async TX */
 559    for (i = 0;  i < n->max_queues; i++) {
 560        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 561
 562        if (nc->peer) {
 563            qemu_flush_or_purge_queued_packets(nc->peer, true);
 564            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 565        }
 566    }
 567}
 568
 569static void peer_test_vnet_hdr(VirtIONet *n)
 570{
 571    NetClientState *nc = qemu_get_queue(n->nic);
 572    if (!nc->peer) {
 573        return;
 574    }
 575
 576    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 577}
 578
 579static int peer_has_vnet_hdr(VirtIONet *n)
 580{
 581    return n->has_vnet_hdr;
 582}
 583
 584static int peer_has_ufo(VirtIONet *n)
 585{
 586    if (!peer_has_vnet_hdr(n))
 587        return 0;
 588
 589    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 590
 591    return n->has_ufo;
 592}
 593
 594static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 595                                       int version_1, int hash_report)
 596{
 597    int i;
 598    NetClientState *nc;
 599
 600    n->mergeable_rx_bufs = mergeable_rx_bufs;
 601
 602    if (version_1) {
 603        n->guest_hdr_len = hash_report ?
 604            sizeof(struct virtio_net_hdr_v1_hash) :
 605            sizeof(struct virtio_net_hdr_mrg_rxbuf);
 606        n->rss_data.populate_hash = !!hash_report;
 607    } else {
 608        n->guest_hdr_len = n->mergeable_rx_bufs ?
 609            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 610            sizeof(struct virtio_net_hdr);
 611    }
 612
 613    for (i = 0; i < n->max_queues; i++) {
 614        nc = qemu_get_subqueue(n->nic, i);
 615
 616        if (peer_has_vnet_hdr(n) &&
 617            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 618            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 619            n->host_hdr_len = n->guest_hdr_len;
 620        }
 621    }
 622}
 623
 624static int virtio_net_max_tx_queue_size(VirtIONet *n)
 625{
 626    NetClientState *peer = n->nic_conf.peers.ncs[0];
 627
 628    /*
 629     * Backends other than vhost-user don't support max queue size.
 630     */
 631    if (!peer) {
 632        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 633    }
 634
 635    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
 636        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 637    }
 638
 639    return VIRTQUEUE_MAX_SIZE;
 640}
 641
 642static int peer_attach(VirtIONet *n, int index)
 643{
 644    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 645
 646    if (!nc->peer) {
 647        return 0;
 648    }
 649
 650    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 651        vhost_set_vring_enable(nc->peer, 1);
 652    }
 653
 654    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 655        return 0;
 656    }
 657
 658    if (n->max_queues == 1) {
 659        return 0;
 660    }
 661
 662    return tap_enable(nc->peer);
 663}
 664
 665static int peer_detach(VirtIONet *n, int index)
 666{
 667    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 668
 669    if (!nc->peer) {
 670        return 0;
 671    }
 672
 673    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 674        vhost_set_vring_enable(nc->peer, 0);
 675    }
 676
 677    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 678        return 0;
 679    }
 680
 681    return tap_disable(nc->peer);
 682}
 683
 684static void virtio_net_set_queues(VirtIONet *n)
 685{
 686    int i;
 687    int r;
 688
 689    if (n->nic->peer_deleted) {
 690        return;
 691    }
 692
 693    for (i = 0; i < n->max_queues; i++) {
 694        if (i < n->curr_queues) {
 695            r = peer_attach(n, i);
 696            assert(!r);
 697        } else {
 698            r = peer_detach(n, i);
 699            assert(!r);
 700        }
 701    }
 702}
 703
 704static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 705
 706static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 707                                        Error **errp)
 708{
 709    VirtIONet *n = VIRTIO_NET(vdev);
 710    NetClientState *nc = qemu_get_queue(n->nic);
 711
 712    /* Firstly sync all virtio-net possible supported features */
 713    features |= n->host_features;
 714
 715    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 716
 717    if (!peer_has_vnet_hdr(n)) {
 718        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 719        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 720        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 721        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 722
 723        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 724        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 725        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 726        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 727
 728        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 729    }
 730
 731    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 732        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 733        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 734    }
 735
 736    if (!get_vhost_net(nc->peer)) {
 737        return features;
 738    }
 739
 740    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 741        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 742    }
 743    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 744    vdev->backend_features = features;
 745
 746    if (n->mtu_bypass_backend &&
 747            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 748        features |= (1ULL << VIRTIO_NET_F_MTU);
 749    }
 750
 751    return features;
 752}
 753
 754static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 755{
 756    uint64_t features = 0;
 757
 758    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 759     * but also these: */
 760    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 761    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 762    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 763    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 764    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 765
 766    return features;
 767}
 768
 769static void virtio_net_apply_guest_offloads(VirtIONet *n)
 770{
 771    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 772            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 773            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 774            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 775            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 776            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 777}
 778
 779static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 780{
 781    static const uint64_t guest_offloads_mask =
 782        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 783        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 784        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 785        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 786        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 787
 788    return guest_offloads_mask & features;
 789}
 790
 791static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 792{
 793    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 794    return virtio_net_guest_offloads_by_features(vdev->guest_features);
 795}
 796
/*
 * Search state passed as the opaque argument to failover_set_primary()
 * while iterating over the "device" options.
 */
typedef struct {
    /* device whose netclient_name is compared with failover_pair_id */
    VirtIONet *n;
    /* g_strdup()'d id of the matching options entry; set only on a match */
    char *id;
} FailoverId;
 801
 802/**
 803 * Set the id of the failover primary device
 804 *
 805 * @opaque: FailoverId to setup
 806 * @opts: opts for device we are handling
 807 * @errp: returns an error if this function fails
 808 */
 809static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
 810{
 811    FailoverId *fid = opaque;
 812    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
 813
 814    if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
 815        fid->id = g_strdup(opts->id);
 816        return 1;
 817    }
 818
 819    return 0;
 820}
 821
 822/**
 823 * Find the primary device id for this failover virtio-net
 824 *
 825 * @n: VirtIONet device
 826 * @errp: returns an error if this function fails
 827 */
 828static char *failover_find_primary_device_id(VirtIONet *n)
 829{
 830    Error *err = NULL;
 831    FailoverId fid;
 832
 833    fid.n = n;
 834    if (!qemu_opts_foreach(qemu_find_opts("device"),
 835                           failover_set_primary, &fid, &err)) {
 836        return NULL;
 837    }
 838    return fid.id;
 839}
 840
 841/**
 842 * Find the primary device for this failover virtio-net
 843 *
 844 * @n: VirtIONet device
 845 * @errp: returns an error if this function fails
 846 */
 847static DeviceState *failover_find_primary_device(VirtIONet *n)
 848{
 849    char *id = failover_find_primary_device_id(n);
 850
 851    if (!id) {
 852        return NULL;
 853    }
 854
 855    return qdev_find_recursive(sysbus_get_default(), id);
 856}
 857
 858static void failover_add_primary(VirtIONet *n, Error **errp)
 859{
 860    Error *err = NULL;
 861    QemuOpts *opts;
 862    char *id;
 863    DeviceState *dev = failover_find_primary_device(n);
 864
 865    if (dev) {
 866        return;
 867    }
 868
 869    id = failover_find_primary_device_id(n);
 870    if (!id) {
 871        error_setg(errp, "Primary device not found");
 872        error_append_hint(errp, "Virtio-net failover will not work. Make "
 873                          "sure primary device has parameter"
 874                          " failover_pair_id=%s\n", n->netclient_name);
 875        return;
 876    }
 877    opts = qemu_opts_find(qemu_find_opts("device"), id);
 878    g_assert(opts); /* cannot be NULL because id was found using opts list */
 879    dev = qdev_device_add(opts, &err);
 880    if (err) {
 881        qemu_opts_del(opts);
 882    } else {
 883        object_unref(OBJECT(dev));
 884    }
 885    error_propagate(errp, err);
 886}
 887
 888static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 889{
 890    VirtIONet *n = VIRTIO_NET(vdev);
 891    Error *err = NULL;
 892    int i;
 893
 894    if (n->mtu_bypass_backend &&
 895            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 896        features &= ~(1ULL << VIRTIO_NET_F_MTU);
 897    }
 898
 899    virtio_net_set_multiqueue(n,
 900                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 901                              virtio_has_feature(features, VIRTIO_NET_F_MQ));
 902
 903    virtio_net_set_mrg_rx_bufs(n,
 904                               virtio_has_feature(features,
 905                                                  VIRTIO_NET_F_MRG_RXBUF),
 906                               virtio_has_feature(features,
 907                                                  VIRTIO_F_VERSION_1),
 908                               virtio_has_feature(features,
 909                                                  VIRTIO_NET_F_HASH_REPORT));
 910
 911    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 912        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 913    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 914        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 915    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 916
 917    if (n->has_vnet_hdr) {
 918        n->curr_guest_offloads =
 919            virtio_net_guest_offloads_by_features(features);
 920        virtio_net_apply_guest_offloads(n);
 921    }
 922
 923    for (i = 0;  i < n->max_queues; i++) {
 924        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 925
 926        if (!get_vhost_net(nc->peer)) {
 927            continue;
 928        }
 929        vhost_net_ack_features(get_vhost_net(nc->peer), features);
 930    }
 931
 932    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 933        memset(n->vlans, 0, MAX_VLAN >> 3);
 934    } else {
 935        memset(n->vlans, 0xff, MAX_VLAN >> 3);
 936    }
 937
 938    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
 939        qapi_event_send_failover_negotiated(n->netclient_name);
 940        qatomic_set(&n->failover_primary_hidden, false);
 941        failover_add_primary(n, &err);
 942        if (err) {
 943            warn_report_err(err);
 944        }
 945    }
 946}
 947
 948static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 949                                     struct iovec *iov, unsigned int iov_cnt)
 950{
 951    uint8_t on;
 952    size_t s;
 953    NetClientState *nc = qemu_get_queue(n->nic);
 954
 955    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 956    if (s != sizeof(on)) {
 957        return VIRTIO_NET_ERR;
 958    }
 959
 960    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 961        n->promisc = on;
 962    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 963        n->allmulti = on;
 964    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 965        n->alluni = on;
 966    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 967        n->nomulti = on;
 968    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 969        n->nouni = on;
 970    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 971        n->nobcast = on;
 972    } else {
 973        return VIRTIO_NET_ERR;
 974    }
 975
 976    rxfilter_notify(nc);
 977
 978    return VIRTIO_NET_OK;
 979}
 980
 981static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 982                                     struct iovec *iov, unsigned int iov_cnt)
 983{
 984    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 985    uint64_t offloads;
 986    size_t s;
 987
 988    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 989        return VIRTIO_NET_ERR;
 990    }
 991
 992    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 993    if (s != sizeof(offloads)) {
 994        return VIRTIO_NET_ERR;
 995    }
 996
 997    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 998        uint64_t supported_offloads;
 999
1000        offloads = virtio_ldq_p(vdev, &offloads);
1001
1002        if (!n->has_vnet_hdr) {
1003            return VIRTIO_NET_ERR;
1004        }
1005
1006        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1007            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1008        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1009            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1010        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1011
1012        supported_offloads = virtio_net_supported_guest_offloads(n);
1013        if (offloads & ~supported_offloads) {
1014            return VIRTIO_NET_ERR;
1015        }
1016
1017        n->curr_guest_offloads = offloads;
1018        virtio_net_apply_guest_offloads(n);
1019
1020        return VIRTIO_NET_OK;
1021    } else {
1022        return VIRTIO_NET_ERR;
1023    }
1024}
1025
1026static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1027                                 struct iovec *iov, unsigned int iov_cnt)
1028{
1029    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1030    struct virtio_net_ctrl_mac mac_data;
1031    size_t s;
1032    NetClientState *nc = qemu_get_queue(n->nic);
1033
1034    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1035        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1036            return VIRTIO_NET_ERR;
1037        }
1038        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1039        assert(s == sizeof(n->mac));
1040        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1041        rxfilter_notify(nc);
1042
1043        return VIRTIO_NET_OK;
1044    }
1045
1046    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1047        return VIRTIO_NET_ERR;
1048    }
1049
1050    int in_use = 0;
1051    int first_multi = 0;
1052    uint8_t uni_overflow = 0;
1053    uint8_t multi_overflow = 0;
1054    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1055
1056    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1057                   sizeof(mac_data.entries));
1058    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1059    if (s != sizeof(mac_data.entries)) {
1060        goto error;
1061    }
1062    iov_discard_front(&iov, &iov_cnt, s);
1063
1064    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1065        goto error;
1066    }
1067
1068    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1069        s = iov_to_buf(iov, iov_cnt, 0, macs,
1070                       mac_data.entries * ETH_ALEN);
1071        if (s != mac_data.entries * ETH_ALEN) {
1072            goto error;
1073        }
1074        in_use += mac_data.entries;
1075    } else {
1076        uni_overflow = 1;
1077    }
1078
1079    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1080
1081    first_multi = in_use;
1082
1083    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1084                   sizeof(mac_data.entries));
1085    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1086    if (s != sizeof(mac_data.entries)) {
1087        goto error;
1088    }
1089
1090    iov_discard_front(&iov, &iov_cnt, s);
1091
1092    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1093        goto error;
1094    }
1095
1096    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1097        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1098                       mac_data.entries * ETH_ALEN);
1099        if (s != mac_data.entries * ETH_ALEN) {
1100            goto error;
1101        }
1102        in_use += mac_data.entries;
1103    } else {
1104        multi_overflow = 1;
1105    }
1106
1107    n->mac_table.in_use = in_use;
1108    n->mac_table.first_multi = first_multi;
1109    n->mac_table.uni_overflow = uni_overflow;
1110    n->mac_table.multi_overflow = multi_overflow;
1111    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1112    g_free(macs);
1113    rxfilter_notify(nc);
1114
1115    return VIRTIO_NET_OK;
1116
1117error:
1118    g_free(macs);
1119    return VIRTIO_NET_ERR;
1120}
1121
1122static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1123                                        struct iovec *iov, unsigned int iov_cnt)
1124{
1125    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1126    uint16_t vid;
1127    size_t s;
1128    NetClientState *nc = qemu_get_queue(n->nic);
1129
1130    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1131    vid = virtio_lduw_p(vdev, &vid);
1132    if (s != sizeof(vid)) {
1133        return VIRTIO_NET_ERR;
1134    }
1135
1136    if (vid >= MAX_VLAN)
1137        return VIRTIO_NET_ERR;
1138
1139    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1140        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1141    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1142        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1143    else
1144        return VIRTIO_NET_ERR;
1145
1146    rxfilter_notify(nc);
1147
1148    return VIRTIO_NET_OK;
1149}
1150
1151static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1152                                      struct iovec *iov, unsigned int iov_cnt)
1153{
1154    trace_virtio_net_handle_announce(n->announce_timer.round);
1155    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1156        n->status & VIRTIO_NET_S_ANNOUNCE) {
1157        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1158        if (n->announce_timer.round) {
1159            qemu_announce_timer_step(&n->announce_timer);
1160        }
1161        return VIRTIO_NET_OK;
1162    } else {
1163        return VIRTIO_NET_ERR;
1164    }
1165}
1166
1167static void virtio_net_detach_epbf_rss(VirtIONet *n);
1168
1169static void virtio_net_disable_rss(VirtIONet *n)
1170{
1171    if (n->rss_data.enabled) {
1172        trace_virtio_net_rss_disable();
1173    }
1174    n->rss_data.enabled = false;
1175
1176    virtio_net_detach_epbf_rss(n);
1177}
1178
1179static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1180{
1181    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1182    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1183        return false;
1184    }
1185
1186    return nc->info->set_steering_ebpf(nc, prog_fd);
1187}
1188
1189static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1190                                   struct EBPFRSSConfig *config)
1191{
1192    config->redirect = data->redirect;
1193    config->populate_hash = data->populate_hash;
1194    config->hash_types = data->hash_types;
1195    config->indirections_len = data->indirections_len;
1196    config->default_queue = data->default_queue;
1197}
1198
1199static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1200{
1201    struct EBPFRSSConfig config = {};
1202
1203    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1204        return false;
1205    }
1206
1207    rss_data_to_rss_config(&n->rss_data, &config);
1208
1209    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1210                          n->rss_data.indirections_table, n->rss_data.key)) {
1211        return false;
1212    }
1213
1214    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1215        return false;
1216    }
1217
1218    return true;
1219}
1220
1221static void virtio_net_detach_epbf_rss(VirtIONet *n)
1222{
1223    virtio_net_attach_ebpf_to_backend(n->nic, -1);
1224}
1225
1226static bool virtio_net_load_ebpf(VirtIONet *n)
1227{
1228    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1229        /* backend does't support steering ebpf */
1230        return false;
1231    }
1232
1233    return ebpf_rss_load(&n->ebpf_rss);
1234}
1235
1236static void virtio_net_unload_ebpf(VirtIONet *n)
1237{
1238    virtio_net_attach_ebpf_to_backend(n->nic, -1);
1239    ebpf_rss_unload(&n->ebpf_rss);
1240}
1241
1242static uint16_t virtio_net_handle_rss(VirtIONet *n,
1243                                      struct iovec *iov,
1244                                      unsigned int iov_cnt,
1245                                      bool do_rss)
1246{
1247    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1248    struct virtio_net_rss_config cfg;
1249    size_t s, offset = 0, size_get;
1250    uint16_t queues, i;
1251    struct {
1252        uint16_t us;
1253        uint8_t b;
1254    } QEMU_PACKED temp;
1255    const char *err_msg = "";
1256    uint32_t err_value = 0;
1257
1258    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1259        err_msg = "RSS is not negotiated";
1260        goto error;
1261    }
1262    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1263        err_msg = "Hash report is not negotiated";
1264        goto error;
1265    }
1266    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1267    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1268    if (s != size_get) {
1269        err_msg = "Short command buffer";
1270        err_value = (uint32_t)s;
1271        goto error;
1272    }
1273    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1274    n->rss_data.indirections_len =
1275        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1276    n->rss_data.indirections_len++;
1277    if (!do_rss) {
1278        n->rss_data.indirections_len = 1;
1279    }
1280    if (!is_power_of_2(n->rss_data.indirections_len)) {
1281        err_msg = "Invalid size of indirection table";
1282        err_value = n->rss_data.indirections_len;
1283        goto error;
1284    }
1285    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1286        err_msg = "Too large indirection table";
1287        err_value = n->rss_data.indirections_len;
1288        goto error;
1289    }
1290    n->rss_data.default_queue = do_rss ?
1291        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1292    if (n->rss_data.default_queue >= n->max_queues) {
1293        err_msg = "Invalid default queue";
1294        err_value = n->rss_data.default_queue;
1295        goto error;
1296    }
1297    offset += size_get;
1298    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1299    g_free(n->rss_data.indirections_table);
1300    n->rss_data.indirections_table = g_malloc(size_get);
1301    if (!n->rss_data.indirections_table) {
1302        err_msg = "Can't allocate indirections table";
1303        err_value = n->rss_data.indirections_len;
1304        goto error;
1305    }
1306    s = iov_to_buf(iov, iov_cnt, offset,
1307                   n->rss_data.indirections_table, size_get);
1308    if (s != size_get) {
1309        err_msg = "Short indirection table buffer";
1310        err_value = (uint32_t)s;
1311        goto error;
1312    }
1313    for (i = 0; i < n->rss_data.indirections_len; ++i) {
1314        uint16_t val = n->rss_data.indirections_table[i];
1315        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1316    }
1317    offset += size_get;
1318    size_get = sizeof(temp);
1319    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1320    if (s != size_get) {
1321        err_msg = "Can't get queues";
1322        err_value = (uint32_t)s;
1323        goto error;
1324    }
1325    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1326    if (queues == 0 || queues > n->max_queues) {
1327        err_msg = "Invalid number of queues";
1328        err_value = queues;
1329        goto error;
1330    }
1331    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1332        err_msg = "Invalid key size";
1333        err_value = temp.b;
1334        goto error;
1335    }
1336    if (!temp.b && n->rss_data.hash_types) {
1337        err_msg = "No key provided";
1338        err_value = 0;
1339        goto error;
1340    }
1341    if (!temp.b && !n->rss_data.hash_types) {
1342        virtio_net_disable_rss(n);
1343        return queues;
1344    }
1345    offset += size_get;
1346    size_get = temp.b;
1347    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1348    if (s != size_get) {
1349        err_msg = "Can get key buffer";
1350        err_value = (uint32_t)s;
1351        goto error;
1352    }
1353    n->rss_data.enabled = true;
1354
1355    if (!n->rss_data.populate_hash) {
1356        if (!virtio_net_attach_epbf_rss(n)) {
1357            /* EBPF must be loaded for vhost */
1358            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1359                warn_report("Can't load eBPF RSS for vhost");
1360                goto error;
1361            }
1362            /* fallback to software RSS */
1363            warn_report("Can't load eBPF RSS - fallback to software RSS");
1364            n->rss_data.enabled_software_rss = true;
1365        }
1366    } else {
1367        /* use software RSS for hash populating */
1368        /* and detach eBPF if was loaded before */
1369        virtio_net_detach_epbf_rss(n);
1370        n->rss_data.enabled_software_rss = true;
1371    }
1372
1373    trace_virtio_net_rss_enable(n->rss_data.hash_types,
1374                                n->rss_data.indirections_len,
1375                                temp.b);
1376    return queues;
1377error:
1378    trace_virtio_net_rss_error(err_msg, err_value);
1379    virtio_net_disable_rss(n);
1380    return 0;
1381}
1382
1383static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1384                                struct iovec *iov, unsigned int iov_cnt)
1385{
1386    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1387    uint16_t queues;
1388
1389    virtio_net_disable_rss(n);
1390    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1391        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1392        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1393    }
1394    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1395        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1396    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1397        struct virtio_net_ctrl_mq mq;
1398        size_t s;
1399        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1400            return VIRTIO_NET_ERR;
1401        }
1402        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1403        if (s != sizeof(mq)) {
1404            return VIRTIO_NET_ERR;
1405        }
1406        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1407
1408    } else {
1409        return VIRTIO_NET_ERR;
1410    }
1411
1412    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1413        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1414        queues > n->max_queues ||
1415        !n->multiqueue) {
1416        return VIRTIO_NET_ERR;
1417    }
1418
1419    n->curr_queues = queues;
1420    /* stop the backend before changing the number of queues to avoid handling a
1421     * disabled queue */
1422    virtio_net_set_status(vdev, vdev->status);
1423    virtio_net_set_queues(n);
1424
1425    return VIRTIO_NET_OK;
1426}
1427
1428static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1429{
1430    VirtIONet *n = VIRTIO_NET(vdev);
1431    struct virtio_net_ctrl_hdr ctrl;
1432    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1433    VirtQueueElement *elem;
1434    size_t s;
1435    struct iovec *iov, *iov2;
1436    unsigned int iov_cnt;
1437
1438    for (;;) {
1439        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1440        if (!elem) {
1441            break;
1442        }
1443        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1444            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1445            virtio_error(vdev, "virtio-net ctrl missing headers");
1446            virtqueue_detach_element(vq, elem, 0);
1447            g_free(elem);
1448            break;
1449        }
1450
1451        iov_cnt = elem->out_num;
1452        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1453        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1454        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1455        if (s != sizeof(ctrl)) {
1456            status = VIRTIO_NET_ERR;
1457        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1458            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1459        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1460            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1461        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1462            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1463        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1464            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1465        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1466            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1467        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1468            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1469        }
1470
1471        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1472        assert(s == sizeof(status));
1473
1474        virtqueue_push(vq, elem, sizeof(status));
1475        virtio_notify(vdev, vq);
1476        g_free(iov2);
1477        g_free(elem);
1478    }
1479}
1480
1481/* RX */
1482
1483static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1484{
1485    VirtIONet *n = VIRTIO_NET(vdev);
1486    int queue_index = vq2q(virtio_get_queue_index(vq));
1487
1488    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1489}
1490
1491static bool virtio_net_can_receive(NetClientState *nc)
1492{
1493    VirtIONet *n = qemu_get_nic_opaque(nc);
1494    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1495    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1496
1497    if (!vdev->vm_running) {
1498        return false;
1499    }
1500
1501    if (nc->queue_index >= n->curr_queues) {
1502        return false;
1503    }
1504
1505    if (!virtio_queue_ready(q->rx_vq) ||
1506        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1507        return false;
1508    }
1509
1510    return true;
1511}
1512
1513static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1514{
1515    VirtIONet *n = q->n;
1516    if (virtio_queue_empty(q->rx_vq) ||
1517        (n->mergeable_rx_bufs &&
1518         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1519        virtio_queue_set_notification(q->rx_vq, 1);
1520
1521        /* To avoid a race condition where the guest has made some buffers
1522         * available after the above check but before notification was
1523         * enabled, check for available buffers again.
1524         */
1525        if (virtio_queue_empty(q->rx_vq) ||
1526            (n->mergeable_rx_bufs &&
1527             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1528            return 0;
1529        }
1530    }
1531
1532    virtio_queue_set_notification(q->rx_vq, 0);
1533    return 1;
1534}
1535
1536static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1537{
1538    virtio_tswap16s(vdev, &hdr->hdr_len);
1539    virtio_tswap16s(vdev, &hdr->gso_size);
1540    virtio_tswap16s(vdev, &hdr->csum_start);
1541    virtio_tswap16s(vdev, &hdr->csum_offset);
1542}
1543
1544/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1545 * it never finds out that the packets don't have valid checksums.  This
1546 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1547 * fix this with Xen but it hasn't appeared in an upstream release of
1548 * dhclient yet.
1549 *
1550 * To avoid breaking existing guests, we catch udp packets and add
1551 * checksums.  This is terrible but it's better than hacking the guest
1552 * kernels.
1553 *
1554 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1555 * we should provide a mechanism to disable it to avoid polluting the host
1556 * cache.
1557 */
1558static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1559                                        uint8_t *buf, size_t size)
1560{
1561    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1562        (size > 27 && size < 1500) && /* normal sized MTU */
1563        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1564        (buf[23] == 17) && /* ip.protocol == UDP */
1565        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1566        net_checksum_calculate(buf, size, CSUM_UDP);
1567        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1568    }
1569}
1570
1571static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1572                           const void *buf, size_t size)
1573{
1574    if (n->has_vnet_hdr) {
1575        /* FIXME this cast is evil */
1576        void *wbuf = (void *)buf;
1577        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1578                                    size - n->host_hdr_len);
1579
1580        if (n->needs_vnet_hdr_swap) {
1581            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1582        }
1583        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1584    } else {
1585        struct virtio_net_hdr hdr = {
1586            .flags = 0,
1587            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1588        };
1589        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1590    }
1591}
1592
1593static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1594{
1595    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1596    static const uint8_t vlan[] = {0x81, 0x00};
1597    uint8_t *ptr = (uint8_t *)buf;
1598    int i;
1599
1600    if (n->promisc)
1601        return 1;
1602
1603    ptr += n->host_hdr_len;
1604
1605    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1606        int vid = lduw_be_p(ptr + 14) & 0xfff;
1607        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1608            return 0;
1609    }
1610
1611    if (ptr[0] & 1) { // multicast
1612        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1613            return !n->nobcast;
1614        } else if (n->nomulti) {
1615            return 0;
1616        } else if (n->allmulti || n->mac_table.multi_overflow) {
1617            return 1;
1618        }
1619
1620        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1621            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1622                return 1;
1623            }
1624        }
1625    } else { // unicast
1626        if (n->nouni) {
1627            return 0;
1628        } else if (n->alluni || n->mac_table.uni_overflow) {
1629            return 1;
1630        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1631            return 1;
1632        }
1633
1634        for (i = 0; i < n->mac_table.first_multi; i++) {
1635            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1636                return 1;
1637            }
1638        }
1639    }
1640
1641    return 0;
1642}
1643
1644static uint8_t virtio_net_get_hash_type(bool isip4,
1645                                        bool isip6,
1646                                        bool isudp,
1647                                        bool istcp,
1648                                        uint32_t types)
1649{
1650    if (isip4) {
1651        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1652            return NetPktRssIpV4Tcp;
1653        }
1654        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1655            return NetPktRssIpV4Udp;
1656        }
1657        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1658            return NetPktRssIpV4;
1659        }
1660    } else if (isip6) {
1661        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1662                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1663
1664        if (istcp && (types & mask)) {
1665            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1666                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1667        }
1668        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1669        if (isudp && (types & mask)) {
1670            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1671                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1672        }
1673        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1674        if (types & mask) {
1675            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1676                NetPktRssIpV6Ex : NetPktRssIpV6;
1677        }
1678    }
1679    return 0xff;
1680}
1681
1682static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1683                                   uint32_t hash)
1684{
1685    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1686    hdr->hash_value = hash;
1687    hdr->hash_report = report;
1688}
1689
1690static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1691                                  size_t size)
1692{
1693    VirtIONet *n = qemu_get_nic_opaque(nc);
1694    unsigned int index = nc->queue_index, new_index = index;
1695    struct NetRxPkt *pkt = n->rx_pkt;
1696    uint8_t net_hash_type;
1697    uint32_t hash;
1698    bool isip4, isip6, isudp, istcp;
1699    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1700        VIRTIO_NET_HASH_REPORT_IPv4,
1701        VIRTIO_NET_HASH_REPORT_TCPv4,
1702        VIRTIO_NET_HASH_REPORT_TCPv6,
1703        VIRTIO_NET_HASH_REPORT_IPv6,
1704        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1705        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1706        VIRTIO_NET_HASH_REPORT_UDPv4,
1707        VIRTIO_NET_HASH_REPORT_UDPv6,
1708        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1709    };
1710
1711    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1712                             size - n->host_hdr_len);
1713    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1714    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1715        istcp = isudp = false;
1716    }
1717    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1718        istcp = isudp = false;
1719    }
1720    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1721                                             n->rss_data.hash_types);
1722    if (net_hash_type > NetPktRssIpV6UdpEx) {
1723        if (n->rss_data.populate_hash) {
1724            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1725        }
1726        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1727    }
1728
1729    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1730
1731    if (n->rss_data.populate_hash) {
1732        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1733    }
1734
1735    if (n->rss_data.redirect) {
1736        new_index = hash & (n->rss_data.indirections_len - 1);
1737        new_index = n->rss_data.indirections_table[new_index];
1738    }
1739
1740    return (index == new_index) ? -1 : new_index;
1741}
1742
1743static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1744                                      size_t size, bool no_rss)
1745{
1746    VirtIONet *n = qemu_get_nic_opaque(nc);
1747    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1748    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1749    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1750    struct virtio_net_hdr_mrg_rxbuf mhdr;
1751    unsigned mhdr_cnt = 0;
1752    size_t offset, i, guest_offset;
1753
1754    if (!virtio_net_can_receive(nc)) {
1755        return -1;
1756    }
1757
1758    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1759        int index = virtio_net_process_rss(nc, buf, size);
1760        if (index >= 0) {
1761            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1762            return virtio_net_receive_rcu(nc2, buf, size, true);
1763        }
1764    }
1765
1766    /* hdr_len refers to the header we supply to the guest */
1767    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1768        return 0;
1769    }
1770
1771    if (!receive_filter(n, buf, size))
1772        return size;
1773
1774    offset = i = 0;
1775
1776    while (offset < size) {
1777        VirtQueueElement *elem;
1778        int len, total;
1779        const struct iovec *sg;
1780
1781        total = 0;
1782
1783        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1784        if (!elem) {
1785            if (i) {
1786                virtio_error(vdev, "virtio-net unexpected empty queue: "
1787                             "i %zd mergeable %d offset %zd, size %zd, "
1788                             "guest hdr len %zd, host hdr len %zd "
1789                             "guest features 0x%" PRIx64,
1790                             i, n->mergeable_rx_bufs, offset, size,
1791                             n->guest_hdr_len, n->host_hdr_len,
1792                             vdev->guest_features);
1793            }
1794            return -1;
1795        }
1796
1797        if (elem->in_num < 1) {
1798            virtio_error(vdev,
1799                         "virtio-net receive queue contains no in buffers");
1800            virtqueue_detach_element(q->rx_vq, elem, 0);
1801            g_free(elem);
1802            return -1;
1803        }
1804
1805        sg = elem->in_sg;
1806        if (i == 0) {
1807            assert(offset == 0);
1808            if (n->mergeable_rx_bufs) {
1809                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1810                                    sg, elem->in_num,
1811                                    offsetof(typeof(mhdr), num_buffers),
1812                                    sizeof(mhdr.num_buffers));
1813            }
1814
1815            receive_header(n, sg, elem->in_num, buf, size);
1816            if (n->rss_data.populate_hash) {
1817                offset = sizeof(mhdr);
1818                iov_from_buf(sg, elem->in_num, offset,
1819                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1820            }
1821            offset = n->host_hdr_len;
1822            total += n->guest_hdr_len;
1823            guest_offset = n->guest_hdr_len;
1824        } else {
1825            guest_offset = 0;
1826        }
1827
1828        /* copy in packet.  ugh */
1829        len = iov_from_buf(sg, elem->in_num, guest_offset,
1830                           buf + offset, size - offset);
1831        total += len;
1832        offset += len;
1833        /* If buffers can't be merged, at this point we
1834         * must have consumed the complete packet.
1835         * Otherwise, drop it. */
1836        if (!n->mergeable_rx_bufs && offset < size) {
1837            virtqueue_unpop(q->rx_vq, elem, total);
1838            g_free(elem);
1839            return size;
1840        }
1841
1842        /* signal other side */
1843        virtqueue_fill(q->rx_vq, elem, total, i++);
1844        g_free(elem);
1845    }
1846
1847    if (mhdr_cnt) {
1848        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1849        iov_from_buf(mhdr_sg, mhdr_cnt,
1850                     0,
1851                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1852    }
1853
1854    virtqueue_flush(q->rx_vq, i);
1855    virtio_notify(vdev, q->rx_vq);
1856
1857    return size;
1858}
1859
1860static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1861                                  size_t size)
1862{
1863    RCU_READ_LOCK_GUARD();
1864
1865    return virtio_net_receive_rcu(nc, buf, size, false);
1866}
1867
1868static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1869                                         const uint8_t *buf,
1870                                         VirtioNetRscUnit *unit)
1871{
1872    uint16_t ip_hdrlen;
1873    struct ip_header *ip;
1874
1875    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1876                              + sizeof(struct eth_header));
1877    unit->ip = (void *)ip;
1878    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1879    unit->ip_plen = &ip->ip_len;
1880    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1881    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1882    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1883}
1884
1885static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1886                                         const uint8_t *buf,
1887                                         VirtioNetRscUnit *unit)
1888{
1889    struct ip6_header *ip6;
1890
1891    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1892                                 + sizeof(struct eth_header));
1893    unit->ip = ip6;
1894    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1895    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1896                                        + sizeof(struct ip6_header));
1897    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1898
1899    /* There is a difference between payload lenght in ipv4 and v6,
1900       ip header is excluded in ipv6 */
1901    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1902}
1903
1904static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1905                                       VirtioNetRscSeg *seg)
1906{
1907    int ret;
1908    struct virtio_net_hdr_v1 *h;
1909
1910    h = (struct virtio_net_hdr_v1 *)seg->buf;
1911    h->flags = 0;
1912    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1913
1914    if (seg->is_coalesced) {
1915        h->rsc.segments = seg->packets;
1916        h->rsc.dup_acks = seg->dup_ack;
1917        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1918        if (chain->proto == ETH_P_IP) {
1919            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1920        } else {
1921            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1922        }
1923    }
1924
1925    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1926    QTAILQ_REMOVE(&chain->buffers, seg, next);
1927    g_free(seg->buf);
1928    g_free(seg);
1929
1930    return ret;
1931}
1932
1933static void virtio_net_rsc_purge(void *opq)
1934{
1935    VirtioNetRscSeg *seg, *rn;
1936    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1937
1938    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1939        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1940            chain->stat.purge_failed++;
1941            continue;
1942        }
1943    }
1944
1945    chain->stat.timer++;
1946    if (!QTAILQ_EMPTY(&chain->buffers)) {
1947        timer_mod(chain->drain_timer,
1948              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1949    }
1950}
1951
1952static void virtio_net_rsc_cleanup(VirtIONet *n)
1953{
1954    VirtioNetRscChain *chain, *rn_chain;
1955    VirtioNetRscSeg *seg, *rn_seg;
1956
1957    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1958        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1959            QTAILQ_REMOVE(&chain->buffers, seg, next);
1960            g_free(seg->buf);
1961            g_free(seg);
1962        }
1963
1964        timer_free(chain->drain_timer);
1965        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1966        g_free(chain);
1967    }
1968}
1969
1970static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1971                                     NetClientState *nc,
1972                                     const uint8_t *buf, size_t size)
1973{
1974    uint16_t hdr_len;
1975    VirtioNetRscSeg *seg;
1976
1977    hdr_len = chain->n->guest_hdr_len;
1978    seg = g_malloc(sizeof(VirtioNetRscSeg));
1979    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1980        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1981    memcpy(seg->buf, buf, size);
1982    seg->size = size;
1983    seg->packets = 1;
1984    seg->dup_ack = 0;
1985    seg->is_coalesced = 0;
1986    seg->nc = nc;
1987
1988    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1989    chain->stat.cache++;
1990
1991    switch (chain->proto) {
1992    case ETH_P_IP:
1993        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1994        break;
1995    case ETH_P_IPV6:
1996        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1997        break;
1998    default:
1999        g_assert_not_reached();
2000    }
2001}
2002
2003static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2004                                         VirtioNetRscSeg *seg,
2005                                         const uint8_t *buf,
2006                                         struct tcp_header *n_tcp,
2007                                         struct tcp_header *o_tcp)
2008{
2009    uint32_t nack, oack;
2010    uint16_t nwin, owin;
2011
2012    nack = htonl(n_tcp->th_ack);
2013    nwin = htons(n_tcp->th_win);
2014    oack = htonl(o_tcp->th_ack);
2015    owin = htons(o_tcp->th_win);
2016
2017    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2018        chain->stat.ack_out_of_win++;
2019        return RSC_FINAL;
2020    } else if (nack == oack) {
2021        /* duplicated ack or window probe */
2022        if (nwin == owin) {
2023            /* duplicated ack, add dup ack count due to whql test up to 1 */
2024            chain->stat.dup_ack++;
2025            return RSC_FINAL;
2026        } else {
2027            /* Coalesce window update */
2028            o_tcp->th_win = n_tcp->th_win;
2029            chain->stat.win_update++;
2030            return RSC_COALESCE;
2031        }
2032    } else {
2033        /* pure ack, go to 'C', finalize*/
2034        chain->stat.pure_ack++;
2035        return RSC_FINAL;
2036    }
2037}
2038
2039static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2040                                            VirtioNetRscSeg *seg,
2041                                            const uint8_t *buf,
2042                                            VirtioNetRscUnit *n_unit)
2043{
2044    void *data;
2045    uint16_t o_ip_len;
2046    uint32_t nseq, oseq;
2047    VirtioNetRscUnit *o_unit;
2048
2049    o_unit = &seg->unit;
2050    o_ip_len = htons(*o_unit->ip_plen);
2051    nseq = htonl(n_unit->tcp->th_seq);
2052    oseq = htonl(o_unit->tcp->th_seq);
2053
2054    /* out of order or retransmitted. */
2055    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2056        chain->stat.data_out_of_win++;
2057        return RSC_FINAL;
2058    }
2059
2060    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2061    if (nseq == oseq) {
2062        if ((o_unit->payload == 0) && n_unit->payload) {
2063            /* From no payload to payload, normal case, not a dup ack or etc */
2064            chain->stat.data_after_pure_ack++;
2065            goto coalesce;
2066        } else {
2067            return virtio_net_rsc_handle_ack(chain, seg, buf,
2068                                             n_unit->tcp, o_unit->tcp);
2069        }
2070    } else if ((nseq - oseq) != o_unit->payload) {
2071        /* Not a consistent packet, out of order */
2072        chain->stat.data_out_of_order++;
2073        return RSC_FINAL;
2074    } else {
2075coalesce:
2076        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2077            chain->stat.over_size++;
2078            return RSC_FINAL;
2079        }
2080
2081        /* Here comes the right data, the payload length in v4/v6 is different,
2082           so use the field value to update and record the new data len */
2083        o_unit->payload += n_unit->payload; /* update new data len */
2084
2085        /* update field in ip header */
2086        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2087
2088        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2089           for windows guest, while this may change the behavior for linux
2090           guest (only if it uses RSC feature). */
2091        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2092
2093        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2094        o_unit->tcp->th_win = n_unit->tcp->th_win;
2095
2096        memmove(seg->buf + seg->size, data, n_unit->payload);
2097        seg->size += n_unit->payload;
2098        seg->packets++;
2099        chain->stat.coalesced++;
2100        return RSC_COALESCE;
2101    }
2102}
2103
2104static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2105                                        VirtioNetRscSeg *seg,
2106                                        const uint8_t *buf, size_t size,
2107                                        VirtioNetRscUnit *unit)
2108{
2109    struct ip_header *ip1, *ip2;
2110
2111    ip1 = (struct ip_header *)(unit->ip);
2112    ip2 = (struct ip_header *)(seg->unit.ip);
2113    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2114        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2115        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2116        chain->stat.no_match++;
2117        return RSC_NO_MATCH;
2118    }
2119
2120    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2121}
2122
2123static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2124                                        VirtioNetRscSeg *seg,
2125                                        const uint8_t *buf, size_t size,
2126                                        VirtioNetRscUnit *unit)
2127{
2128    struct ip6_header *ip1, *ip2;
2129
2130    ip1 = (struct ip6_header *)(unit->ip);
2131    ip2 = (struct ip6_header *)(seg->unit.ip);
2132    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2133        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2134        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2135        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2136            chain->stat.no_match++;
2137            return RSC_NO_MATCH;
2138    }
2139
2140    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2141}
2142
2143/* Packets with 'SYN' should bypass, other flag should be sent after drain
2144 * to prevent out of order */
2145static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2146                                         struct tcp_header *tcp)
2147{
2148    uint16_t tcp_hdr;
2149    uint16_t tcp_flag;
2150
2151    tcp_flag = htons(tcp->th_offset_flags);
2152    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2153    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2154    if (tcp_flag & TH_SYN) {
2155        chain->stat.tcp_syn++;
2156        return RSC_BYPASS;
2157    }
2158
2159    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2160        chain->stat.tcp_ctrl_drain++;
2161        return RSC_FINAL;
2162    }
2163
2164    if (tcp_hdr > sizeof(struct tcp_header)) {
2165        chain->stat.tcp_all_opt++;
2166        return RSC_FINAL;
2167    }
2168
2169    return RSC_CANDIDATE;
2170}
2171
2172static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2173                                         NetClientState *nc,
2174                                         const uint8_t *buf, size_t size,
2175                                         VirtioNetRscUnit *unit)
2176{
2177    int ret;
2178    VirtioNetRscSeg *seg, *nseg;
2179
2180    if (QTAILQ_EMPTY(&chain->buffers)) {
2181        chain->stat.empty_cache++;
2182        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2183        timer_mod(chain->drain_timer,
2184              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2185        return size;
2186    }
2187
2188    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2189        if (chain->proto == ETH_P_IP) {
2190            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2191        } else {
2192            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2193        }
2194
2195        if (ret == RSC_FINAL) {
2196            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2197                /* Send failed */
2198                chain->stat.final_failed++;
2199                return 0;
2200            }
2201
2202            /* Send current packet */
2203            return virtio_net_do_receive(nc, buf, size);
2204        } else if (ret == RSC_NO_MATCH) {
2205            continue;
2206        } else {
2207            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2208            seg->is_coalesced = 1;
2209            return size;
2210        }
2211    }
2212
2213    chain->stat.no_match_cache++;
2214    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2215    return size;
2216}
2217
2218/* Drain a connection data, this is to avoid out of order segments */
2219static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2220                                        NetClientState *nc,
2221                                        const uint8_t *buf, size_t size,
2222                                        uint16_t ip_start, uint16_t ip_size,
2223                                        uint16_t tcp_port)
2224{
2225    VirtioNetRscSeg *seg, *nseg;
2226    uint32_t ppair1, ppair2;
2227
2228    ppair1 = *(uint32_t *)(buf + tcp_port);
2229    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2230        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2231        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2232            || (ppair1 != ppair2)) {
2233            continue;
2234        }
2235        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2236            chain->stat.drain_failed++;
2237        }
2238
2239        break;
2240    }
2241
2242    return virtio_net_do_receive(nc, buf, size);
2243}
2244
2245static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2246                                            struct ip_header *ip,
2247                                            const uint8_t *buf, size_t size)
2248{
2249    uint16_t ip_len;
2250
2251    /* Not an ipv4 packet */
2252    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2253        chain->stat.ip_option++;
2254        return RSC_BYPASS;
2255    }
2256
2257    /* Don't handle packets with ip option */
2258    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2259        chain->stat.ip_option++;
2260        return RSC_BYPASS;
2261    }
2262
2263    if (ip->ip_p != IPPROTO_TCP) {
2264        chain->stat.bypass_not_tcp++;
2265        return RSC_BYPASS;
2266    }
2267
2268    /* Don't handle packets with ip fragment */
2269    if (!(htons(ip->ip_off) & IP_DF)) {
2270        chain->stat.ip_frag++;
2271        return RSC_BYPASS;
2272    }
2273
2274    /* Don't handle packets with ecn flag */
2275    if (IPTOS_ECN(ip->ip_tos)) {
2276        chain->stat.ip_ecn++;
2277        return RSC_BYPASS;
2278    }
2279
2280    ip_len = htons(ip->ip_len);
2281    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2282        || ip_len > (size - chain->n->guest_hdr_len -
2283                     sizeof(struct eth_header))) {
2284        chain->stat.ip_hacked++;
2285        return RSC_BYPASS;
2286    }
2287
2288    return RSC_CANDIDATE;
2289}
2290
2291static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2292                                      NetClientState *nc,
2293                                      const uint8_t *buf, size_t size)
2294{
2295    int32_t ret;
2296    uint16_t hdr_len;
2297    VirtioNetRscUnit unit;
2298
2299    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2300
2301    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2302        + sizeof(struct tcp_header))) {
2303        chain->stat.bypass_not_tcp++;
2304        return virtio_net_do_receive(nc, buf, size);
2305    }
2306
2307    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2308    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2309        != RSC_CANDIDATE) {
2310        return virtio_net_do_receive(nc, buf, size);
2311    }
2312
2313    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2314    if (ret == RSC_BYPASS) {
2315        return virtio_net_do_receive(nc, buf, size);
2316    } else if (ret == RSC_FINAL) {
2317        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2318                ((hdr_len + sizeof(struct eth_header)) + 12),
2319                VIRTIO_NET_IP4_ADDR_SIZE,
2320                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2321    }
2322
2323    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2324}
2325
2326static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2327                                            struct ip6_header *ip6,
2328                                            const uint8_t *buf, size_t size)
2329{
2330    uint16_t ip_len;
2331
2332    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2333        != IP_HEADER_VERSION_6) {
2334        return RSC_BYPASS;
2335    }
2336
2337    /* Both option and protocol is checked in this */
2338    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2339        chain->stat.bypass_not_tcp++;
2340        return RSC_BYPASS;
2341    }
2342
2343    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2344    if (ip_len < sizeof(struct tcp_header) ||
2345        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2346                  - sizeof(struct ip6_header))) {
2347        chain->stat.ip_hacked++;
2348        return RSC_BYPASS;
2349    }
2350
2351    /* Don't handle packets with ecn flag */
2352    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2353        chain->stat.ip_ecn++;
2354        return RSC_BYPASS;
2355    }
2356
2357    return RSC_CANDIDATE;
2358}
2359
2360static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2361                                      const uint8_t *buf, size_t size)
2362{
2363    int32_t ret;
2364    uint16_t hdr_len;
2365    VirtioNetRscChain *chain;
2366    VirtioNetRscUnit unit;
2367
2368    chain = (VirtioNetRscChain *)opq;
2369    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2370
2371    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2372        + sizeof(tcp_header))) {
2373        return virtio_net_do_receive(nc, buf, size);
2374    }
2375
2376    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2377    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2378                                                 unit.ip, buf, size)) {
2379        return virtio_net_do_receive(nc, buf, size);
2380    }
2381
2382    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2383    if (ret == RSC_BYPASS) {
2384        return virtio_net_do_receive(nc, buf, size);
2385    } else if (ret == RSC_FINAL) {
2386        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2387                ((hdr_len + sizeof(struct eth_header)) + 8),
2388                VIRTIO_NET_IP6_ADDR_SIZE,
2389                hdr_len + sizeof(struct eth_header)
2390                + sizeof(struct ip6_header));
2391    }
2392
2393    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2394}
2395
2396static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2397                                                      NetClientState *nc,
2398                                                      uint16_t proto)
2399{
2400    VirtioNetRscChain *chain;
2401
2402    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2403        return NULL;
2404    }
2405
2406    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2407        if (chain->proto == proto) {
2408            return chain;
2409        }
2410    }
2411
2412    chain = g_malloc(sizeof(*chain));
2413    chain->n = n;
2414    chain->proto = proto;
2415    if (proto == (uint16_t)ETH_P_IP) {
2416        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2417        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2418    } else {
2419        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2420        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2421    }
2422    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2423                                      virtio_net_rsc_purge, chain);
2424    memset(&chain->stat, 0, sizeof(chain->stat));
2425
2426    QTAILQ_INIT(&chain->buffers);
2427    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2428
2429    return chain;
2430}
2431
2432static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2433                                      const uint8_t *buf,
2434                                      size_t size)
2435{
2436    uint16_t proto;
2437    VirtioNetRscChain *chain;
2438    struct eth_header *eth;
2439    VirtIONet *n;
2440
2441    n = qemu_get_nic_opaque(nc);
2442    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2443        return virtio_net_do_receive(nc, buf, size);
2444    }
2445
2446    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2447    proto = htons(eth->h_proto);
2448
2449    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2450    if (chain) {
2451        chain->stat.received++;
2452        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2453            return virtio_net_rsc_receive4(chain, nc, buf, size);
2454        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2455            return virtio_net_rsc_receive6(chain, nc, buf, size);
2456        }
2457    }
2458    return virtio_net_do_receive(nc, buf, size);
2459}
2460
2461static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2462                                  size_t size)
2463{
2464    VirtIONet *n = qemu_get_nic_opaque(nc);
2465    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2466        return virtio_net_rsc_receive(nc, buf, size);
2467    } else {
2468        return virtio_net_do_receive(nc, buf, size);
2469    }
2470}
2471
2472static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2473
2474static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2475{
2476    VirtIONet *n = qemu_get_nic_opaque(nc);
2477    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2478    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2479
2480    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2481    virtio_notify(vdev, q->tx_vq);
2482
2483    g_free(q->async_tx.elem);
2484    q->async_tx.elem = NULL;
2485
2486    virtio_queue_set_notification(q->tx_vq, 1);
2487    virtio_net_flush_tx(q);
2488}
2489
2490/* TX */
2491static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2492{
2493    VirtIONet *n = q->n;
2494    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2495    VirtQueueElement *elem;
2496    int32_t num_packets = 0;
2497    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2498    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2499        return num_packets;
2500    }
2501
2502    if (q->async_tx.elem) {
2503        virtio_queue_set_notification(q->tx_vq, 0);
2504        return num_packets;
2505    }
2506
2507    for (;;) {
2508        ssize_t ret;
2509        unsigned int out_num;
2510        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2511        struct virtio_net_hdr_mrg_rxbuf mhdr;
2512
2513        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2514        if (!elem) {
2515            break;
2516        }
2517
2518        out_num = elem->out_num;
2519        out_sg = elem->out_sg;
2520        if (out_num < 1) {
2521            virtio_error(vdev, "virtio-net header not in first element");
2522            virtqueue_detach_element(q->tx_vq, elem, 0);
2523            g_free(elem);
2524            return -EINVAL;
2525        }
2526
2527        if (n->has_vnet_hdr) {
2528            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2529                n->guest_hdr_len) {
2530                virtio_error(vdev, "virtio-net header incorrect");
2531                virtqueue_detach_element(q->tx_vq, elem, 0);
2532                g_free(elem);
2533                return -EINVAL;
2534            }
2535            if (n->needs_vnet_hdr_swap) {
2536                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2537                sg2[0].iov_base = &mhdr;
2538                sg2[0].iov_len = n->guest_hdr_len;
2539                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2540                                   out_sg, out_num,
2541                                   n->guest_hdr_len, -1);
2542                if (out_num == VIRTQUEUE_MAX_SIZE) {
2543                    goto drop;
2544                }
2545                out_num += 1;
2546                out_sg = sg2;
2547            }
2548        }
2549        /*
2550         * If host wants to see the guest header as is, we can
2551         * pass it on unchanged. Otherwise, copy just the parts
2552         * that host is interested in.
2553         */
2554        assert(n->host_hdr_len <= n->guest_hdr_len);
2555        if (n->host_hdr_len != n->guest_hdr_len) {
2556            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2557                                       out_sg, out_num,
2558                                       0, n->host_hdr_len);
2559            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2560                             out_sg, out_num,
2561                             n->guest_hdr_len, -1);
2562            out_num = sg_num;
2563            out_sg = sg;
2564        }
2565
2566        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2567                                      out_sg, out_num, virtio_net_tx_complete);
2568        if (ret == 0) {
2569            virtio_queue_set_notification(q->tx_vq, 0);
2570            q->async_tx.elem = elem;
2571            return -EBUSY;
2572        }
2573
2574drop:
2575        virtqueue_push(q->tx_vq, elem, 0);
2576        virtio_notify(vdev, q->tx_vq);
2577        g_free(elem);
2578
2579        if (++num_packets >= n->tx_burst) {
2580            break;
2581        }
2582    }
2583    return num_packets;
2584}
2585
2586static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2587{
2588    VirtIONet *n = VIRTIO_NET(vdev);
2589    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2590
2591    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2592        virtio_net_drop_tx_queue_data(vdev, vq);
2593        return;
2594    }
2595
2596    /* This happens when device was stopped but VCPU wasn't. */
2597    if (!vdev->vm_running) {
2598        q->tx_waiting = 1;
2599        return;
2600    }
2601
2602    if (q->tx_waiting) {
2603        virtio_queue_set_notification(vq, 1);
2604        timer_del(q->tx_timer);
2605        q->tx_waiting = 0;
2606        if (virtio_net_flush_tx(q) == -EINVAL) {
2607            return;
2608        }
2609    } else {
2610        timer_mod(q->tx_timer,
2611                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2612        q->tx_waiting = 1;
2613        virtio_queue_set_notification(vq, 0);
2614    }
2615}
2616
2617static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2618{
2619    VirtIONet *n = VIRTIO_NET(vdev);
2620    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2621
2622    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2623        virtio_net_drop_tx_queue_data(vdev, vq);
2624        return;
2625    }
2626
2627    if (unlikely(q->tx_waiting)) {
2628        return;
2629    }
2630    q->tx_waiting = 1;
2631    /* This happens when device was stopped but VCPU wasn't. */
2632    if (!vdev->vm_running) {
2633        return;
2634    }
2635    virtio_queue_set_notification(vq, 0);
2636    qemu_bh_schedule(q->tx_bh);
2637}
2638
2639static void virtio_net_tx_timer(void *opaque)
2640{
2641    VirtIONetQueue *q = opaque;
2642    VirtIONet *n = q->n;
2643    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2644    /* This happens when device was stopped but BH wasn't. */
2645    if (!vdev->vm_running) {
2646        /* Make sure tx waiting is set, so we'll run when restarted. */
2647        assert(q->tx_waiting);
2648        return;
2649    }
2650
2651    q->tx_waiting = 0;
2652
2653    /* Just in case the driver is not ready on more */
2654    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2655        return;
2656    }
2657
2658    virtio_queue_set_notification(q->tx_vq, 1);
2659    virtio_net_flush_tx(q);
2660}
2661
2662static void virtio_net_tx_bh(void *opaque)
2663{
2664    VirtIONetQueue *q = opaque;
2665    VirtIONet *n = q->n;
2666    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2667    int32_t ret;
2668
2669    /* This happens when device was stopped but BH wasn't. */
2670    if (!vdev->vm_running) {
2671        /* Make sure tx waiting is set, so we'll run when restarted. */
2672        assert(q->tx_waiting);
2673        return;
2674    }
2675
2676    q->tx_waiting = 0;
2677
2678    /* Just in case the driver is not ready on more */
2679    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2680        return;
2681    }
2682
2683    ret = virtio_net_flush_tx(q);
2684    if (ret == -EBUSY || ret == -EINVAL) {
2685        return; /* Notification re-enable handled by tx_complete or device
2686                 * broken */
2687    }
2688
2689    /* If we flush a full burst of packets, assume there are
2690     * more coming and immediately reschedule */
2691    if (ret >= n->tx_burst) {
2692        qemu_bh_schedule(q->tx_bh);
2693        q->tx_waiting = 1;
2694        return;
2695    }
2696
2697    /* If less than a full burst, re-enable notification and flush
2698     * anything that may have come in while we weren't looking.  If
2699     * we find something, assume the guest is still active and reschedule */
2700    virtio_queue_set_notification(q->tx_vq, 1);
2701    ret = virtio_net_flush_tx(q);
2702    if (ret == -EINVAL) {
2703        return;
2704    } else if (ret > 0) {
2705        virtio_queue_set_notification(q->tx_vq, 0);
2706        qemu_bh_schedule(q->tx_bh);
2707        q->tx_waiting = 1;
2708    }
2709}
2710
2711static void virtio_net_add_queue(VirtIONet *n, int index)
2712{
2713    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2714
2715    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2716                                           virtio_net_handle_rx);
2717
2718    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2719        n->vqs[index].tx_vq =
2720            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2721                             virtio_net_handle_tx_timer);
2722        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2723                                              virtio_net_tx_timer,
2724                                              &n->vqs[index]);
2725    } else {
2726        n->vqs[index].tx_vq =
2727            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2728                             virtio_net_handle_tx_bh);
2729        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2730    }
2731
2732    n->vqs[index].tx_waiting = 0;
2733    n->vqs[index].n = n;
2734}
2735
2736static void virtio_net_del_queue(VirtIONet *n, int index)
2737{
2738    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2739    VirtIONetQueue *q = &n->vqs[index];
2740    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2741
2742    qemu_purge_queued_packets(nc);
2743
2744    virtio_del_queue(vdev, index * 2);
2745    if (q->tx_timer) {
2746        timer_free(q->tx_timer);
2747        q->tx_timer = NULL;
2748    } else {
2749        qemu_bh_delete(q->tx_bh);
2750        q->tx_bh = NULL;
2751    }
2752    q->tx_waiting = 0;
2753    virtio_del_queue(vdev, index * 2 + 1);
2754}
2755
2756static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2757{
2758    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2759    int old_num_queues = virtio_get_num_queues(vdev);
2760    int new_num_queues = new_max_queues * 2 + 1;
2761    int i;
2762
2763    assert(old_num_queues >= 3);
2764    assert(old_num_queues % 2 == 1);
2765
2766    if (old_num_queues == new_num_queues) {
2767        return;
2768    }
2769
2770    /*
2771     * We always need to remove and add ctrl vq if
2772     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2773     * and then we only enter one of the following two loops.
2774     */
2775    virtio_del_queue(vdev, old_num_queues - 1);
2776
2777    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2778        /* new_num_queues < old_num_queues */
2779        virtio_net_del_queue(n, i / 2);
2780    }
2781
2782    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2783        /* new_num_queues > old_num_queues */
2784        virtio_net_add_queue(n, i / 2);
2785    }
2786
2787    /* add ctrl_vq last */
2788    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2789}
2790
2791static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2792{
2793    int max = multiqueue ? n->max_queues : 1;
2794
2795    n->multiqueue = multiqueue;
2796    virtio_net_change_num_queues(n, max);
2797
2798    virtio_net_set_queues(n);
2799}
2800
2801static int virtio_net_post_load_device(void *opaque, int version_id)
2802{
2803    VirtIONet *n = opaque;
2804    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2805    int i, link_down;
2806
2807    trace_virtio_net_post_load_device();
2808    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2809                               virtio_vdev_has_feature(vdev,
2810                                                       VIRTIO_F_VERSION_1),
2811                               virtio_vdev_has_feature(vdev,
2812                                                       VIRTIO_NET_F_HASH_REPORT));
2813
2814    /* MAC_TABLE_ENTRIES may be different from the saved image */
2815    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2816        n->mac_table.in_use = 0;
2817    }
2818
2819    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2820        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2821    }
2822
2823    /*
2824     * curr_guest_offloads will be later overwritten by the
2825     * virtio_set_features_nocheck call done from the virtio_load.
2826     * Here we make sure it is preserved and restored accordingly
2827     * in the virtio_net_post_load_virtio callback.
2828     */
2829    n->saved_guest_offloads = n->curr_guest_offloads;
2830
2831    virtio_net_set_queues(n);
2832
2833    /* Find the first multicast entry in the saved MAC filter */
2834    for (i = 0; i < n->mac_table.in_use; i++) {
2835        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2836            break;
2837        }
2838    }
2839    n->mac_table.first_multi = i;
2840
2841    /* nc.link_down can't be migrated, so infer link_down according
2842     * to link status bit in n->status */
2843    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2844    for (i = 0; i < n->max_queues; i++) {
2845        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2846    }
2847
2848    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2849        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2850        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2851                                  QEMU_CLOCK_VIRTUAL,
2852                                  virtio_net_announce_timer, n);
2853        if (n->announce_timer.round) {
2854            timer_mod(n->announce_timer.tm,
2855                      qemu_clock_get_ms(n->announce_timer.type));
2856        } else {
2857            qemu_announce_timer_del(&n->announce_timer, false);
2858        }
2859    }
2860
2861    if (n->rss_data.enabled) {
2862        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2863        if (!n->rss_data.populate_hash) {
2864            if (!virtio_net_attach_epbf_rss(n)) {
2865                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2866                    warn_report("Can't post-load eBPF RSS for vhost");
2867                } else {
2868                    warn_report("Can't post-load eBPF RSS - "
2869                                "fallback to software RSS");
2870                    n->rss_data.enabled_software_rss = true;
2871                }
2872            }
2873        }
2874
2875        trace_virtio_net_rss_enable(n->rss_data.hash_types,
2876                                    n->rss_data.indirections_len,
2877                                    sizeof(n->rss_data.key));
2878    } else {
2879        trace_virtio_net_rss_disable();
2880    }
2881    return 0;
2882}
2883
2884static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2885{
2886    VirtIONet *n = VIRTIO_NET(vdev);
2887    /*
2888     * The actual needed state is now in saved_guest_offloads,
2889     * see virtio_net_post_load_device for detail.
2890     * Restore it back and apply the desired offloads.
2891     */
2892    n->curr_guest_offloads = n->saved_guest_offloads;
2893    if (peer_has_vnet_hdr(n)) {
2894        virtio_net_apply_guest_offloads(n);
2895    }
2896
2897    return 0;
2898}
2899
2900/* tx_waiting field of a VirtIONetQueue */
2901static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2902    .name = "virtio-net-queue-tx_waiting",
2903    .fields = (VMStateField[]) {
2904        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2905        VMSTATE_END_OF_LIST()
2906   },
2907};
2908
2909static bool max_queues_gt_1(void *opaque, int version_id)
2910{
2911    return VIRTIO_NET(opaque)->max_queues > 1;
2912}
2913
2914static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2915{
2916    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2917                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2918}
2919
2920static bool mac_table_fits(void *opaque, int version_id)
2921{
2922    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2923}
2924
/* VMState field test: logical negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }
    return true;
}
2929
2930/* This temporary type is shared by all the WITH_TMP methods
2931 * although only some fields are used by each.
2932 */
2933struct VirtIONetMigTmp {
2934    VirtIONet      *parent;
2935    VirtIONetQueue *vqs_1;
2936    uint16_t        curr_queues_1;
2937    uint8_t         has_ufo;
2938    uint32_t        has_vnet_hdr;
2939};
2940
2941/* The 2nd and subsequent tx_waiting flags are loaded later than
2942 * the 1st entry in the queues and only if there's more than one
2943 * entry.  We use the tmp mechanism to calculate a temporary
2944 * pointer and count and also validate the count.
2945 */
2946
2947static int virtio_net_tx_waiting_pre_save(void *opaque)
2948{
2949    struct VirtIONetMigTmp *tmp = opaque;
2950
2951    tmp->vqs_1 = tmp->parent->vqs + 1;
2952    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2953    if (tmp->parent->curr_queues == 0) {
2954        tmp->curr_queues_1 = 0;
2955    }
2956
2957    return 0;
2958}
2959
2960static int virtio_net_tx_waiting_pre_load(void *opaque)
2961{
2962    struct VirtIONetMigTmp *tmp = opaque;
2963
2964    /* Reuse the pointer setup from save */
2965    virtio_net_tx_waiting_pre_save(opaque);
2966
2967    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2968        error_report("virtio-net: curr_queues %x > max_queues %x",
2969            tmp->parent->curr_queues, tmp->parent->max_queues);
2970
2971        return -EINVAL;
2972    }
2973
2974    return 0; /* all good */
2975}
2976
2977static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2978    .name      = "virtio-net-tx_waiting",
2979    .pre_load  = virtio_net_tx_waiting_pre_load,
2980    .pre_save  = virtio_net_tx_waiting_pre_save,
2981    .fields    = (VMStateField[]) {
2982        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2983                                     curr_queues_1,
2984                                     vmstate_virtio_net_queue_tx_waiting,
2985                                     struct VirtIONetQueue),
2986        VMSTATE_END_OF_LIST()
2987    },
2988};
2989
2990/* the 'has_ufo' flag is just tested; if the incoming stream has the
2991 * flag set we need to check that we have it
2992 */
2993static int virtio_net_ufo_post_load(void *opaque, int version_id)
2994{
2995    struct VirtIONetMigTmp *tmp = opaque;
2996
2997    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2998        error_report("virtio-net: saved image requires TUN_F_UFO support");
2999        return -EINVAL;
3000    }
3001
3002    return 0;
3003}
3004
3005static int virtio_net_ufo_pre_save(void *opaque)
3006{
3007    struct VirtIONetMigTmp *tmp = opaque;
3008
3009    tmp->has_ufo = tmp->parent->has_ufo;
3010
3011    return 0;
3012}
3013
3014static const VMStateDescription vmstate_virtio_net_has_ufo = {
3015    .name      = "virtio-net-ufo",
3016    .post_load = virtio_net_ufo_post_load,
3017    .pre_save  = virtio_net_ufo_pre_save,
3018    .fields    = (VMStateField[]) {
3019        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3020        VMSTATE_END_OF_LIST()
3021    },
3022};
3023
3024/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3025 * flag set we need to check that we have it
3026 */
3027static int virtio_net_vnet_post_load(void *opaque, int version_id)
3028{
3029    struct VirtIONetMigTmp *tmp = opaque;
3030
3031    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3032        error_report("virtio-net: saved image requires vnet_hdr=on");
3033        return -EINVAL;
3034    }
3035
3036    return 0;
3037}
3038
3039static int virtio_net_vnet_pre_save(void *opaque)
3040{
3041    struct VirtIONetMigTmp *tmp = opaque;
3042
3043    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3044
3045    return 0;
3046}
3047
3048static const VMStateDescription vmstate_virtio_net_has_vnet = {
3049    .name      = "virtio-net-vnet",
3050    .post_load = virtio_net_vnet_post_load,
3051    .pre_save  = virtio_net_vnet_pre_save,
3052    .fields    = (VMStateField[]) {
3053        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3054        VMSTATE_END_OF_LIST()
3055    },
3056};
3057
3058static bool virtio_net_rss_needed(void *opaque)
3059{
3060    return VIRTIO_NET(opaque)->rss_data.enabled;
3061}
3062
3063static const VMStateDescription vmstate_virtio_net_rss = {
3064    .name      = "virtio-net-device/rss",
3065    .version_id = 1,
3066    .minimum_version_id = 1,
3067    .needed = virtio_net_rss_needed,
3068    .fields = (VMStateField[]) {
3069        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3070        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3071        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3072        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3073        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3074        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3075        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3076                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3077        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3078                                    rss_data.indirections_len, 0,
3079                                    vmstate_info_uint16, uint16_t),
3080        VMSTATE_END_OF_LIST()
3081    },
3082};
3083
3084static const VMStateDescription vmstate_virtio_net_device = {
3085    .name = "virtio-net-device",
3086    .version_id = VIRTIO_NET_VM_VERSION,
3087    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3088    .post_load = virtio_net_post_load_device,
3089    .fields = (VMStateField[]) {
3090        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3091        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3092                               vmstate_virtio_net_queue_tx_waiting,
3093                               VirtIONetQueue),
3094        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3095        VMSTATE_UINT16(status, VirtIONet),
3096        VMSTATE_UINT8(promisc, VirtIONet),
3097        VMSTATE_UINT8(allmulti, VirtIONet),
3098        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3099
3100        /* Guarded pair: If it fits we load it, else we throw it away
3101         * - can happen if source has a larger MAC table.; post-load
3102         *  sets flags in this case.
3103         */
3104        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3105                                0, mac_table_fits, mac_table.in_use,
3106                                 ETH_ALEN),
3107        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3108                                     mac_table.in_use, ETH_ALEN),
3109
3110        /* Note: This is an array of uint32's that's always been saved as a
3111         * buffer; hold onto your endiannesses; it's actually used as a bitmap
3112         * but based on the uint.
3113         */
3114        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3115        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3116                         vmstate_virtio_net_has_vnet),
3117        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3118        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3119        VMSTATE_UINT8(alluni, VirtIONet),
3120        VMSTATE_UINT8(nomulti, VirtIONet),
3121        VMSTATE_UINT8(nouni, VirtIONet),
3122        VMSTATE_UINT8(nobcast, VirtIONet),
3123        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3124                         vmstate_virtio_net_has_ufo),
3125        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3126                            vmstate_info_uint16_equal, uint16_t),
3127        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3128        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3129                         vmstate_virtio_net_tx_waiting),
3130        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3131                            has_ctrl_guest_offloads),
3132        VMSTATE_END_OF_LIST()
3133   },
3134    .subsections = (const VMStateDescription * []) {
3135        &vmstate_virtio_net_rss,
3136        NULL
3137    }
3138};
3139
3140static NetClientInfo net_virtio_info = {
3141    .type = NET_CLIENT_DRIVER_NIC,
3142    .size = sizeof(NICState),
3143    .can_receive = virtio_net_can_receive,
3144    .receive = virtio_net_receive,
3145    .link_status_changed = virtio_net_set_link_status,
3146    .query_rx_filter = virtio_net_query_rxfilter,
3147    .announce = virtio_net_announce,
3148};
3149
3150static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3151{
3152    VirtIONet *n = VIRTIO_NET(vdev);
3153    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3154    assert(n->vhost_started);
3155    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3156}
3157
3158static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3159                                           bool mask)
3160{
3161    VirtIONet *n = VIRTIO_NET(vdev);
3162    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3163    assert(n->vhost_started);
3164    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3165                             vdev, idx, mask);
3166}
3167
3168static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3169{
3170    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3171
3172    n->config_size = virtio_feature_get_config_size(feature_sizes,
3173                                                    host_features);
3174}
3175
3176void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3177                                   const char *type)
3178{
3179    /*
3180     * The name can be NULL, the netclient name will be type.x.
3181     */
3182    assert(type != NULL);
3183
3184    g_free(n->netclient_name);
3185    g_free(n->netclient_type);
3186    n->netclient_name = g_strdup(name);
3187    n->netclient_type = g_strdup(type);
3188}
3189
3190static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3191{
3192    HotplugHandler *hotplug_ctrl;
3193    PCIDevice *pci_dev;
3194    Error *err = NULL;
3195
3196    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3197    if (hotplug_ctrl) {
3198        pci_dev = PCI_DEVICE(dev);
3199        pci_dev->partially_hotplugged = true;
3200        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3201        if (err) {
3202            error_report_err(err);
3203            return false;
3204        }
3205    } else {
3206        return false;
3207    }
3208    return true;
3209}
3210
3211static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3212                                    Error **errp)
3213{
3214    Error *err = NULL;
3215    HotplugHandler *hotplug_ctrl;
3216    PCIDevice *pdev = PCI_DEVICE(dev);
3217    BusState *primary_bus;
3218
3219    if (!pdev->partially_hotplugged) {
3220        return true;
3221    }
3222    primary_bus = dev->parent_bus;
3223    if (!primary_bus) {
3224        error_setg(errp, "virtio_net: couldn't find primary bus");
3225        return false;
3226    }
3227    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3228    qatomic_set(&n->failover_primary_hidden, false);
3229    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3230    if (hotplug_ctrl) {
3231        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3232        if (err) {
3233            goto out;
3234        }
3235        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3236    }
3237    pdev->partially_hotplugged = false;
3238
3239out:
3240    error_propagate(errp, err);
3241    return !err;
3242}
3243
3244static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3245{
3246    bool should_be_hidden;
3247    Error *err = NULL;
3248    DeviceState *dev = failover_find_primary_device(n);
3249
3250    if (!dev) {
3251        return;
3252    }
3253
3254    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3255
3256    if (migration_in_setup(s) && !should_be_hidden) {
3257        if (failover_unplug_primary(n, dev)) {
3258            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3259            qapi_event_send_unplug_primary(dev->id);
3260            qatomic_set(&n->failover_primary_hidden, true);
3261        } else {
3262            warn_report("couldn't unplug primary device");
3263        }
3264    } else if (migration_has_failed(s)) {
3265        /* We already unplugged the device let's plug it back */
3266        if (!failover_replug_primary(n, dev, &err)) {
3267            if (err) {
3268                error_report_err(err);
3269            }
3270        }
3271    }
3272}
3273
3274static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3275{
3276    MigrationState *s = data;
3277    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3278    virtio_net_handle_migration_primary(n, s);
3279}
3280
3281static bool failover_hide_primary_device(DeviceListener *listener,
3282                                         QemuOpts *device_opts)
3283{
3284    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3285    const char *standby_id;
3286
3287    if (!device_opts) {
3288        return false;
3289    }
3290    standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3291    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3292        return false;
3293    }
3294
3295    /* failover_primary_hidden is set during feature negotiation */
3296    return qatomic_read(&n->failover_primary_hidden);
3297}
3298
3299static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3300{
3301    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3302    VirtIONet *n = VIRTIO_NET(dev);
3303    NetClientState *nc;
3304    int i;
3305
3306    if (n->net_conf.mtu) {
3307        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3308    }
3309
3310    if (n->net_conf.duplex_str) {
3311        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3312            n->net_conf.duplex = DUPLEX_HALF;
3313        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3314            n->net_conf.duplex = DUPLEX_FULL;
3315        } else {
3316            error_setg(errp, "'duplex' must be 'half' or 'full'");
3317            return;
3318        }
3319        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3320    } else {
3321        n->net_conf.duplex = DUPLEX_UNKNOWN;
3322    }
3323
3324    if (n->net_conf.speed < SPEED_UNKNOWN) {
3325        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3326        return;
3327    }
3328    if (n->net_conf.speed >= 0) {
3329        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3330    }
3331
3332    if (n->failover) {
3333        n->primary_listener.hide_device = failover_hide_primary_device;
3334        qatomic_set(&n->failover_primary_hidden, true);
3335        device_listener_register(&n->primary_listener);
3336        n->migration_state.notify = virtio_net_migration_state_notifier;
3337        add_migration_state_change_notifier(&n->migration_state);
3338        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3339    }
3340
3341    virtio_net_set_config_size(n, n->host_features);
3342    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3343
3344    /*
3345     * We set a lower limit on RX queue size to what it always was.
3346     * Guests that want a smaller ring can always resize it without
3347     * help from us (using virtio 1 and up).
3348     */
3349    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3350        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3351        !is_power_of_2(n->net_conf.rx_queue_size)) {
3352        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3353                   "must be a power of 2 between %d and %d.",
3354                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3355                   VIRTQUEUE_MAX_SIZE);
3356        virtio_cleanup(vdev);
3357        return;
3358    }
3359
3360    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3361        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3362        !is_power_of_2(n->net_conf.tx_queue_size)) {
3363        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3364                   "must be a power of 2 between %d and %d",
3365                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3366                   VIRTQUEUE_MAX_SIZE);
3367        virtio_cleanup(vdev);
3368        return;
3369    }
3370
3371    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3372    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3373        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3374                   "must be a positive integer less than %d.",
3375                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3376        virtio_cleanup(vdev);
3377        return;
3378    }
3379    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3380    n->curr_queues = 1;
3381    n->tx_timeout = n->net_conf.txtimer;
3382
3383    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3384                       && strcmp(n->net_conf.tx, "bh")) {
3385        warn_report("virtio-net: "
3386                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3387                    n->net_conf.tx);
3388        error_printf("Defaulting to \"bh\"");
3389    }
3390
3391    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3392                                    n->net_conf.tx_queue_size);
3393
3394    for (i = 0; i < n->max_queues; i++) {
3395        virtio_net_add_queue(n, i);
3396    }
3397
3398    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3399    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3400    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3401    n->status = VIRTIO_NET_S_LINK_UP;
3402    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3403                              QEMU_CLOCK_VIRTUAL,
3404                              virtio_net_announce_timer, n);
3405    n->announce_timer.round = 0;
3406
3407    if (n->netclient_type) {
3408        /*
3409         * Happen when virtio_net_set_netclient_name has been called.
3410         */
3411        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3412                              n->netclient_type, n->netclient_name, n);
3413    } else {
3414        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3415                              object_get_typename(OBJECT(dev)), dev->id, n);
3416    }
3417
3418    for (i = 0; i < n->max_queues; i++) {
3419        n->nic->ncs[i].do_not_pad = true;
3420    }
3421
3422    peer_test_vnet_hdr(n);
3423    if (peer_has_vnet_hdr(n)) {
3424        for (i = 0; i < n->max_queues; i++) {
3425            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3426        }
3427        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3428    } else {
3429        n->host_hdr_len = 0;
3430    }
3431
3432    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3433
3434    n->vqs[0].tx_waiting = 0;
3435    n->tx_burst = n->net_conf.txburst;
3436    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3437    n->promisc = 1; /* for compatibility */
3438
3439    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3440
3441    n->vlans = g_malloc0(MAX_VLAN >> 3);
3442
3443    nc = qemu_get_queue(n->nic);
3444    nc->rxfilter_notify_enabled = 1;
3445
3446   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3447        struct virtio_net_config netcfg = {};
3448        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3449        vhost_net_set_config(get_vhost_net(nc->peer),
3450            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3451    }
3452    QTAILQ_INIT(&n->rsc_chains);
3453    n->qdev = dev;
3454
3455    net_rx_pkt_init(&n->rx_pkt, false);
3456
3457    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3458        virtio_net_load_ebpf(n);
3459    }
3460}
3461
3462static void virtio_net_device_unrealize(DeviceState *dev)
3463{
3464    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3465    VirtIONet *n = VIRTIO_NET(dev);
3466    int i, max_queues;
3467
3468    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3469        virtio_net_unload_ebpf(n);
3470    }
3471
3472    /* This will stop vhost backend if appropriate. */
3473    virtio_net_set_status(vdev, 0);
3474
3475    g_free(n->netclient_name);
3476    n->netclient_name = NULL;
3477    g_free(n->netclient_type);
3478    n->netclient_type = NULL;
3479
3480    g_free(n->mac_table.macs);
3481    g_free(n->vlans);
3482
3483    if (n->failover) {
3484        device_listener_unregister(&n->primary_listener);
3485        remove_migration_state_change_notifier(&n->migration_state);
3486    }
3487
3488    max_queues = n->multiqueue ? n->max_queues : 1;
3489    for (i = 0; i < max_queues; i++) {
3490        virtio_net_del_queue(n, i);
3491    }
3492    /* delete also control vq */
3493    virtio_del_queue(vdev, max_queues * 2);
3494    qemu_announce_timer_del(&n->announce_timer, false);
3495    g_free(n->vqs);
3496    qemu_del_nic(n->nic);
3497    virtio_net_rsc_cleanup(n);
3498    g_free(n->rss_data.indirections_table);
3499    net_rx_pkt_uninit(n->rx_pkt);
3500    virtio_cleanup(vdev);
3501}
3502
3503static void virtio_net_instance_init(Object *obj)
3504{
3505    VirtIONet *n = VIRTIO_NET(obj);
3506
3507    /*
3508     * The default config_size is sizeof(struct virtio_net_config).
3509     * Can be overriden with virtio_net_set_config_size.
3510     */
3511    n->config_size = sizeof(struct virtio_net_config);
3512    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3513                                  "bootindex", "/ethernet-phy@0",
3514                                  DEVICE(n));
3515
3516    ebpf_rss_init(&n->ebpf_rss);
3517}
3518
3519static int virtio_net_pre_save(void *opaque)
3520{
3521    VirtIONet *n = opaque;
3522
3523    /* At this point, backend must be stopped, otherwise
3524     * it might keep writing to memory. */
3525    assert(!n->vhost_started);
3526
3527    return 0;
3528}
3529
3530static bool primary_unplug_pending(void *opaque)
3531{
3532    DeviceState *dev = opaque;
3533    DeviceState *primary;
3534    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3535    VirtIONet *n = VIRTIO_NET(vdev);
3536
3537    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3538        return false;
3539    }
3540    primary = failover_find_primary_device(n);
3541    return primary ? primary->pending_deleted_event : false;
3542}
3543
3544static bool dev_unplug_pending(void *opaque)
3545{
3546    DeviceState *dev = opaque;
3547    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3548
3549    return vdc->primary_unplug_pending(dev);
3550}
3551
3552static const VMStateDescription vmstate_virtio_net = {
3553    .name = "virtio-net",
3554    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3555    .version_id = VIRTIO_NET_VM_VERSION,
3556    .fields = (VMStateField[]) {
3557        VMSTATE_VIRTIO_DEVICE,
3558        VMSTATE_END_OF_LIST()
3559    },
3560    .pre_save = virtio_net_pre_save,
3561    .dev_unplug_pending = dev_unplug_pending,
3562};
3563
3564static Property virtio_net_properties[] = {
3565    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3566                    VIRTIO_NET_F_CSUM, true),
3567    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3568                    VIRTIO_NET_F_GUEST_CSUM, true),
3569    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3570    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3571                    VIRTIO_NET_F_GUEST_TSO4, true),
3572    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3573                    VIRTIO_NET_F_GUEST_TSO6, true),
3574    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3575                    VIRTIO_NET_F_GUEST_ECN, true),
3576    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3577                    VIRTIO_NET_F_GUEST_UFO, true),
3578    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3579                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3580    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3581                    VIRTIO_NET_F_HOST_TSO4, true),
3582    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3583                    VIRTIO_NET_F_HOST_TSO6, true),
3584    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3585                    VIRTIO_NET_F_HOST_ECN, true),
3586    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3587                    VIRTIO_NET_F_HOST_UFO, true),
3588    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3589                    VIRTIO_NET_F_MRG_RXBUF, true),
3590    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3591                    VIRTIO_NET_F_STATUS, true),
3592    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3593                    VIRTIO_NET_F_CTRL_VQ, true),
3594    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3595                    VIRTIO_NET_F_CTRL_RX, true),
3596    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3597                    VIRTIO_NET_F_CTRL_VLAN, true),
3598    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3599                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3600    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3601                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3602    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3603                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3604    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3605    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3606                    VIRTIO_NET_F_RSS, false),
3607    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3608                    VIRTIO_NET_F_HASH_REPORT, false),
3609    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3610                    VIRTIO_NET_F_RSC_EXT, false),
3611    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3612                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3613    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3614    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3615                       TX_TIMER_INTERVAL),
3616    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3617    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3618    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3619                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3620    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3621                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3622    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3623    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3624                     true),
3625    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3626    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3627    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3628    DEFINE_PROP_END_OF_LIST(),
3629};
3630
3631static void virtio_net_class_init(ObjectClass *klass, void *data)
3632{
3633    DeviceClass *dc = DEVICE_CLASS(klass);
3634    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3635
3636    device_class_set_props(dc, virtio_net_properties);
3637    dc->vmsd = &vmstate_virtio_net;
3638    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3639    vdc->realize = virtio_net_device_realize;
3640    vdc->unrealize = virtio_net_device_unrealize;
3641    vdc->get_config = virtio_net_get_config;
3642    vdc->set_config = virtio_net_set_config;
3643    vdc->get_features = virtio_net_get_features;
3644    vdc->set_features = virtio_net_set_features;
3645    vdc->bad_features = virtio_net_bad_features;
3646    vdc->reset = virtio_net_reset;
3647    vdc->set_status = virtio_net_set_status;
3648    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3649    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3650    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3651    vdc->post_load = virtio_net_post_load_virtio;
3652    vdc->vmsd = &vmstate_virtio_net_device;
3653    vdc->primary_unplug_pending = primary_unplug_pending;
3654}
3655
3656static const TypeInfo virtio_net_info = {
3657    .name = TYPE_VIRTIO_NET,
3658    .parent = TYPE_VIRTIO_DEVICE,
3659    .instance_size = sizeof(VirtIONet),
3660    .instance_init = virtio_net_instance_init,
3661    .class_init = virtio_net_class_init,
3662};
3663
3664static void virtio_register_types(void)
3665{
3666    type_register_static(&virtio_net_info);
3667}
3668
3669type_init(virtio_register_types)
3670