qemu/hw/net/virtio-net.c
<<
>>
Prefs
   1/*
   2 * Virtio Network Device
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/atomic.h"
  16#include "qemu/iov.h"
  17#include "qemu/main-loop.h"
  18#include "qemu/module.h"
  19#include "hw/virtio/virtio.h"
  20#include "net/net.h"
  21#include "net/checksum.h"
  22#include "net/tap.h"
  23#include "qemu/error-report.h"
  24#include "qemu/timer.h"
  25#include "qemu/option.h"
  26#include "qemu/option_int.h"
  27#include "qemu/config-file.h"
  28#include "qapi/qmp/qdict.h"
  29#include "hw/virtio/virtio-net.h"
  30#include "net/vhost_net.h"
  31#include "net/announce.h"
  32#include "hw/virtio/virtio-bus.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-net.h"
  35#include "hw/qdev-properties.h"
  36#include "qapi/qapi-types-migration.h"
  37#include "qapi/qapi-events-migration.h"
  38#include "hw/virtio/virtio-access.h"
  39#include "migration/misc.h"
  40#include "standard-headers/linux/ethtool.h"
  41#include "sysemu/sysemu.h"
  42#include "trace.h"
  43#include "monitor/qdev.h"
  44#include "hw/pci/pci.h"
  45#include "net_rx_pkt.h"
  46#include "hw/virtio/vhost.h"
  47
  48#define VIRTIO_NET_VM_VERSION    11
  49
  50#define MAC_TABLE_ENTRIES    64
  51#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  52
  53/* previously fixed value */
  54#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  55#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  56
  57/* for now, only allow larger queues; with virtio-1, guest can downsize */
  58#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  59#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  60
  61#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  62
  63#define VIRTIO_NET_TCP_FLAG         0x3F
  64#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  65
  66/* IPv4 max payload, 16 bits in the header */
  67#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  68#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  69
  70/* header length value in ip header without option */
  71#define VIRTIO_NET_IP4_HEADER_LENGTH 5
  72
  73#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  74#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  75
/* Purge timer interval for coalesced packets, in nanoseconds. This value
   affects performance a lot and should be tuned carefully: '300000' (300us)
   is the recommended value to pass the WHQL test; '50000' can gain 2x netperf
   throughput with tso/gso/gro 'off'. */
  80#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  81
  82#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  83                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  84                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  85                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  86                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  87                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  88                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  89                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  90                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  91
  92static const VirtIOFeature feature_sizes[] = {
  93    {.flags = 1ULL << VIRTIO_NET_F_MAC,
  94     .end = endof(struct virtio_net_config, mac)},
  95    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
  96     .end = endof(struct virtio_net_config, status)},
  97    {.flags = 1ULL << VIRTIO_NET_F_MQ,
  98     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
  99    {.flags = 1ULL << VIRTIO_NET_F_MTU,
 100     .end = endof(struct virtio_net_config, mtu)},
 101    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
 102     .end = endof(struct virtio_net_config, duplex)},
 103    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
 104     .end = endof(struct virtio_net_config, supported_hash_types)},
 105    {}
 106};
 107
 108static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 109{
 110    VirtIONet *n = qemu_get_nic_opaque(nc);
 111
 112    return &n->vqs[nc->queue_index];
 113}
 114
 115static int vq2q(int queue_index)
 116{
 117    return queue_index / 2;
 118}
 119
 120/* TODO
 121 * - we could suppress RX interrupt if we were so inclined.
 122 */
 123
 124static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 125{
 126    VirtIONet *n = VIRTIO_NET(vdev);
 127    struct virtio_net_config netcfg;
 128    NetClientState *nc = qemu_get_queue(n->nic);
 129    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 130
 131    int ret = 0;
 132    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 133    virtio_stw_p(vdev, &netcfg.status, n->status);
 134    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
 135    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 136    memcpy(netcfg.mac, n->mac, ETH_ALEN);
 137    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 138    netcfg.duplex = n->net_conf.duplex;
 139    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 140    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 141                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 142                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 143    virtio_stl_p(vdev, &netcfg.supported_hash_types,
 144                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
 145    memcpy(config, &netcfg, n->config_size);
 146
 147    /*
 148     * Is this VDPA? No peer means not VDPA: there's no way to
 149     * disconnect/reconnect a VDPA peer.
 150     */
 151    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 152        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 153                                   n->config_size);
 154        if (ret != -1) {
 155            /*
 156             * Some NIC/kernel combinations present 0 as the mac address.  As
 157             * that is not a legal address, try to proceed with the
 158             * address from the QEMU command line in the hope that the
 159             * address has been configured correctly elsewhere - just not
 160             * reported by the device.
 161             */
 162            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 163                info_report("Zero hardware mac address detected. Ignoring.");
 164                memcpy(netcfg.mac, n->mac, ETH_ALEN);
 165            }
 166            memcpy(config, &netcfg, n->config_size);
 167        }
 168    }
 169}
 170
 171static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 172{
 173    VirtIONet *n = VIRTIO_NET(vdev);
 174    struct virtio_net_config netcfg = {};
 175    NetClientState *nc = qemu_get_queue(n->nic);
 176
 177    memcpy(&netcfg, config, n->config_size);
 178
 179    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 180        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 181        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 182        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 183        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 184    }
 185
 186    /*
 187     * Is this VDPA? No peer means not VDPA: there's no way to
 188     * disconnect/reconnect a VDPA peer.
 189     */
 190    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 191        vhost_net_set_config(get_vhost_net(nc->peer),
 192                             (uint8_t *)&netcfg, 0, n->config_size,
 193                             VHOST_SET_CONFIG_TYPE_MASTER);
 194      }
 195}
 196
 197static bool virtio_net_started(VirtIONet *n, uint8_t status)
 198{
 199    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 200    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 201        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 202}
 203
 204static void virtio_net_announce_notify(VirtIONet *net)
 205{
 206    VirtIODevice *vdev = VIRTIO_DEVICE(net);
 207    trace_virtio_net_announce_notify();
 208
 209    net->status |= VIRTIO_NET_S_ANNOUNCE;
 210    virtio_notify_config(vdev);
 211}
 212
 213static void virtio_net_announce_timer(void *opaque)
 214{
 215    VirtIONet *n = opaque;
 216    trace_virtio_net_announce_timer(n->announce_timer.round);
 217
 218    n->announce_timer.round--;
 219    virtio_net_announce_notify(n);
 220}
 221
 222static void virtio_net_announce(NetClientState *nc)
 223{
 224    VirtIONet *n = qemu_get_nic_opaque(nc);
 225    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 226
 227    /*
 228     * Make sure the virtio migration announcement timer isn't running
 229     * If it is, let it trigger announcement so that we do not cause
 230     * confusion.
 231     */
 232    if (n->announce_timer.round) {
 233        return;
 234    }
 235
 236    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 237        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 238            virtio_net_announce_notify(n);
 239    }
 240}
 241
 242static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 243{
 244    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 245    NetClientState *nc = qemu_get_queue(n->nic);
 246    int queues = n->multiqueue ? n->max_queues : 1;
 247
 248    if (!get_vhost_net(nc->peer)) {
 249        return;
 250    }
 251
 252    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 253        !!n->vhost_started) {
 254        return;
 255    }
 256    if (!n->vhost_started) {
 257        int r, i;
 258
 259        if (n->needs_vnet_hdr_swap) {
 260            error_report("backend does not support %s vnet headers; "
 261                         "falling back on userspace virtio",
 262                         virtio_is_big_endian(vdev) ? "BE" : "LE");
 263            return;
 264        }
 265
 266        /* Any packets outstanding? Purge them to avoid touching rings
 267         * when vhost is running.
 268         */
 269        for (i = 0;  i < queues; i++) {
 270            NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 271
 272            /* Purge both directions: TX and RX. */
 273            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 274            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 275        }
 276
 277        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 278            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 279            if (r < 0) {
 280                error_report("%uBytes MTU not supported by the backend",
 281                             n->net_conf.mtu);
 282
 283                return;
 284            }
 285        }
 286
 287        n->vhost_started = 1;
 288        r = vhost_net_start(vdev, n->nic->ncs, queues);
 289        if (r < 0) {
 290            error_report("unable to start vhost net: %d: "
 291                         "falling back on userspace virtio", -r);
 292            n->vhost_started = 0;
 293        }
 294    } else {
 295        vhost_net_stop(vdev, n->nic->ncs, queues);
 296        n->vhost_started = 0;
 297    }
 298}
 299
 300static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 301                                          NetClientState *peer,
 302                                          bool enable)
 303{
 304    if (virtio_is_big_endian(vdev)) {
 305        return qemu_set_vnet_be(peer, enable);
 306    } else {
 307        return qemu_set_vnet_le(peer, enable);
 308    }
 309}
 310
 311static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 312                                       int queues, bool enable)
 313{
 314    int i;
 315
 316    for (i = 0; i < queues; i++) {
 317        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 318            enable) {
 319            while (--i >= 0) {
 320                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 321            }
 322
 323            return true;
 324        }
 325    }
 326
 327    return false;
 328}
 329
 330static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 331{
 332    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 333    int queues = n->multiqueue ? n->max_queues : 1;
 334
 335    if (virtio_net_started(n, status)) {
 336        /* Before using the device, we tell the network backend about the
 337         * endianness to use when parsing vnet headers. If the backend
 338         * can't do it, we fallback onto fixing the headers in the core
 339         * virtio-net code.
 340         */
 341        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 342                                                            queues, true);
 343    } else if (virtio_net_started(n, vdev->status)) {
 344        /* After using the device, we need to reset the network backend to
 345         * the default (guest native endianness), otherwise the guest may
 346         * lose network connectivity if it is rebooted into a different
 347         * endianness.
 348         */
 349        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
 350    }
 351}
 352
 353static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 354{
 355    unsigned int dropped = virtqueue_drop_all(vq);
 356    if (dropped) {
 357        virtio_notify(vdev, vq);
 358    }
 359}
 360
 361static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 362{
 363    VirtIONet *n = VIRTIO_NET(vdev);
 364    VirtIONetQueue *q;
 365    int i;
 366    uint8_t queue_status;
 367
 368    virtio_net_vnet_endian_status(n, status);
 369    virtio_net_vhost_status(n, status);
 370
 371    for (i = 0; i < n->max_queues; i++) {
 372        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 373        bool queue_started;
 374        q = &n->vqs[i];
 375
 376        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
 377            queue_status = 0;
 378        } else {
 379            queue_status = status;
 380        }
 381        queue_started =
 382            virtio_net_started(n, queue_status) && !n->vhost_started;
 383
 384        if (queue_started) {
 385            qemu_flush_queued_packets(ncs);
 386        }
 387
 388        if (!q->tx_waiting) {
 389            continue;
 390        }
 391
 392        if (queue_started) {
 393            if (q->tx_timer) {
 394                timer_mod(q->tx_timer,
 395                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 396            } else {
 397                qemu_bh_schedule(q->tx_bh);
 398            }
 399        } else {
 400            if (q->tx_timer) {
 401                timer_del(q->tx_timer);
 402            } else {
 403                qemu_bh_cancel(q->tx_bh);
 404            }
 405            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 406                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 407                vdev->vm_running) {
 408                /* if tx is waiting we are likely have some packets in tx queue
 409                 * and disabled notification */
 410                q->tx_waiting = 0;
 411                virtio_queue_set_notification(q->tx_vq, 1);
 412                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 413            }
 414        }
 415    }
 416}
 417
 418static void virtio_net_set_link_status(NetClientState *nc)
 419{
 420    VirtIONet *n = qemu_get_nic_opaque(nc);
 421    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 422    uint16_t old_status = n->status;
 423
 424    if (nc->link_down)
 425        n->status &= ~VIRTIO_NET_S_LINK_UP;
 426    else
 427        n->status |= VIRTIO_NET_S_LINK_UP;
 428
 429    if (n->status != old_status)
 430        virtio_notify_config(vdev);
 431
 432    virtio_net_set_status(vdev, vdev->status);
 433}
 434
 435static void rxfilter_notify(NetClientState *nc)
 436{
 437    VirtIONet *n = qemu_get_nic_opaque(nc);
 438
 439    if (nc->rxfilter_notify_enabled) {
 440        char *path = object_get_canonical_path(OBJECT(n->qdev));
 441        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 442                                              n->netclient_name, path);
 443        g_free(path);
 444
 445        /* disable event notification to avoid events flooding */
 446        nc->rxfilter_notify_enabled = 0;
 447    }
 448}
 449
 450static intList *get_vlan_table(VirtIONet *n)
 451{
 452    intList *list;
 453    int i, j;
 454
 455    list = NULL;
 456    for (i = 0; i < MAX_VLAN >> 5; i++) {
 457        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 458            if (n->vlans[i] & (1U << j)) {
 459                QAPI_LIST_PREPEND(list, (i << 5) + j);
 460            }
 461        }
 462    }
 463
 464    return list;
 465}
 466
 467static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 468{
 469    VirtIONet *n = qemu_get_nic_opaque(nc);
 470    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 471    RxFilterInfo *info;
 472    strList *str_list;
 473    int i;
 474
 475    info = g_malloc0(sizeof(*info));
 476    info->name = g_strdup(nc->name);
 477    info->promiscuous = n->promisc;
 478
 479    if (n->nouni) {
 480        info->unicast = RX_STATE_NONE;
 481    } else if (n->alluni) {
 482        info->unicast = RX_STATE_ALL;
 483    } else {
 484        info->unicast = RX_STATE_NORMAL;
 485    }
 486
 487    if (n->nomulti) {
 488        info->multicast = RX_STATE_NONE;
 489    } else if (n->allmulti) {
 490        info->multicast = RX_STATE_ALL;
 491    } else {
 492        info->multicast = RX_STATE_NORMAL;
 493    }
 494
 495    info->broadcast_allowed = n->nobcast;
 496    info->multicast_overflow = n->mac_table.multi_overflow;
 497    info->unicast_overflow = n->mac_table.uni_overflow;
 498
 499    info->main_mac = qemu_mac_strdup_printf(n->mac);
 500
 501    str_list = NULL;
 502    for (i = 0; i < n->mac_table.first_multi; i++) {
 503        QAPI_LIST_PREPEND(str_list,
 504                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 505    }
 506    info->unicast_table = str_list;
 507
 508    str_list = NULL;
 509    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 510        QAPI_LIST_PREPEND(str_list,
 511                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 512    }
 513    info->multicast_table = str_list;
 514    info->vlan_table = get_vlan_table(n);
 515
 516    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 517        info->vlan = RX_STATE_ALL;
 518    } else if (!info->vlan_table) {
 519        info->vlan = RX_STATE_NONE;
 520    } else {
 521        info->vlan = RX_STATE_NORMAL;
 522    }
 523
 524    /* enable event notification after query */
 525    nc->rxfilter_notify_enabled = 1;
 526
 527    return info;
 528}
 529
 530static void virtio_net_reset(VirtIODevice *vdev)
 531{
 532    VirtIONet *n = VIRTIO_NET(vdev);
 533    int i;
 534
 535    /* Reset back to compatibility mode */
 536    n->promisc = 1;
 537    n->allmulti = 0;
 538    n->alluni = 0;
 539    n->nomulti = 0;
 540    n->nouni = 0;
 541    n->nobcast = 0;
 542    /* multiqueue is disabled by default */
 543    n->curr_queues = 1;
 544    timer_del(n->announce_timer.tm);
 545    n->announce_timer.round = 0;
 546    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 547
 548    /* Flush any MAC and VLAN filter table state */
 549    n->mac_table.in_use = 0;
 550    n->mac_table.first_multi = 0;
 551    n->mac_table.multi_overflow = 0;
 552    n->mac_table.uni_overflow = 0;
 553    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 554    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 555    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 556    memset(n->vlans, 0, MAX_VLAN >> 3);
 557
 558    /* Flush any async TX */
 559    for (i = 0;  i < n->max_queues; i++) {
 560        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 561
 562        if (nc->peer) {
 563            qemu_flush_or_purge_queued_packets(nc->peer, true);
 564            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 565        }
 566    }
 567}
 568
 569static void peer_test_vnet_hdr(VirtIONet *n)
 570{
 571    NetClientState *nc = qemu_get_queue(n->nic);
 572    if (!nc->peer) {
 573        return;
 574    }
 575
 576    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 577}
 578
 579static int peer_has_vnet_hdr(VirtIONet *n)
 580{
 581    return n->has_vnet_hdr;
 582}
 583
 584static int peer_has_ufo(VirtIONet *n)
 585{
 586    if (!peer_has_vnet_hdr(n))
 587        return 0;
 588
 589    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 590
 591    return n->has_ufo;
 592}
 593
 594static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 595                                       int version_1, int hash_report)
 596{
 597    int i;
 598    NetClientState *nc;
 599
 600    n->mergeable_rx_bufs = mergeable_rx_bufs;
 601
 602    if (version_1) {
 603        n->guest_hdr_len = hash_report ?
 604            sizeof(struct virtio_net_hdr_v1_hash) :
 605            sizeof(struct virtio_net_hdr_mrg_rxbuf);
 606        n->rss_data.populate_hash = !!hash_report;
 607    } else {
 608        n->guest_hdr_len = n->mergeable_rx_bufs ?
 609            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 610            sizeof(struct virtio_net_hdr);
 611    }
 612
 613    for (i = 0; i < n->max_queues; i++) {
 614        nc = qemu_get_subqueue(n->nic, i);
 615
 616        if (peer_has_vnet_hdr(n) &&
 617            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 618            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 619            n->host_hdr_len = n->guest_hdr_len;
 620        }
 621    }
 622}
 623
 624static int virtio_net_max_tx_queue_size(VirtIONet *n)
 625{
 626    NetClientState *peer = n->nic_conf.peers.ncs[0];
 627
 628    /*
 629     * Backends other than vhost-user don't support max queue size.
 630     */
 631    if (!peer) {
 632        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 633    }
 634
 635    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
 636        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 637    }
 638
 639    return VIRTQUEUE_MAX_SIZE;
 640}
 641
 642static int peer_attach(VirtIONet *n, int index)
 643{
 644    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 645
 646    if (!nc->peer) {
 647        return 0;
 648    }
 649
 650    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 651        vhost_set_vring_enable(nc->peer, 1);
 652    }
 653
 654    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 655        return 0;
 656    }
 657
 658    if (n->max_queues == 1) {
 659        return 0;
 660    }
 661
 662    return tap_enable(nc->peer);
 663}
 664
 665static int peer_detach(VirtIONet *n, int index)
 666{
 667    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 668
 669    if (!nc->peer) {
 670        return 0;
 671    }
 672
 673    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 674        vhost_set_vring_enable(nc->peer, 0);
 675    }
 676
 677    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 678        return 0;
 679    }
 680
 681    return tap_disable(nc->peer);
 682}
 683
 684static void virtio_net_set_queues(VirtIONet *n)
 685{
 686    int i;
 687    int r;
 688
 689    if (n->nic->peer_deleted) {
 690        return;
 691    }
 692
 693    for (i = 0; i < n->max_queues; i++) {
 694        if (i < n->curr_queues) {
 695            r = peer_attach(n, i);
 696            assert(!r);
 697        } else {
 698            r = peer_detach(n, i);
 699            assert(!r);
 700        }
 701    }
 702}
 703
 704static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 705
 706static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 707                                        Error **errp)
 708{
 709    VirtIONet *n = VIRTIO_NET(vdev);
 710    NetClientState *nc = qemu_get_queue(n->nic);
 711
 712    /* Firstly sync all virtio-net possible supported features */
 713    features |= n->host_features;
 714
 715    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 716
 717    if (!peer_has_vnet_hdr(n)) {
 718        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 719        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 720        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 721        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 722
 723        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 724        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 725        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 726        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 727
 728        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 729    }
 730
 731    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 732        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 733        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 734    }
 735
 736    if (!get_vhost_net(nc->peer)) {
 737        return features;
 738    }
 739
 740    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 741        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 742    }
 743    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 744    vdev->backend_features = features;
 745
 746    if (n->mtu_bypass_backend &&
 747            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 748        features |= (1ULL << VIRTIO_NET_F_MTU);
 749    }
 750
 751    return features;
 752}
 753
 754static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 755{
 756    uint64_t features = 0;
 757
 758    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 759     * but also these: */
 760    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 761    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 762    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 763    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 764    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 765
 766    return features;
 767}
 768
 769static void virtio_net_apply_guest_offloads(VirtIONet *n)
 770{
 771    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 772            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 773            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 774            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 775            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 776            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 777}
 778
 779static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 780{
 781    static const uint64_t guest_offloads_mask =
 782        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 783        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 784        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 785        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 786        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 787
 788    return guest_offloads_mask & features;
 789}
 790
 791static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 792{
 793    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 794    return virtio_net_guest_offloads_by_features(vdev->guest_features);
 795}
 796
 797typedef struct {
 798    VirtIONet *n;
 799    char *id;
 800} FailoverId;
 801
 802/**
 803 * Set the id of the failover primary device
 804 *
 805 * @opaque: FailoverId to setup
 806 * @opts: opts for device we are handling
 807 * @errp: returns an error if this function fails
 808 */
 809static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
 810{
 811    FailoverId *fid = opaque;
 812    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
 813
 814    if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
 815        fid->id = g_strdup(opts->id);
 816        return 1;
 817    }
 818
 819    return 0;
 820}
 821
 822/**
 823 * Find the primary device id for this failover virtio-net
 824 *
 825 * @n: VirtIONet device
 826 * @errp: returns an error if this function fails
 827 */
 828static char *failover_find_primary_device_id(VirtIONet *n)
 829{
 830    Error *err = NULL;
 831    FailoverId fid;
 832
 833    fid.n = n;
 834    if (!qemu_opts_foreach(qemu_find_opts("device"),
 835                           failover_set_primary, &fid, &err)) {
 836        return NULL;
 837    }
 838    return fid.id;
 839}
 840
 841/**
 842 * Find the primary device for this failover virtio-net
 843 *
 844 * @n: VirtIONet device
 845 * @errp: returns an error if this function fails
 846 */
 847static DeviceState *failover_find_primary_device(VirtIONet *n)
 848{
 849    char *id = failover_find_primary_device_id(n);
 850
 851    if (!id) {
 852        return NULL;
 853    }
 854
 855    return qdev_find_recursive(sysbus_get_default(), id);
 856}
 857
 858static void failover_add_primary(VirtIONet *n, Error **errp)
 859{
 860    Error *err = NULL;
 861    QemuOpts *opts;
 862    char *id;
 863    DeviceState *dev = failover_find_primary_device(n);
 864
 865    if (dev) {
 866        return;
 867    }
 868
 869    id = failover_find_primary_device_id(n);
 870    if (!id) {
 871        error_setg(errp, "Primary device not found");
 872        error_append_hint(errp, "Virtio-net failover will not work. Make "
 873                          "sure primary device has parameter"
 874                          " failover_pair_id=%s\n", n->netclient_name);
 875        return;
 876    }
 877    opts = qemu_opts_find(qemu_find_opts("device"), id);
 878    g_assert(opts); /* cannot be NULL because id was found using opts list */
 879    dev = qdev_device_add(opts, &err);
 880    if (err) {
 881        qemu_opts_del(opts);
 882    } else {
 883        object_unref(OBJECT(dev));
 884    }
 885    error_propagate(errp, err);
 886}
 887
 888static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 889{
 890    VirtIONet *n = VIRTIO_NET(vdev);
 891    Error *err = NULL;
 892    int i;
 893
 894    if (n->mtu_bypass_backend &&
 895            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 896        features &= ~(1ULL << VIRTIO_NET_F_MTU);
 897    }
 898
 899    virtio_net_set_multiqueue(n,
 900                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 901                              virtio_has_feature(features, VIRTIO_NET_F_MQ));
 902
 903    virtio_net_set_mrg_rx_bufs(n,
 904                               virtio_has_feature(features,
 905                                                  VIRTIO_NET_F_MRG_RXBUF),
 906                               virtio_has_feature(features,
 907                                                  VIRTIO_F_VERSION_1),
 908                               virtio_has_feature(features,
 909                                                  VIRTIO_NET_F_HASH_REPORT));
 910
 911    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 912        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 913    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 914        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 915    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 916
 917    if (n->has_vnet_hdr) {
 918        n->curr_guest_offloads =
 919            virtio_net_guest_offloads_by_features(features);
 920        virtio_net_apply_guest_offloads(n);
 921    }
 922
 923    for (i = 0;  i < n->max_queues; i++) {
 924        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 925
 926        if (!get_vhost_net(nc->peer)) {
 927            continue;
 928        }
 929        vhost_net_ack_features(get_vhost_net(nc->peer), features);
 930    }
 931
 932    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 933        memset(n->vlans, 0, MAX_VLAN >> 3);
 934    } else {
 935        memset(n->vlans, 0xff, MAX_VLAN >> 3);
 936    }
 937
 938    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
 939        qapi_event_send_failover_negotiated(n->netclient_name);
 940        qatomic_set(&n->failover_primary_hidden, false);
 941        failover_add_primary(n, &err);
 942        if (err) {
 943            warn_report_err(err);
 944        }
 945    }
 946}
 947
 948static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 949                                     struct iovec *iov, unsigned int iov_cnt)
 950{
 951    uint8_t on;
 952    size_t s;
 953    NetClientState *nc = qemu_get_queue(n->nic);
 954
 955    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 956    if (s != sizeof(on)) {
 957        return VIRTIO_NET_ERR;
 958    }
 959
 960    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 961        n->promisc = on;
 962    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 963        n->allmulti = on;
 964    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 965        n->alluni = on;
 966    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 967        n->nomulti = on;
 968    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 969        n->nouni = on;
 970    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 971        n->nobcast = on;
 972    } else {
 973        return VIRTIO_NET_ERR;
 974    }
 975
 976    rxfilter_notify(nc);
 977
 978    return VIRTIO_NET_OK;
 979}
 980
 981static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 982                                     struct iovec *iov, unsigned int iov_cnt)
 983{
 984    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 985    uint64_t offloads;
 986    size_t s;
 987
 988    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 989        return VIRTIO_NET_ERR;
 990    }
 991
 992    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 993    if (s != sizeof(offloads)) {
 994        return VIRTIO_NET_ERR;
 995    }
 996
 997    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 998        uint64_t supported_offloads;
 999
1000        offloads = virtio_ldq_p(vdev, &offloads);
1001
1002        if (!n->has_vnet_hdr) {
1003            return VIRTIO_NET_ERR;
1004        }
1005
1006        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1007            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1008        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1009            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1010        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1011
1012        supported_offloads = virtio_net_supported_guest_offloads(n);
1013        if (offloads & ~supported_offloads) {
1014            return VIRTIO_NET_ERR;
1015        }
1016
1017        n->curr_guest_offloads = offloads;
1018        virtio_net_apply_guest_offloads(n);
1019
1020        return VIRTIO_NET_OK;
1021    } else {
1022        return VIRTIO_NET_ERR;
1023    }
1024}
1025
1026static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1027                                 struct iovec *iov, unsigned int iov_cnt)
1028{
1029    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1030    struct virtio_net_ctrl_mac mac_data;
1031    size_t s;
1032    NetClientState *nc = qemu_get_queue(n->nic);
1033
1034    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1035        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1036            return VIRTIO_NET_ERR;
1037        }
1038        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1039        assert(s == sizeof(n->mac));
1040        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1041        rxfilter_notify(nc);
1042
1043        return VIRTIO_NET_OK;
1044    }
1045
1046    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1047        return VIRTIO_NET_ERR;
1048    }
1049
1050    int in_use = 0;
1051    int first_multi = 0;
1052    uint8_t uni_overflow = 0;
1053    uint8_t multi_overflow = 0;
1054    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1055
1056    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1057                   sizeof(mac_data.entries));
1058    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1059    if (s != sizeof(mac_data.entries)) {
1060        goto error;
1061    }
1062    iov_discard_front(&iov, &iov_cnt, s);
1063
1064    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1065        goto error;
1066    }
1067
1068    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1069        s = iov_to_buf(iov, iov_cnt, 0, macs,
1070                       mac_data.entries * ETH_ALEN);
1071        if (s != mac_data.entries * ETH_ALEN) {
1072            goto error;
1073        }
1074        in_use += mac_data.entries;
1075    } else {
1076        uni_overflow = 1;
1077    }
1078
1079    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1080
1081    first_multi = in_use;
1082
1083    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1084                   sizeof(mac_data.entries));
1085    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1086    if (s != sizeof(mac_data.entries)) {
1087        goto error;
1088    }
1089
1090    iov_discard_front(&iov, &iov_cnt, s);
1091
1092    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1093        goto error;
1094    }
1095
1096    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1097        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1098                       mac_data.entries * ETH_ALEN);
1099        if (s != mac_data.entries * ETH_ALEN) {
1100            goto error;
1101        }
1102        in_use += mac_data.entries;
1103    } else {
1104        multi_overflow = 1;
1105    }
1106
1107    n->mac_table.in_use = in_use;
1108    n->mac_table.first_multi = first_multi;
1109    n->mac_table.uni_overflow = uni_overflow;
1110    n->mac_table.multi_overflow = multi_overflow;
1111    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1112    g_free(macs);
1113    rxfilter_notify(nc);
1114
1115    return VIRTIO_NET_OK;
1116
1117error:
1118    g_free(macs);
1119    return VIRTIO_NET_ERR;
1120}
1121
1122static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1123                                        struct iovec *iov, unsigned int iov_cnt)
1124{
1125    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1126    uint16_t vid;
1127    size_t s;
1128    NetClientState *nc = qemu_get_queue(n->nic);
1129
1130    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1131    vid = virtio_lduw_p(vdev, &vid);
1132    if (s != sizeof(vid)) {
1133        return VIRTIO_NET_ERR;
1134    }
1135
1136    if (vid >= MAX_VLAN)
1137        return VIRTIO_NET_ERR;
1138
1139    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1140        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1141    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1142        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1143    else
1144        return VIRTIO_NET_ERR;
1145
1146    rxfilter_notify(nc);
1147
1148    return VIRTIO_NET_OK;
1149}
1150
1151static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1152                                      struct iovec *iov, unsigned int iov_cnt)
1153{
1154    trace_virtio_net_handle_announce(n->announce_timer.round);
1155    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1156        n->status & VIRTIO_NET_S_ANNOUNCE) {
1157        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1158        if (n->announce_timer.round) {
1159            qemu_announce_timer_step(&n->announce_timer);
1160        }
1161        return VIRTIO_NET_OK;
1162    } else {
1163        return VIRTIO_NET_ERR;
1164    }
1165}
1166
1167static void virtio_net_detach_epbf_rss(VirtIONet *n);
1168
1169static void virtio_net_disable_rss(VirtIONet *n)
1170{
1171    if (n->rss_data.enabled) {
1172        trace_virtio_net_rss_disable();
1173    }
1174    n->rss_data.enabled = false;
1175
1176    virtio_net_detach_epbf_rss(n);
1177}
1178
1179static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1180{
1181    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1182    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1183        return false;
1184    }
1185
1186    return nc->info->set_steering_ebpf(nc, prog_fd);
1187}
1188
1189static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1190                                   struct EBPFRSSConfig *config)
1191{
1192    config->redirect = data->redirect;
1193    config->populate_hash = data->populate_hash;
1194    config->hash_types = data->hash_types;
1195    config->indirections_len = data->indirections_len;
1196    config->default_queue = data->default_queue;
1197}
1198
1199static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1200{
1201    struct EBPFRSSConfig config = {};
1202
1203    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1204        return false;
1205    }
1206
1207    rss_data_to_rss_config(&n->rss_data, &config);
1208
1209    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1210                          n->rss_data.indirections_table, n->rss_data.key)) {
1211        return false;
1212    }
1213
1214    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1215        return false;
1216    }
1217
1218    return true;
1219}
1220
1221static void virtio_net_detach_epbf_rss(VirtIONet *n)
1222{
1223    virtio_net_attach_ebpf_to_backend(n->nic, -1);
1224}
1225
1226static bool virtio_net_load_ebpf(VirtIONet *n)
1227{
1228    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1229        /* backend does't support steering ebpf */
1230        return false;
1231    }
1232
1233    return ebpf_rss_load(&n->ebpf_rss);
1234}
1235
1236static void virtio_net_unload_ebpf(VirtIONet *n)
1237{
1238    virtio_net_attach_ebpf_to_backend(n->nic, -1);
1239    ebpf_rss_unload(&n->ebpf_rss);
1240}
1241
1242static uint16_t virtio_net_handle_rss(VirtIONet *n,
1243                                      struct iovec *iov,
1244                                      unsigned int iov_cnt,
1245                                      bool do_rss)
1246{
1247    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1248    struct virtio_net_rss_config cfg;
1249    size_t s, offset = 0, size_get;
1250    uint16_t queues, i;
1251    struct {
1252        uint16_t us;
1253        uint8_t b;
1254    } QEMU_PACKED temp;
1255    const char *err_msg = "";
1256    uint32_t err_value = 0;
1257
1258    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1259        err_msg = "RSS is not negotiated";
1260        goto error;
1261    }
1262    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1263        err_msg = "Hash report is not negotiated";
1264        goto error;
1265    }
1266    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1267    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1268    if (s != size_get) {
1269        err_msg = "Short command buffer";
1270        err_value = (uint32_t)s;
1271        goto error;
1272    }
1273    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1274    n->rss_data.indirections_len =
1275        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1276    n->rss_data.indirections_len++;
1277    if (!do_rss) {
1278        n->rss_data.indirections_len = 1;
1279    }
1280    if (!is_power_of_2(n->rss_data.indirections_len)) {
1281        err_msg = "Invalid size of indirection table";
1282        err_value = n->rss_data.indirections_len;
1283        goto error;
1284    }
1285    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1286        err_msg = "Too large indirection table";
1287        err_value = n->rss_data.indirections_len;
1288        goto error;
1289    }
1290    n->rss_data.default_queue = do_rss ?
1291        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1292    if (n->rss_data.default_queue >= n->max_queues) {
1293        err_msg = "Invalid default queue";
1294        err_value = n->rss_data.default_queue;
1295        goto error;
1296    }
1297    offset += size_get;
1298    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1299    g_free(n->rss_data.indirections_table);
1300    n->rss_data.indirections_table = g_malloc(size_get);
1301    if (!n->rss_data.indirections_table) {
1302        err_msg = "Can't allocate indirections table";
1303        err_value = n->rss_data.indirections_len;
1304        goto error;
1305    }
1306    s = iov_to_buf(iov, iov_cnt, offset,
1307                   n->rss_data.indirections_table, size_get);
1308    if (s != size_get) {
1309        err_msg = "Short indirection table buffer";
1310        err_value = (uint32_t)s;
1311        goto error;
1312    }
1313    for (i = 0; i < n->rss_data.indirections_len; ++i) {
1314        uint16_t val = n->rss_data.indirections_table[i];
1315        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1316    }
1317    offset += size_get;
1318    size_get = sizeof(temp);
1319    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1320    if (s != size_get) {
1321        err_msg = "Can't get queues";
1322        err_value = (uint32_t)s;
1323        goto error;
1324    }
1325    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1326    if (queues == 0 || queues > n->max_queues) {
1327        err_msg = "Invalid number of queues";
1328        err_value = queues;
1329        goto error;
1330    }
1331    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1332        err_msg = "Invalid key size";
1333        err_value = temp.b;
1334        goto error;
1335    }
1336    if (!temp.b && n->rss_data.hash_types) {
1337        err_msg = "No key provided";
1338        err_value = 0;
1339        goto error;
1340    }
1341    if (!temp.b && !n->rss_data.hash_types) {
1342        virtio_net_disable_rss(n);
1343        return queues;
1344    }
1345    offset += size_get;
1346    size_get = temp.b;
1347    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1348    if (s != size_get) {
1349        err_msg = "Can get key buffer";
1350        err_value = (uint32_t)s;
1351        goto error;
1352    }
1353    n->rss_data.enabled = true;
1354
1355    if (!n->rss_data.populate_hash) {
1356        if (!virtio_net_attach_epbf_rss(n)) {
1357            /* EBPF must be loaded for vhost */
1358            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1359                warn_report("Can't load eBPF RSS for vhost");
1360                goto error;
1361            }
1362            /* fallback to software RSS */
1363            warn_report("Can't load eBPF RSS - fallback to software RSS");
1364            n->rss_data.enabled_software_rss = true;
1365        }
1366    } else {
1367        /* use software RSS for hash populating */
1368        /* and detach eBPF if was loaded before */
1369        virtio_net_detach_epbf_rss(n);
1370        n->rss_data.enabled_software_rss = true;
1371    }
1372
1373    trace_virtio_net_rss_enable(n->rss_data.hash_types,
1374                                n->rss_data.indirections_len,
1375                                temp.b);
1376    return queues;
1377error:
1378    trace_virtio_net_rss_error(err_msg, err_value);
1379    virtio_net_disable_rss(n);
1380    return 0;
1381}
1382
1383static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1384                                struct iovec *iov, unsigned int iov_cnt)
1385{
1386    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1387    uint16_t queues;
1388
1389    virtio_net_disable_rss(n);
1390    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1391        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1392        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1393    }
1394    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1395        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1396    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1397        struct virtio_net_ctrl_mq mq;
1398        size_t s;
1399        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1400            return VIRTIO_NET_ERR;
1401        }
1402        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1403        if (s != sizeof(mq)) {
1404            return VIRTIO_NET_ERR;
1405        }
1406        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1407
1408    } else {
1409        return VIRTIO_NET_ERR;
1410    }
1411
1412    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1413        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1414        queues > n->max_queues ||
1415        !n->multiqueue) {
1416        return VIRTIO_NET_ERR;
1417    }
1418
1419    n->curr_queues = queues;
1420    /* stop the backend before changing the number of queues to avoid handling a
1421     * disabled queue */
1422    virtio_net_set_status(vdev, vdev->status);
1423    virtio_net_set_queues(n);
1424
1425    return VIRTIO_NET_OK;
1426}
1427
1428static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1429{
1430    VirtIONet *n = VIRTIO_NET(vdev);
1431    struct virtio_net_ctrl_hdr ctrl;
1432    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1433    VirtQueueElement *elem;
1434    size_t s;
1435    struct iovec *iov, *iov2;
1436    unsigned int iov_cnt;
1437
1438    for (;;) {
1439        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1440        if (!elem) {
1441            break;
1442        }
1443        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1444            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1445            virtio_error(vdev, "virtio-net ctrl missing headers");
1446            virtqueue_detach_element(vq, elem, 0);
1447            g_free(elem);
1448            break;
1449        }
1450
1451        iov_cnt = elem->out_num;
1452        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1453        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1454        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1455        if (s != sizeof(ctrl)) {
1456            status = VIRTIO_NET_ERR;
1457        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1458            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1459        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1460            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1461        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1462            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1463        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1464            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1465        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1466            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1467        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1468            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1469        }
1470
1471        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1472        assert(s == sizeof(status));
1473
1474        virtqueue_push(vq, elem, sizeof(status));
1475        virtio_notify(vdev, vq);
1476        g_free(iov2);
1477        g_free(elem);
1478    }
1479}
1480
1481/* RX */
1482
1483static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1484{
1485    VirtIONet *n = VIRTIO_NET(vdev);
1486    int queue_index = vq2q(virtio_get_queue_index(vq));
1487
1488    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1489}
1490
1491static bool virtio_net_can_receive(NetClientState *nc)
1492{
1493    VirtIONet *n = qemu_get_nic_opaque(nc);
1494    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1495    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1496
1497    if (!vdev->vm_running) {
1498        return false;
1499    }
1500
1501    if (nc->queue_index >= n->curr_queues) {
1502        return false;
1503    }
1504
1505    if (!virtio_queue_ready(q->rx_vq) ||
1506        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1507        return false;
1508    }
1509
1510    return true;
1511}
1512
1513static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1514{
1515    VirtIONet *n = q->n;
1516    if (virtio_queue_empty(q->rx_vq) ||
1517        (n->mergeable_rx_bufs &&
1518         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1519        virtio_queue_set_notification(q->rx_vq, 1);
1520
1521        /* To avoid a race condition where the guest has made some buffers
1522         * available after the above check but before notification was
1523         * enabled, check for available buffers again.
1524         */
1525        if (virtio_queue_empty(q->rx_vq) ||
1526            (n->mergeable_rx_bufs &&
1527             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1528            return 0;
1529        }
1530    }
1531
1532    virtio_queue_set_notification(q->rx_vq, 0);
1533    return 1;
1534}
1535
1536static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1537{
1538    virtio_tswap16s(vdev, &hdr->hdr_len);
1539    virtio_tswap16s(vdev, &hdr->gso_size);
1540    virtio_tswap16s(vdev, &hdr->csum_start);
1541    virtio_tswap16s(vdev, &hdr->csum_offset);
1542}
1543
1544/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1545 * it never finds out that the packets don't have valid checksums.  This
1546 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1547 * fix this with Xen but it hasn't appeared in an upstream release of
1548 * dhclient yet.
1549 *
1550 * To avoid breaking existing guests, we catch udp packets and add
1551 * checksums.  This is terrible but it's better than hacking the guest
1552 * kernels.
1553 *
1554 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1555 * we should provide a mechanism to disable it to avoid polluting the host
1556 * cache.
1557 */
1558static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1559                                        uint8_t *buf, size_t size)
1560{
1561    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1562        (size > 27 && size < 1500) && /* normal sized MTU */
1563        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1564        (buf[23] == 17) && /* ip.protocol == UDP */
1565        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1566        net_checksum_calculate(buf, size, CSUM_UDP);
1567        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1568    }
1569}
1570
1571static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1572                           const void *buf, size_t size)
1573{
1574    if (n->has_vnet_hdr) {
1575        /* FIXME this cast is evil */
1576        void *wbuf = (void *)buf;
1577        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1578                                    size - n->host_hdr_len);
1579
1580        if (n->needs_vnet_hdr_swap) {
1581            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1582        }
1583        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1584    } else {
1585        struct virtio_net_hdr hdr = {
1586            .flags = 0,
1587            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1588        };
1589        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1590    }
1591}
1592
1593static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1594{
1595    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1596    static const uint8_t vlan[] = {0x81, 0x00};
1597    uint8_t *ptr = (uint8_t *)buf;
1598    int i;
1599
1600    if (n->promisc)
1601        return 1;
1602
1603    ptr += n->host_hdr_len;
1604
1605    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1606        int vid = lduw_be_p(ptr + 14) & 0xfff;
1607        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1608            return 0;
1609    }
1610
1611    if (ptr[0] & 1) { // multicast
1612        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1613            return !n->nobcast;
1614        } else if (n->nomulti) {
1615            return 0;
1616        } else if (n->allmulti || n->mac_table.multi_overflow) {
1617            return 1;
1618        }
1619
1620        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1621            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1622                return 1;
1623            }
1624        }
1625    } else { // unicast
1626        if (n->nouni) {
1627            return 0;
1628        } else if (n->alluni || n->mac_table.uni_overflow) {
1629            return 1;
1630        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1631            return 1;
1632        }
1633
1634        for (i = 0; i < n->mac_table.first_multi; i++) {
1635            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1636                return 1;
1637            }
1638        }
1639    }
1640
1641    return 0;
1642}
1643
1644static uint8_t virtio_net_get_hash_type(bool isip4,
1645                                        bool isip6,
1646                                        bool isudp,
1647                                        bool istcp,
1648                                        uint32_t types)
1649{
1650    if (isip4) {
1651        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1652            return NetPktRssIpV4Tcp;
1653        }
1654        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1655            return NetPktRssIpV4Udp;
1656        }
1657        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1658            return NetPktRssIpV4;
1659        }
1660    } else if (isip6) {
1661        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1662                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1663
1664        if (istcp && (types & mask)) {
1665            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1666                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1667        }
1668        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1669        if (isudp && (types & mask)) {
1670            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1671                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1672        }
1673        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1674        if (types & mask) {
1675            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1676                NetPktRssIpV6Ex : NetPktRssIpV6;
1677        }
1678    }
1679    return 0xff;
1680}
1681
1682static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1683                                   uint32_t hash)
1684{
1685    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1686    hdr->hash_value = hash;
1687    hdr->hash_report = report;
1688}
1689
1690static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1691                                  size_t size)
1692{
1693    VirtIONet *n = qemu_get_nic_opaque(nc);
1694    unsigned int index = nc->queue_index, new_index = index;
1695    struct NetRxPkt *pkt = n->rx_pkt;
1696    uint8_t net_hash_type;
1697    uint32_t hash;
1698    bool isip4, isip6, isudp, istcp;
1699    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1700        VIRTIO_NET_HASH_REPORT_IPv4,
1701        VIRTIO_NET_HASH_REPORT_TCPv4,
1702        VIRTIO_NET_HASH_REPORT_TCPv6,
1703        VIRTIO_NET_HASH_REPORT_IPv6,
1704        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1705        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1706        VIRTIO_NET_HASH_REPORT_UDPv4,
1707        VIRTIO_NET_HASH_REPORT_UDPv6,
1708        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1709    };
1710
1711    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1712                             size - n->host_hdr_len);
1713    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1714    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1715        istcp = isudp = false;
1716    }
1717    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1718        istcp = isudp = false;
1719    }
1720    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1721                                             n->rss_data.hash_types);
1722    if (net_hash_type > NetPktRssIpV6UdpEx) {
1723        if (n->rss_data.populate_hash) {
1724            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1725        }
1726        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1727    }
1728
1729    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1730
1731    if (n->rss_data.populate_hash) {
1732        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1733    }
1734
1735    if (n->rss_data.redirect) {
1736        new_index = hash & (n->rss_data.indirections_len - 1);
1737        new_index = n->rss_data.indirections_table[new_index];
1738    }
1739
1740    return (index == new_index) ? -1 : new_index;
1741}
1742
1743static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1744                                      size_t size, bool no_rss)
1745{
1746    VirtIONet *n = qemu_get_nic_opaque(nc);
1747    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1748    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1749    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1750    size_t lens[VIRTQUEUE_MAX_SIZE];
1751    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1752    struct virtio_net_hdr_mrg_rxbuf mhdr;
1753    unsigned mhdr_cnt = 0;
1754    size_t offset, i, guest_offset, j;
1755    ssize_t err;
1756
1757    if (!virtio_net_can_receive(nc)) {
1758        return -1;
1759    }
1760
1761    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1762        int index = virtio_net_process_rss(nc, buf, size);
1763        if (index >= 0) {
1764            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1765            return virtio_net_receive_rcu(nc2, buf, size, true);
1766        }
1767    }
1768
1769    /* hdr_len refers to the header we supply to the guest */
1770    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1771        return 0;
1772    }
1773
1774    if (!receive_filter(n, buf, size))
1775        return size;
1776
1777    offset = i = 0;
1778
1779    while (offset < size) {
1780        VirtQueueElement *elem;
1781        int len, total;
1782        const struct iovec *sg;
1783
1784        total = 0;
1785
1786        if (i == VIRTQUEUE_MAX_SIZE) {
1787            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1788            err = size;
1789            goto err;
1790        }
1791
1792        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1793        if (!elem) {
1794            if (i) {
1795                virtio_error(vdev, "virtio-net unexpected empty queue: "
1796                             "i %zd mergeable %d offset %zd, size %zd, "
1797                             "guest hdr len %zd, host hdr len %zd "
1798                             "guest features 0x%" PRIx64,
1799                             i, n->mergeable_rx_bufs, offset, size,
1800                             n->guest_hdr_len, n->host_hdr_len,
1801                             vdev->guest_features);
1802            }
1803            err = -1;
1804            goto err;
1805        }
1806
1807        if (elem->in_num < 1) {
1808            virtio_error(vdev,
1809                         "virtio-net receive queue contains no in buffers");
1810            virtqueue_detach_element(q->rx_vq, elem, 0);
1811            g_free(elem);
1812            err = -1;
1813            goto err;
1814        }
1815
1816        sg = elem->in_sg;
1817        if (i == 0) {
1818            assert(offset == 0);
1819            if (n->mergeable_rx_bufs) {
1820                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1821                                    sg, elem->in_num,
1822                                    offsetof(typeof(mhdr), num_buffers),
1823                                    sizeof(mhdr.num_buffers));
1824            }
1825
1826            receive_header(n, sg, elem->in_num, buf, size);
1827            if (n->rss_data.populate_hash) {
1828                offset = sizeof(mhdr);
1829                iov_from_buf(sg, elem->in_num, offset,
1830                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1831            }
1832            offset = n->host_hdr_len;
1833            total += n->guest_hdr_len;
1834            guest_offset = n->guest_hdr_len;
1835        } else {
1836            guest_offset = 0;
1837        }
1838
1839        /* copy in packet.  ugh */
1840        len = iov_from_buf(sg, elem->in_num, guest_offset,
1841                           buf + offset, size - offset);
1842        total += len;
1843        offset += len;
1844        /* If buffers can't be merged, at this point we
1845         * must have consumed the complete packet.
1846         * Otherwise, drop it. */
1847        if (!n->mergeable_rx_bufs && offset < size) {
1848            virtqueue_unpop(q->rx_vq, elem, total);
1849            g_free(elem);
1850            err = size;
1851            goto err;
1852        }
1853
1854        elems[i] = elem;
1855        lens[i] = total;
1856        i++;
1857    }
1858
1859    if (mhdr_cnt) {
1860        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1861        iov_from_buf(mhdr_sg, mhdr_cnt,
1862                     0,
1863                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1864    }
1865
1866    for (j = 0; j < i; j++) {
1867        /* signal other side */
1868        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1869        g_free(elems[j]);
1870    }
1871
1872    virtqueue_flush(q->rx_vq, i);
1873    virtio_notify(vdev, q->rx_vq);
1874
1875    return size;
1876
1877err:
1878    for (j = 0; j < i; j++) {
1879        g_free(elems[j]);
1880    }
1881
1882    return err;
1883}
1884
1885static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1886                                  size_t size)
1887{
1888    RCU_READ_LOCK_GUARD();
1889
1890    return virtio_net_receive_rcu(nc, buf, size, false);
1891}
1892
1893static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1894                                         const uint8_t *buf,
1895                                         VirtioNetRscUnit *unit)
1896{
1897    uint16_t ip_hdrlen;
1898    struct ip_header *ip;
1899
1900    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1901                              + sizeof(struct eth_header));
1902    unit->ip = (void *)ip;
1903    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1904    unit->ip_plen = &ip->ip_len;
1905    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1906    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1907    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1908}
1909
1910static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1911                                         const uint8_t *buf,
1912                                         VirtioNetRscUnit *unit)
1913{
1914    struct ip6_header *ip6;
1915
1916    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1917                                 + sizeof(struct eth_header));
1918    unit->ip = ip6;
1919    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1920    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1921                                        + sizeof(struct ip6_header));
1922    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1923
1924    /* There is a difference between payload lenght in ipv4 and v6,
1925       ip header is excluded in ipv6 */
1926    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1927}
1928
1929static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1930                                       VirtioNetRscSeg *seg)
1931{
1932    int ret;
1933    struct virtio_net_hdr_v1 *h;
1934
1935    h = (struct virtio_net_hdr_v1 *)seg->buf;
1936    h->flags = 0;
1937    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1938
1939    if (seg->is_coalesced) {
1940        h->rsc.segments = seg->packets;
1941        h->rsc.dup_acks = seg->dup_ack;
1942        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1943        if (chain->proto == ETH_P_IP) {
1944            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1945        } else {
1946            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1947        }
1948    }
1949
1950    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1951    QTAILQ_REMOVE(&chain->buffers, seg, next);
1952    g_free(seg->buf);
1953    g_free(seg);
1954
1955    return ret;
1956}
1957
1958static void virtio_net_rsc_purge(void *opq)
1959{
1960    VirtioNetRscSeg *seg, *rn;
1961    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1962
1963    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1964        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1965            chain->stat.purge_failed++;
1966            continue;
1967        }
1968    }
1969
1970    chain->stat.timer++;
1971    if (!QTAILQ_EMPTY(&chain->buffers)) {
1972        timer_mod(chain->drain_timer,
1973              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1974    }
1975}
1976
1977static void virtio_net_rsc_cleanup(VirtIONet *n)
1978{
1979    VirtioNetRscChain *chain, *rn_chain;
1980    VirtioNetRscSeg *seg, *rn_seg;
1981
1982    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1983        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1984            QTAILQ_REMOVE(&chain->buffers, seg, next);
1985            g_free(seg->buf);
1986            g_free(seg);
1987        }
1988
1989        timer_free(chain->drain_timer);
1990        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1991        g_free(chain);
1992    }
1993}
1994
1995static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1996                                     NetClientState *nc,
1997                                     const uint8_t *buf, size_t size)
1998{
1999    uint16_t hdr_len;
2000    VirtioNetRscSeg *seg;
2001
2002    hdr_len = chain->n->guest_hdr_len;
2003    seg = g_malloc(sizeof(VirtioNetRscSeg));
2004    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2005        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2006    memcpy(seg->buf, buf, size);
2007    seg->size = size;
2008    seg->packets = 1;
2009    seg->dup_ack = 0;
2010    seg->is_coalesced = 0;
2011    seg->nc = nc;
2012
2013    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2014    chain->stat.cache++;
2015
2016    switch (chain->proto) {
2017    case ETH_P_IP:
2018        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2019        break;
2020    case ETH_P_IPV6:
2021        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2022        break;
2023    default:
2024        g_assert_not_reached();
2025    }
2026}
2027
2028static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2029                                         VirtioNetRscSeg *seg,
2030                                         const uint8_t *buf,
2031                                         struct tcp_header *n_tcp,
2032                                         struct tcp_header *o_tcp)
2033{
2034    uint32_t nack, oack;
2035    uint16_t nwin, owin;
2036
2037    nack = htonl(n_tcp->th_ack);
2038    nwin = htons(n_tcp->th_win);
2039    oack = htonl(o_tcp->th_ack);
2040    owin = htons(o_tcp->th_win);
2041
2042    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2043        chain->stat.ack_out_of_win++;
2044        return RSC_FINAL;
2045    } else if (nack == oack) {
2046        /* duplicated ack or window probe */
2047        if (nwin == owin) {
2048            /* duplicated ack, add dup ack count due to whql test up to 1 */
2049            chain->stat.dup_ack++;
2050            return RSC_FINAL;
2051        } else {
2052            /* Coalesce window update */
2053            o_tcp->th_win = n_tcp->th_win;
2054            chain->stat.win_update++;
2055            return RSC_COALESCE;
2056        }
2057    } else {
2058        /* pure ack, go to 'C', finalize*/
2059        chain->stat.pure_ack++;
2060        return RSC_FINAL;
2061    }
2062}
2063
2064static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2065                                            VirtioNetRscSeg *seg,
2066                                            const uint8_t *buf,
2067                                            VirtioNetRscUnit *n_unit)
2068{
2069    void *data;
2070    uint16_t o_ip_len;
2071    uint32_t nseq, oseq;
2072    VirtioNetRscUnit *o_unit;
2073
2074    o_unit = &seg->unit;
2075    o_ip_len = htons(*o_unit->ip_plen);
2076    nseq = htonl(n_unit->tcp->th_seq);
2077    oseq = htonl(o_unit->tcp->th_seq);
2078
2079    /* out of order or retransmitted. */
2080    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2081        chain->stat.data_out_of_win++;
2082        return RSC_FINAL;
2083    }
2084
2085    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2086    if (nseq == oseq) {
2087        if ((o_unit->payload == 0) && n_unit->payload) {
2088            /* From no payload to payload, normal case, not a dup ack or etc */
2089            chain->stat.data_after_pure_ack++;
2090            goto coalesce;
2091        } else {
2092            return virtio_net_rsc_handle_ack(chain, seg, buf,
2093                                             n_unit->tcp, o_unit->tcp);
2094        }
2095    } else if ((nseq - oseq) != o_unit->payload) {
2096        /* Not a consistent packet, out of order */
2097        chain->stat.data_out_of_order++;
2098        return RSC_FINAL;
2099    } else {
2100coalesce:
2101        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2102            chain->stat.over_size++;
2103            return RSC_FINAL;
2104        }
2105
2106        /* Here comes the right data, the payload length in v4/v6 is different,
2107           so use the field value to update and record the new data len */
2108        o_unit->payload += n_unit->payload; /* update new data len */
2109
2110        /* update field in ip header */
2111        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2112
2113        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2114           for windows guest, while this may change the behavior for linux
2115           guest (only if it uses RSC feature). */
2116        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2117
2118        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2119        o_unit->tcp->th_win = n_unit->tcp->th_win;
2120
2121        memmove(seg->buf + seg->size, data, n_unit->payload);
2122        seg->size += n_unit->payload;
2123        seg->packets++;
2124        chain->stat.coalesced++;
2125        return RSC_COALESCE;
2126    }
2127}
2128
2129static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2130                                        VirtioNetRscSeg *seg,
2131                                        const uint8_t *buf, size_t size,
2132                                        VirtioNetRscUnit *unit)
2133{
2134    struct ip_header *ip1, *ip2;
2135
2136    ip1 = (struct ip_header *)(unit->ip);
2137    ip2 = (struct ip_header *)(seg->unit.ip);
2138    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2139        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2140        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2141        chain->stat.no_match++;
2142        return RSC_NO_MATCH;
2143    }
2144
2145    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2146}
2147
2148static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2149                                        VirtioNetRscSeg *seg,
2150                                        const uint8_t *buf, size_t size,
2151                                        VirtioNetRscUnit *unit)
2152{
2153    struct ip6_header *ip1, *ip2;
2154
2155    ip1 = (struct ip6_header *)(unit->ip);
2156    ip2 = (struct ip6_header *)(seg->unit.ip);
2157    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2158        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2159        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2160        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2161            chain->stat.no_match++;
2162            return RSC_NO_MATCH;
2163    }
2164
2165    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2166}
2167
2168/* Packets with 'SYN' should bypass, other flag should be sent after drain
2169 * to prevent out of order */
2170static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2171                                         struct tcp_header *tcp)
2172{
2173    uint16_t tcp_hdr;
2174    uint16_t tcp_flag;
2175
2176    tcp_flag = htons(tcp->th_offset_flags);
2177    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2178    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2179    if (tcp_flag & TH_SYN) {
2180        chain->stat.tcp_syn++;
2181        return RSC_BYPASS;
2182    }
2183
2184    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2185        chain->stat.tcp_ctrl_drain++;
2186        return RSC_FINAL;
2187    }
2188
2189    if (tcp_hdr > sizeof(struct tcp_header)) {
2190        chain->stat.tcp_all_opt++;
2191        return RSC_FINAL;
2192    }
2193
2194    return RSC_CANDIDATE;
2195}
2196
2197static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2198                                         NetClientState *nc,
2199                                         const uint8_t *buf, size_t size,
2200                                         VirtioNetRscUnit *unit)
2201{
2202    int ret;
2203    VirtioNetRscSeg *seg, *nseg;
2204
2205    if (QTAILQ_EMPTY(&chain->buffers)) {
2206        chain->stat.empty_cache++;
2207        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2208        timer_mod(chain->drain_timer,
2209              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2210        return size;
2211    }
2212
2213    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2214        if (chain->proto == ETH_P_IP) {
2215            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2216        } else {
2217            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2218        }
2219
2220        if (ret == RSC_FINAL) {
2221            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2222                /* Send failed */
2223                chain->stat.final_failed++;
2224                return 0;
2225            }
2226
2227            /* Send current packet */
2228            return virtio_net_do_receive(nc, buf, size);
2229        } else if (ret == RSC_NO_MATCH) {
2230            continue;
2231        } else {
2232            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2233            seg->is_coalesced = 1;
2234            return size;
2235        }
2236    }
2237
2238    chain->stat.no_match_cache++;
2239    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2240    return size;
2241}
2242
2243/* Drain a connection data, this is to avoid out of order segments */
2244static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2245                                        NetClientState *nc,
2246                                        const uint8_t *buf, size_t size,
2247                                        uint16_t ip_start, uint16_t ip_size,
2248                                        uint16_t tcp_port)
2249{
2250    VirtioNetRscSeg *seg, *nseg;
2251    uint32_t ppair1, ppair2;
2252
2253    ppair1 = *(uint32_t *)(buf + tcp_port);
2254    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2255        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2256        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2257            || (ppair1 != ppair2)) {
2258            continue;
2259        }
2260        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2261            chain->stat.drain_failed++;
2262        }
2263
2264        break;
2265    }
2266
2267    return virtio_net_do_receive(nc, buf, size);
2268}
2269
2270static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2271                                            struct ip_header *ip,
2272                                            const uint8_t *buf, size_t size)
2273{
2274    uint16_t ip_len;
2275
2276    /* Not an ipv4 packet */
2277    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2278        chain->stat.ip_option++;
2279        return RSC_BYPASS;
2280    }
2281
2282    /* Don't handle packets with ip option */
2283    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2284        chain->stat.ip_option++;
2285        return RSC_BYPASS;
2286    }
2287
2288    if (ip->ip_p != IPPROTO_TCP) {
2289        chain->stat.bypass_not_tcp++;
2290        return RSC_BYPASS;
2291    }
2292
2293    /* Don't handle packets with ip fragment */
2294    if (!(htons(ip->ip_off) & IP_DF)) {
2295        chain->stat.ip_frag++;
2296        return RSC_BYPASS;
2297    }
2298
2299    /* Don't handle packets with ecn flag */
2300    if (IPTOS_ECN(ip->ip_tos)) {
2301        chain->stat.ip_ecn++;
2302        return RSC_BYPASS;
2303    }
2304
2305    ip_len = htons(ip->ip_len);
2306    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2307        || ip_len > (size - chain->n->guest_hdr_len -
2308                     sizeof(struct eth_header))) {
2309        chain->stat.ip_hacked++;
2310        return RSC_BYPASS;
2311    }
2312
2313    return RSC_CANDIDATE;
2314}
2315
2316static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2317                                      NetClientState *nc,
2318                                      const uint8_t *buf, size_t size)
2319{
2320    int32_t ret;
2321    uint16_t hdr_len;
2322    VirtioNetRscUnit unit;
2323
2324    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2325
2326    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2327        + sizeof(struct tcp_header))) {
2328        chain->stat.bypass_not_tcp++;
2329        return virtio_net_do_receive(nc, buf, size);
2330    }
2331
2332    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2333    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2334        != RSC_CANDIDATE) {
2335        return virtio_net_do_receive(nc, buf, size);
2336    }
2337
2338    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2339    if (ret == RSC_BYPASS) {
2340        return virtio_net_do_receive(nc, buf, size);
2341    } else if (ret == RSC_FINAL) {
2342        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2343                ((hdr_len + sizeof(struct eth_header)) + 12),
2344                VIRTIO_NET_IP4_ADDR_SIZE,
2345                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2346    }
2347
2348    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2349}
2350
2351static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2352                                            struct ip6_header *ip6,
2353                                            const uint8_t *buf, size_t size)
2354{
2355    uint16_t ip_len;
2356
2357    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2358        != IP_HEADER_VERSION_6) {
2359        return RSC_BYPASS;
2360    }
2361
2362    /* Both option and protocol is checked in this */
2363    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2364        chain->stat.bypass_not_tcp++;
2365        return RSC_BYPASS;
2366    }
2367
2368    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2369    if (ip_len < sizeof(struct tcp_header) ||
2370        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2371                  - sizeof(struct ip6_header))) {
2372        chain->stat.ip_hacked++;
2373        return RSC_BYPASS;
2374    }
2375
2376    /* Don't handle packets with ecn flag */
2377    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2378        chain->stat.ip_ecn++;
2379        return RSC_BYPASS;
2380    }
2381
2382    return RSC_CANDIDATE;
2383}
2384
2385static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2386                                      const uint8_t *buf, size_t size)
2387{
2388    int32_t ret;
2389    uint16_t hdr_len;
2390    VirtioNetRscChain *chain;
2391    VirtioNetRscUnit unit;
2392
2393    chain = (VirtioNetRscChain *)opq;
2394    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2395
2396    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2397        + sizeof(tcp_header))) {
2398        return virtio_net_do_receive(nc, buf, size);
2399    }
2400
2401    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2402    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2403                                                 unit.ip, buf, size)) {
2404        return virtio_net_do_receive(nc, buf, size);
2405    }
2406
2407    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2408    if (ret == RSC_BYPASS) {
2409        return virtio_net_do_receive(nc, buf, size);
2410    } else if (ret == RSC_FINAL) {
2411        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2412                ((hdr_len + sizeof(struct eth_header)) + 8),
2413                VIRTIO_NET_IP6_ADDR_SIZE,
2414                hdr_len + sizeof(struct eth_header)
2415                + sizeof(struct ip6_header));
2416    }
2417
2418    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2419}
2420
2421static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2422                                                      NetClientState *nc,
2423                                                      uint16_t proto)
2424{
2425    VirtioNetRscChain *chain;
2426
2427    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2428        return NULL;
2429    }
2430
2431    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2432        if (chain->proto == proto) {
2433            return chain;
2434        }
2435    }
2436
2437    chain = g_malloc(sizeof(*chain));
2438    chain->n = n;
2439    chain->proto = proto;
2440    if (proto == (uint16_t)ETH_P_IP) {
2441        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2442        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2443    } else {
2444        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2445        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2446    }
2447    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2448                                      virtio_net_rsc_purge, chain);
2449    memset(&chain->stat, 0, sizeof(chain->stat));
2450
2451    QTAILQ_INIT(&chain->buffers);
2452    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2453
2454    return chain;
2455}
2456
2457static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2458                                      const uint8_t *buf,
2459                                      size_t size)
2460{
2461    uint16_t proto;
2462    VirtioNetRscChain *chain;
2463    struct eth_header *eth;
2464    VirtIONet *n;
2465
2466    n = qemu_get_nic_opaque(nc);
2467    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2468        return virtio_net_do_receive(nc, buf, size);
2469    }
2470
2471    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2472    proto = htons(eth->h_proto);
2473
2474    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2475    if (chain) {
2476        chain->stat.received++;
2477        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2478            return virtio_net_rsc_receive4(chain, nc, buf, size);
2479        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2480            return virtio_net_rsc_receive6(chain, nc, buf, size);
2481        }
2482    }
2483    return virtio_net_do_receive(nc, buf, size);
2484}
2485
2486static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2487                                  size_t size)
2488{
2489    VirtIONet *n = qemu_get_nic_opaque(nc);
2490    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2491        return virtio_net_rsc_receive(nc, buf, size);
2492    } else {
2493        return virtio_net_do_receive(nc, buf, size);
2494    }
2495}
2496
2497static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2498
2499static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2500{
2501    VirtIONet *n = qemu_get_nic_opaque(nc);
2502    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2503    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2504
2505    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2506    virtio_notify(vdev, q->tx_vq);
2507
2508    g_free(q->async_tx.elem);
2509    q->async_tx.elem = NULL;
2510
2511    virtio_queue_set_notification(q->tx_vq, 1);
2512    virtio_net_flush_tx(q);
2513}
2514
2515/* TX */
2516static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2517{
2518    VirtIONet *n = q->n;
2519    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2520    VirtQueueElement *elem;
2521    int32_t num_packets = 0;
2522    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2523    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2524        return num_packets;
2525    }
2526
2527    if (q->async_tx.elem) {
2528        virtio_queue_set_notification(q->tx_vq, 0);
2529        return num_packets;
2530    }
2531
2532    for (;;) {
2533        ssize_t ret;
2534        unsigned int out_num;
2535        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2536        struct virtio_net_hdr_mrg_rxbuf mhdr;
2537
2538        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2539        if (!elem) {
2540            break;
2541        }
2542
2543        out_num = elem->out_num;
2544        out_sg = elem->out_sg;
2545        if (out_num < 1) {
2546            virtio_error(vdev, "virtio-net header not in first element");
2547            virtqueue_detach_element(q->tx_vq, elem, 0);
2548            g_free(elem);
2549            return -EINVAL;
2550        }
2551
2552        if (n->has_vnet_hdr) {
2553            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2554                n->guest_hdr_len) {
2555                virtio_error(vdev, "virtio-net header incorrect");
2556                virtqueue_detach_element(q->tx_vq, elem, 0);
2557                g_free(elem);
2558                return -EINVAL;
2559            }
2560            if (n->needs_vnet_hdr_swap) {
2561                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2562                sg2[0].iov_base = &mhdr;
2563                sg2[0].iov_len = n->guest_hdr_len;
2564                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2565                                   out_sg, out_num,
2566                                   n->guest_hdr_len, -1);
2567                if (out_num == VIRTQUEUE_MAX_SIZE) {
2568                    goto drop;
2569                }
2570                out_num += 1;
2571                out_sg = sg2;
2572            }
2573        }
2574        /*
2575         * If host wants to see the guest header as is, we can
2576         * pass it on unchanged. Otherwise, copy just the parts
2577         * that host is interested in.
2578         */
2579        assert(n->host_hdr_len <= n->guest_hdr_len);
2580        if (n->host_hdr_len != n->guest_hdr_len) {
2581            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2582                                       out_sg, out_num,
2583                                       0, n->host_hdr_len);
2584            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2585                             out_sg, out_num,
2586                             n->guest_hdr_len, -1);
2587            out_num = sg_num;
2588            out_sg = sg;
2589        }
2590
2591        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2592                                      out_sg, out_num, virtio_net_tx_complete);
2593        if (ret == 0) {
2594            virtio_queue_set_notification(q->tx_vq, 0);
2595            q->async_tx.elem = elem;
2596            return -EBUSY;
2597        }
2598
2599drop:
2600        virtqueue_push(q->tx_vq, elem, 0);
2601        virtio_notify(vdev, q->tx_vq);
2602        g_free(elem);
2603
2604        if (++num_packets >= n->tx_burst) {
2605            break;
2606        }
2607    }
2608    return num_packets;
2609}
2610
2611static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2612{
2613    VirtIONet *n = VIRTIO_NET(vdev);
2614    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2615
2616    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2617        virtio_net_drop_tx_queue_data(vdev, vq);
2618        return;
2619    }
2620
2621    /* This happens when device was stopped but VCPU wasn't. */
2622    if (!vdev->vm_running) {
2623        q->tx_waiting = 1;
2624        return;
2625    }
2626
2627    if (q->tx_waiting) {
2628        virtio_queue_set_notification(vq, 1);
2629        timer_del(q->tx_timer);
2630        q->tx_waiting = 0;
2631        if (virtio_net_flush_tx(q) == -EINVAL) {
2632            return;
2633        }
2634    } else {
2635        timer_mod(q->tx_timer,
2636                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2637        q->tx_waiting = 1;
2638        virtio_queue_set_notification(vq, 0);
2639    }
2640}
2641
2642static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2643{
2644    VirtIONet *n = VIRTIO_NET(vdev);
2645    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2646
2647    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2648        virtio_net_drop_tx_queue_data(vdev, vq);
2649        return;
2650    }
2651
2652    if (unlikely(q->tx_waiting)) {
2653        return;
2654    }
2655    q->tx_waiting = 1;
2656    /* This happens when device was stopped but VCPU wasn't. */
2657    if (!vdev->vm_running) {
2658        return;
2659    }
2660    virtio_queue_set_notification(vq, 0);
2661    qemu_bh_schedule(q->tx_bh);
2662}
2663
2664static void virtio_net_tx_timer(void *opaque)
2665{
2666    VirtIONetQueue *q = opaque;
2667    VirtIONet *n = q->n;
2668    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2669    /* This happens when device was stopped but BH wasn't. */
2670    if (!vdev->vm_running) {
2671        /* Make sure tx waiting is set, so we'll run when restarted. */
2672        assert(q->tx_waiting);
2673        return;
2674    }
2675
2676    q->tx_waiting = 0;
2677
2678    /* Just in case the driver is not ready on more */
2679    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2680        return;
2681    }
2682
2683    virtio_queue_set_notification(q->tx_vq, 1);
2684    virtio_net_flush_tx(q);
2685}
2686
2687static void virtio_net_tx_bh(void *opaque)
2688{
2689    VirtIONetQueue *q = opaque;
2690    VirtIONet *n = q->n;
2691    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2692    int32_t ret;
2693
2694    /* This happens when device was stopped but BH wasn't. */
2695    if (!vdev->vm_running) {
2696        /* Make sure tx waiting is set, so we'll run when restarted. */
2697        assert(q->tx_waiting);
2698        return;
2699    }
2700
2701    q->tx_waiting = 0;
2702
2703    /* Just in case the driver is not ready on more */
2704    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2705        return;
2706    }
2707
2708    ret = virtio_net_flush_tx(q);
2709    if (ret == -EBUSY || ret == -EINVAL) {
2710        return; /* Notification re-enable handled by tx_complete or device
2711                 * broken */
2712    }
2713
2714    /* If we flush a full burst of packets, assume there are
2715     * more coming and immediately reschedule */
2716    if (ret >= n->tx_burst) {
2717        qemu_bh_schedule(q->tx_bh);
2718        q->tx_waiting = 1;
2719        return;
2720    }
2721
2722    /* If less than a full burst, re-enable notification and flush
2723     * anything that may have come in while we weren't looking.  If
2724     * we find something, assume the guest is still active and reschedule */
2725    virtio_queue_set_notification(q->tx_vq, 1);
2726    ret = virtio_net_flush_tx(q);
2727    if (ret == -EINVAL) {
2728        return;
2729    } else if (ret > 0) {
2730        virtio_queue_set_notification(q->tx_vq, 0);
2731        qemu_bh_schedule(q->tx_bh);
2732        q->tx_waiting = 1;
2733    }
2734}
2735
2736static void virtio_net_add_queue(VirtIONet *n, int index)
2737{
2738    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2739
2740    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2741                                           virtio_net_handle_rx);
2742
2743    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2744        n->vqs[index].tx_vq =
2745            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2746                             virtio_net_handle_tx_timer);
2747        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2748                                              virtio_net_tx_timer,
2749                                              &n->vqs[index]);
2750    } else {
2751        n->vqs[index].tx_vq =
2752            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2753                             virtio_net_handle_tx_bh);
2754        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2755    }
2756
2757    n->vqs[index].tx_waiting = 0;
2758    n->vqs[index].n = n;
2759}
2760
2761static void virtio_net_del_queue(VirtIONet *n, int index)
2762{
2763    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2764    VirtIONetQueue *q = &n->vqs[index];
2765    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2766
2767    qemu_purge_queued_packets(nc);
2768
2769    virtio_del_queue(vdev, index * 2);
2770    if (q->tx_timer) {
2771        timer_free(q->tx_timer);
2772        q->tx_timer = NULL;
2773    } else {
2774        qemu_bh_delete(q->tx_bh);
2775        q->tx_bh = NULL;
2776    }
2777    q->tx_waiting = 0;
2778    virtio_del_queue(vdev, index * 2 + 1);
2779}
2780
2781static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2782{
2783    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2784    int old_num_queues = virtio_get_num_queues(vdev);
2785    int new_num_queues = new_max_queues * 2 + 1;
2786    int i;
2787
2788    assert(old_num_queues >= 3);
2789    assert(old_num_queues % 2 == 1);
2790
2791    if (old_num_queues == new_num_queues) {
2792        return;
2793    }
2794
2795    /*
2796     * We always need to remove and add ctrl vq if
2797     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2798     * and then we only enter one of the following two loops.
2799     */
2800    virtio_del_queue(vdev, old_num_queues - 1);
2801
2802    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2803        /* new_num_queues < old_num_queues */
2804        virtio_net_del_queue(n, i / 2);
2805    }
2806
2807    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2808        /* new_num_queues > old_num_queues */
2809        virtio_net_add_queue(n, i / 2);
2810    }
2811
2812    /* add ctrl_vq last */
2813    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2814}
2815
2816static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2817{
2818    int max = multiqueue ? n->max_queues : 1;
2819
2820    n->multiqueue = multiqueue;
2821    virtio_net_change_num_queues(n, max);
2822
2823    virtio_net_set_queues(n);
2824}
2825
2826static int virtio_net_post_load_device(void *opaque, int version_id)
2827{
2828    VirtIONet *n = opaque;
2829    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2830    int i, link_down;
2831
2832    trace_virtio_net_post_load_device();
2833    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2834                               virtio_vdev_has_feature(vdev,
2835                                                       VIRTIO_F_VERSION_1),
2836                               virtio_vdev_has_feature(vdev,
2837                                                       VIRTIO_NET_F_HASH_REPORT));
2838
2839    /* MAC_TABLE_ENTRIES may be different from the saved image */
2840    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2841        n->mac_table.in_use = 0;
2842    }
2843
2844    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2845        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2846    }
2847
2848    /*
2849     * curr_guest_offloads will be later overwritten by the
2850     * virtio_set_features_nocheck call done from the virtio_load.
2851     * Here we make sure it is preserved and restored accordingly
2852     * in the virtio_net_post_load_virtio callback.
2853     */
2854    n->saved_guest_offloads = n->curr_guest_offloads;
2855
2856    virtio_net_set_queues(n);
2857
2858    /* Find the first multicast entry in the saved MAC filter */
2859    for (i = 0; i < n->mac_table.in_use; i++) {
2860        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2861            break;
2862        }
2863    }
2864    n->mac_table.first_multi = i;
2865
2866    /* nc.link_down can't be migrated, so infer link_down according
2867     * to link status bit in n->status */
2868    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2869    for (i = 0; i < n->max_queues; i++) {
2870        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2871    }
2872
2873    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2874        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2875        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2876                                  QEMU_CLOCK_VIRTUAL,
2877                                  virtio_net_announce_timer, n);
2878        if (n->announce_timer.round) {
2879            timer_mod(n->announce_timer.tm,
2880                      qemu_clock_get_ms(n->announce_timer.type));
2881        } else {
2882            qemu_announce_timer_del(&n->announce_timer, false);
2883        }
2884    }
2885
2886    if (n->rss_data.enabled) {
2887        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2888        if (!n->rss_data.populate_hash) {
2889            if (!virtio_net_attach_epbf_rss(n)) {
2890                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2891                    warn_report("Can't post-load eBPF RSS for vhost");
2892                } else {
2893                    warn_report("Can't post-load eBPF RSS - "
2894                                "fallback to software RSS");
2895                    n->rss_data.enabled_software_rss = true;
2896                }
2897            }
2898        }
2899
2900        trace_virtio_net_rss_enable(n->rss_data.hash_types,
2901                                    n->rss_data.indirections_len,
2902                                    sizeof(n->rss_data.key));
2903    } else {
2904        trace_virtio_net_rss_disable();
2905    }
2906    return 0;
2907}
2908
2909static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2910{
2911    VirtIONet *n = VIRTIO_NET(vdev);
2912    /*
2913     * The actual needed state is now in saved_guest_offloads,
2914     * see virtio_net_post_load_device for detail.
2915     * Restore it back and apply the desired offloads.
2916     */
2917    n->curr_guest_offloads = n->saved_guest_offloads;
2918    if (peer_has_vnet_hdr(n)) {
2919        virtio_net_apply_guest_offloads(n);
2920    }
2921
2922    return 0;
2923}
2924
2925/* tx_waiting field of a VirtIONetQueue */
2926static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2927    .name = "virtio-net-queue-tx_waiting",
2928    .fields = (VMStateField[]) {
2929        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2930        VMSTATE_END_OF_LIST()
2931   },
2932};
2933
2934static bool max_queues_gt_1(void *opaque, int version_id)
2935{
2936    return VIRTIO_NET(opaque)->max_queues > 1;
2937}
2938
2939static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2940{
2941    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2942                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2943}
2944
2945static bool mac_table_fits(void *opaque, int version_id)
2946{
2947    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2948}
2949
2950static bool mac_table_doesnt_fit(void *opaque, int version_id)
2951{
2952    return !mac_table_fits(opaque, version_id);
2953}
2954
2955/* This temporary type is shared by all the WITH_TMP methods
2956 * although only some fields are used by each.
2957 */
2958struct VirtIONetMigTmp {
2959    VirtIONet      *parent;
2960    VirtIONetQueue *vqs_1;
2961    uint16_t        curr_queues_1;
2962    uint8_t         has_ufo;
2963    uint32_t        has_vnet_hdr;
2964};
2965
2966/* The 2nd and subsequent tx_waiting flags are loaded later than
2967 * the 1st entry in the queues and only if there's more than one
2968 * entry.  We use the tmp mechanism to calculate a temporary
2969 * pointer and count and also validate the count.
2970 */
2971
2972static int virtio_net_tx_waiting_pre_save(void *opaque)
2973{
2974    struct VirtIONetMigTmp *tmp = opaque;
2975
2976    tmp->vqs_1 = tmp->parent->vqs + 1;
2977    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2978    if (tmp->parent->curr_queues == 0) {
2979        tmp->curr_queues_1 = 0;
2980    }
2981
2982    return 0;
2983}
2984
2985static int virtio_net_tx_waiting_pre_load(void *opaque)
2986{
2987    struct VirtIONetMigTmp *tmp = opaque;
2988
2989    /* Reuse the pointer setup from save */
2990    virtio_net_tx_waiting_pre_save(opaque);
2991
2992    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2993        error_report("virtio-net: curr_queues %x > max_queues %x",
2994            tmp->parent->curr_queues, tmp->parent->max_queues);
2995
2996        return -EINVAL;
2997    }
2998
2999    return 0; /* all good */
3000}
3001
3002static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3003    .name      = "virtio-net-tx_waiting",
3004    .pre_load  = virtio_net_tx_waiting_pre_load,
3005    .pre_save  = virtio_net_tx_waiting_pre_save,
3006    .fields    = (VMStateField[]) {
3007        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3008                                     curr_queues_1,
3009                                     vmstate_virtio_net_queue_tx_waiting,
3010                                     struct VirtIONetQueue),
3011        VMSTATE_END_OF_LIST()
3012    },
3013};
3014
3015/* the 'has_ufo' flag is just tested; if the incoming stream has the
3016 * flag set we need to check that we have it
3017 */
3018static int virtio_net_ufo_post_load(void *opaque, int version_id)
3019{
3020    struct VirtIONetMigTmp *tmp = opaque;
3021
3022    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3023        error_report("virtio-net: saved image requires TUN_F_UFO support");
3024        return -EINVAL;
3025    }
3026
3027    return 0;
3028}
3029
3030static int virtio_net_ufo_pre_save(void *opaque)
3031{
3032    struct VirtIONetMigTmp *tmp = opaque;
3033
3034    tmp->has_ufo = tmp->parent->has_ufo;
3035
3036    return 0;
3037}
3038
3039static const VMStateDescription vmstate_virtio_net_has_ufo = {
3040    .name      = "virtio-net-ufo",
3041    .post_load = virtio_net_ufo_post_load,
3042    .pre_save  = virtio_net_ufo_pre_save,
3043    .fields    = (VMStateField[]) {
3044        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3045        VMSTATE_END_OF_LIST()
3046    },
3047};
3048
3049/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3050 * flag set we need to check that we have it
3051 */
3052static int virtio_net_vnet_post_load(void *opaque, int version_id)
3053{
3054    struct VirtIONetMigTmp *tmp = opaque;
3055
3056    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3057        error_report("virtio-net: saved image requires vnet_hdr=on");
3058        return -EINVAL;
3059    }
3060
3061    return 0;
3062}
3063
3064static int virtio_net_vnet_pre_save(void *opaque)
3065{
3066    struct VirtIONetMigTmp *tmp = opaque;
3067
3068    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3069
3070    return 0;
3071}
3072
3073static const VMStateDescription vmstate_virtio_net_has_vnet = {
3074    .name      = "virtio-net-vnet",
3075    .post_load = virtio_net_vnet_post_load,
3076    .pre_save  = virtio_net_vnet_pre_save,
3077    .fields    = (VMStateField[]) {
3078        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3079        VMSTATE_END_OF_LIST()
3080    },
3081};
3082
3083static bool virtio_net_rss_needed(void *opaque)
3084{
3085    return VIRTIO_NET(opaque)->rss_data.enabled;
3086}
3087
3088static const VMStateDescription vmstate_virtio_net_rss = {
3089    .name      = "virtio-net-device/rss",
3090    .version_id = 1,
3091    .minimum_version_id = 1,
3092    .needed = virtio_net_rss_needed,
3093    .fields = (VMStateField[]) {
3094        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3095        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3096        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3097        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3098        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3099        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3100        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3101                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3102        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3103                                    rss_data.indirections_len, 0,
3104                                    vmstate_info_uint16, uint16_t),
3105        VMSTATE_END_OF_LIST()
3106    },
3107};
3108
3109static const VMStateDescription vmstate_virtio_net_device = {
3110    .name = "virtio-net-device",
3111    .version_id = VIRTIO_NET_VM_VERSION,
3112    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3113    .post_load = virtio_net_post_load_device,
3114    .fields = (VMStateField[]) {
3115        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3116        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3117                               vmstate_virtio_net_queue_tx_waiting,
3118                               VirtIONetQueue),
3119        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3120        VMSTATE_UINT16(status, VirtIONet),
3121        VMSTATE_UINT8(promisc, VirtIONet),
3122        VMSTATE_UINT8(allmulti, VirtIONet),
3123        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3124
3125        /* Guarded pair: If it fits we load it, else we throw it away
3126         * - can happen if source has a larger MAC table.; post-load
3127         *  sets flags in this case.
3128         */
3129        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3130                                0, mac_table_fits, mac_table.in_use,
3131                                 ETH_ALEN),
3132        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3133                                     mac_table.in_use, ETH_ALEN),
3134
3135        /* Note: This is an array of uint32's that's always been saved as a
3136         * buffer; hold onto your endiannesses; it's actually used as a bitmap
3137         * but based on the uint.
3138         */
3139        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3140        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3141                         vmstate_virtio_net_has_vnet),
3142        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3143        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3144        VMSTATE_UINT8(alluni, VirtIONet),
3145        VMSTATE_UINT8(nomulti, VirtIONet),
3146        VMSTATE_UINT8(nouni, VirtIONet),
3147        VMSTATE_UINT8(nobcast, VirtIONet),
3148        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3149                         vmstate_virtio_net_has_ufo),
3150        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3151                            vmstate_info_uint16_equal, uint16_t),
3152        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3153        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3154                         vmstate_virtio_net_tx_waiting),
3155        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3156                            has_ctrl_guest_offloads),
3157        VMSTATE_END_OF_LIST()
3158   },
3159    .subsections = (const VMStateDescription * []) {
3160        &vmstate_virtio_net_rss,
3161        NULL
3162    }
3163};
3164
3165static NetClientInfo net_virtio_info = {
3166    .type = NET_CLIENT_DRIVER_NIC,
3167    .size = sizeof(NICState),
3168    .can_receive = virtio_net_can_receive,
3169    .receive = virtio_net_receive,
3170    .link_status_changed = virtio_net_set_link_status,
3171    .query_rx_filter = virtio_net_query_rxfilter,
3172    .announce = virtio_net_announce,
3173};
3174
3175static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3176{
3177    VirtIONet *n = VIRTIO_NET(vdev);
3178    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3179    assert(n->vhost_started);
3180    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3181}
3182
3183static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3184                                           bool mask)
3185{
3186    VirtIONet *n = VIRTIO_NET(vdev);
3187    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3188    assert(n->vhost_started);
3189    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3190                             vdev, idx, mask);
3191}
3192
3193static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3194{
3195    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3196
3197    n->config_size = virtio_feature_get_config_size(feature_sizes,
3198                                                    host_features);
3199}
3200
3201void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3202                                   const char *type)
3203{
3204    /*
3205     * The name can be NULL, the netclient name will be type.x.
3206     */
3207    assert(type != NULL);
3208
3209    g_free(n->netclient_name);
3210    g_free(n->netclient_type);
3211    n->netclient_name = g_strdup(name);
3212    n->netclient_type = g_strdup(type);
3213}
3214
3215static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3216{
3217    HotplugHandler *hotplug_ctrl;
3218    PCIDevice *pci_dev;
3219    Error *err = NULL;
3220
3221    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3222    if (hotplug_ctrl) {
3223        pci_dev = PCI_DEVICE(dev);
3224        pci_dev->partially_hotplugged = true;
3225        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3226        if (err) {
3227            error_report_err(err);
3228            return false;
3229        }
3230    } else {
3231        return false;
3232    }
3233    return true;
3234}
3235
3236static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3237                                    Error **errp)
3238{
3239    Error *err = NULL;
3240    HotplugHandler *hotplug_ctrl;
3241    PCIDevice *pdev = PCI_DEVICE(dev);
3242    BusState *primary_bus;
3243
3244    if (!pdev->partially_hotplugged) {
3245        return true;
3246    }
3247    primary_bus = dev->parent_bus;
3248    if (!primary_bus) {
3249        error_setg(errp, "virtio_net: couldn't find primary bus");
3250        return false;
3251    }
3252    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3253    qatomic_set(&n->failover_primary_hidden, false);
3254    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3255    if (hotplug_ctrl) {
3256        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3257        if (err) {
3258            goto out;
3259        }
3260        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3261    }
3262    pdev->partially_hotplugged = false;
3263
3264out:
3265    error_propagate(errp, err);
3266    return !err;
3267}
3268
3269static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3270{
3271    bool should_be_hidden;
3272    Error *err = NULL;
3273    DeviceState *dev = failover_find_primary_device(n);
3274
3275    if (!dev) {
3276        return;
3277    }
3278
3279    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3280
3281    if (migration_in_setup(s) && !should_be_hidden) {
3282        if (failover_unplug_primary(n, dev)) {
3283            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3284            qapi_event_send_unplug_primary(dev->id);
3285            qatomic_set(&n->failover_primary_hidden, true);
3286        } else {
3287            warn_report("couldn't unplug primary device");
3288        }
3289    } else if (migration_has_failed(s)) {
3290        /* We already unplugged the device let's plug it back */
3291        if (!failover_replug_primary(n, dev, &err)) {
3292            if (err) {
3293                error_report_err(err);
3294            }
3295        }
3296    }
3297}
3298
3299static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3300{
3301    MigrationState *s = data;
3302    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3303    virtio_net_handle_migration_primary(n, s);
3304}
3305
3306static bool failover_hide_primary_device(DeviceListener *listener,
3307                                         QemuOpts *device_opts)
3308{
3309    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3310    const char *standby_id;
3311
3312    if (!device_opts) {
3313        return false;
3314    }
3315    standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3316    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3317        return false;
3318    }
3319
3320    /* failover_primary_hidden is set during feature negotiation */
3321    return qatomic_read(&n->failover_primary_hidden);
3322}
3323
3324static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3325{
3326    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3327    VirtIONet *n = VIRTIO_NET(dev);
3328    NetClientState *nc;
3329    int i;
3330
3331    if (n->net_conf.mtu) {
3332        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3333    }
3334
3335    if (n->net_conf.duplex_str) {
3336        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3337            n->net_conf.duplex = DUPLEX_HALF;
3338        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3339            n->net_conf.duplex = DUPLEX_FULL;
3340        } else {
3341            error_setg(errp, "'duplex' must be 'half' or 'full'");
3342            return;
3343        }
3344        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3345    } else {
3346        n->net_conf.duplex = DUPLEX_UNKNOWN;
3347    }
3348
3349    if (n->net_conf.speed < SPEED_UNKNOWN) {
3350        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3351        return;
3352    }
3353    if (n->net_conf.speed >= 0) {
3354        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3355    }
3356
3357    if (n->failover) {
3358        n->primary_listener.hide_device = failover_hide_primary_device;
3359        qatomic_set(&n->failover_primary_hidden, true);
3360        device_listener_register(&n->primary_listener);
3361        n->migration_state.notify = virtio_net_migration_state_notifier;
3362        add_migration_state_change_notifier(&n->migration_state);
3363        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3364    }
3365
3366    virtio_net_set_config_size(n, n->host_features);
3367    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3368
3369    /*
3370     * We set a lower limit on RX queue size to what it always was.
3371     * Guests that want a smaller ring can always resize it without
3372     * help from us (using virtio 1 and up).
3373     */
3374    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3375        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3376        !is_power_of_2(n->net_conf.rx_queue_size)) {
3377        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3378                   "must be a power of 2 between %d and %d.",
3379                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3380                   VIRTQUEUE_MAX_SIZE);
3381        virtio_cleanup(vdev);
3382        return;
3383    }
3384
3385    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3386        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3387        !is_power_of_2(n->net_conf.tx_queue_size)) {
3388        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3389                   "must be a power of 2 between %d and %d",
3390                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3391                   VIRTQUEUE_MAX_SIZE);
3392        virtio_cleanup(vdev);
3393        return;
3394    }
3395
3396    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3397    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3398        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3399                   "must be a positive integer less than %d.",
3400                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3401        virtio_cleanup(vdev);
3402        return;
3403    }
3404    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3405    n->curr_queues = 1;
3406    n->tx_timeout = n->net_conf.txtimer;
3407
3408    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3409                       && strcmp(n->net_conf.tx, "bh")) {
3410        warn_report("virtio-net: "
3411                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3412                    n->net_conf.tx);
3413        error_printf("Defaulting to \"bh\"");
3414    }
3415
3416    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3417                                    n->net_conf.tx_queue_size);
3418
3419    for (i = 0; i < n->max_queues; i++) {
3420        virtio_net_add_queue(n, i);
3421    }
3422
3423    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3424    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3425    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3426    n->status = VIRTIO_NET_S_LINK_UP;
3427    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3428                              QEMU_CLOCK_VIRTUAL,
3429                              virtio_net_announce_timer, n);
3430    n->announce_timer.round = 0;
3431
3432    if (n->netclient_type) {
3433        /*
3434         * Happen when virtio_net_set_netclient_name has been called.
3435         */
3436        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3437                              n->netclient_type, n->netclient_name, n);
3438    } else {
3439        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3440                              object_get_typename(OBJECT(dev)), dev->id, n);
3441    }
3442
3443    for (i = 0; i < n->max_queues; i++) {
3444        n->nic->ncs[i].do_not_pad = true;
3445    }
3446
3447    peer_test_vnet_hdr(n);
3448    if (peer_has_vnet_hdr(n)) {
3449        for (i = 0; i < n->max_queues; i++) {
3450            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3451        }
3452        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3453    } else {
3454        n->host_hdr_len = 0;
3455    }
3456
3457    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3458
3459    n->vqs[0].tx_waiting = 0;
3460    n->tx_burst = n->net_conf.txburst;
3461    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3462    n->promisc = 1; /* for compatibility */
3463
3464    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3465
3466    n->vlans = g_malloc0(MAX_VLAN >> 3);
3467
3468    nc = qemu_get_queue(n->nic);
3469    nc->rxfilter_notify_enabled = 1;
3470
3471   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3472        struct virtio_net_config netcfg = {};
3473        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3474        vhost_net_set_config(get_vhost_net(nc->peer),
3475            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3476    }
3477    QTAILQ_INIT(&n->rsc_chains);
3478    n->qdev = dev;
3479
3480    net_rx_pkt_init(&n->rx_pkt, false);
3481
3482    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3483        virtio_net_load_ebpf(n);
3484    }
3485}
3486
3487static void virtio_net_device_unrealize(DeviceState *dev)
3488{
3489    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3490    VirtIONet *n = VIRTIO_NET(dev);
3491    int i, max_queues;
3492
3493    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3494        virtio_net_unload_ebpf(n);
3495    }
3496
3497    /* This will stop vhost backend if appropriate. */
3498    virtio_net_set_status(vdev, 0);
3499
3500    g_free(n->netclient_name);
3501    n->netclient_name = NULL;
3502    g_free(n->netclient_type);
3503    n->netclient_type = NULL;
3504
3505    g_free(n->mac_table.macs);
3506    g_free(n->vlans);
3507
3508    if (n->failover) {
3509        device_listener_unregister(&n->primary_listener);
3510        remove_migration_state_change_notifier(&n->migration_state);
3511    }
3512
3513    max_queues = n->multiqueue ? n->max_queues : 1;
3514    for (i = 0; i < max_queues; i++) {
3515        virtio_net_del_queue(n, i);
3516    }
3517    /* delete also control vq */
3518    virtio_del_queue(vdev, max_queues * 2);
3519    qemu_announce_timer_del(&n->announce_timer, false);
3520    g_free(n->vqs);
3521    qemu_del_nic(n->nic);
3522    virtio_net_rsc_cleanup(n);
3523    g_free(n->rss_data.indirections_table);
3524    net_rx_pkt_uninit(n->rx_pkt);
3525    virtio_cleanup(vdev);
3526}
3527
3528static void virtio_net_instance_init(Object *obj)
3529{
3530    VirtIONet *n = VIRTIO_NET(obj);
3531
3532    /*
3533     * The default config_size is sizeof(struct virtio_net_config).
3534     * Can be overriden with virtio_net_set_config_size.
3535     */
3536    n->config_size = sizeof(struct virtio_net_config);
3537    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3538                                  "bootindex", "/ethernet-phy@0",
3539                                  DEVICE(n));
3540
3541    ebpf_rss_init(&n->ebpf_rss);
3542}
3543
3544static int virtio_net_pre_save(void *opaque)
3545{
3546    VirtIONet *n = opaque;
3547
3548    /* At this point, backend must be stopped, otherwise
3549     * it might keep writing to memory. */
3550    assert(!n->vhost_started);
3551
3552    return 0;
3553}
3554
3555static bool primary_unplug_pending(void *opaque)
3556{
3557    DeviceState *dev = opaque;
3558    DeviceState *primary;
3559    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3560    VirtIONet *n = VIRTIO_NET(vdev);
3561
3562    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3563        return false;
3564    }
3565    primary = failover_find_primary_device(n);
3566    return primary ? primary->pending_deleted_event : false;
3567}
3568
3569static bool dev_unplug_pending(void *opaque)
3570{
3571    DeviceState *dev = opaque;
3572    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3573
3574    return vdc->primary_unplug_pending(dev);
3575}
3576
3577static const VMStateDescription vmstate_virtio_net = {
3578    .name = "virtio-net",
3579    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3580    .version_id = VIRTIO_NET_VM_VERSION,
3581    .fields = (VMStateField[]) {
3582        VMSTATE_VIRTIO_DEVICE,
3583        VMSTATE_END_OF_LIST()
3584    },
3585    .pre_save = virtio_net_pre_save,
3586    .dev_unplug_pending = dev_unplug_pending,
3587};
3588
3589static Property virtio_net_properties[] = {
3590    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3591                    VIRTIO_NET_F_CSUM, true),
3592    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3593                    VIRTIO_NET_F_GUEST_CSUM, true),
3594    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3595    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3596                    VIRTIO_NET_F_GUEST_TSO4, true),
3597    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3598                    VIRTIO_NET_F_GUEST_TSO6, true),
3599    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3600                    VIRTIO_NET_F_GUEST_ECN, true),
3601    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3602                    VIRTIO_NET_F_GUEST_UFO, true),
3603    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3604                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3605    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3606                    VIRTIO_NET_F_HOST_TSO4, true),
3607    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3608                    VIRTIO_NET_F_HOST_TSO6, true),
3609    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3610                    VIRTIO_NET_F_HOST_ECN, true),
3611    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3612                    VIRTIO_NET_F_HOST_UFO, true),
3613    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3614                    VIRTIO_NET_F_MRG_RXBUF, true),
3615    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3616                    VIRTIO_NET_F_STATUS, true),
3617    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3618                    VIRTIO_NET_F_CTRL_VQ, true),
3619    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3620                    VIRTIO_NET_F_CTRL_RX, true),
3621    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3622                    VIRTIO_NET_F_CTRL_VLAN, true),
3623    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3624                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3625    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3626                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3627    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3628                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3629    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3630    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3631                    VIRTIO_NET_F_RSS, false),
3632    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3633                    VIRTIO_NET_F_HASH_REPORT, false),
3634    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3635                    VIRTIO_NET_F_RSC_EXT, false),
3636    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3637                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3638    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3639    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3640                       TX_TIMER_INTERVAL),
3641    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3642    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3643    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3644                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3645    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3646                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3647    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3648    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3649                     true),
3650    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3651    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3652    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3653    DEFINE_PROP_END_OF_LIST(),
3654};
3655
3656static void virtio_net_class_init(ObjectClass *klass, void *data)
3657{
3658    DeviceClass *dc = DEVICE_CLASS(klass);
3659    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3660
3661    device_class_set_props(dc, virtio_net_properties);
3662    dc->vmsd = &vmstate_virtio_net;
3663    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3664    vdc->realize = virtio_net_device_realize;
3665    vdc->unrealize = virtio_net_device_unrealize;
3666    vdc->get_config = virtio_net_get_config;
3667    vdc->set_config = virtio_net_set_config;
3668    vdc->get_features = virtio_net_get_features;
3669    vdc->set_features = virtio_net_set_features;
3670    vdc->bad_features = virtio_net_bad_features;
3671    vdc->reset = virtio_net_reset;
3672    vdc->set_status = virtio_net_set_status;
3673    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3674    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3675    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3676    vdc->post_load = virtio_net_post_load_virtio;
3677    vdc->vmsd = &vmstate_virtio_net_device;
3678    vdc->primary_unplug_pending = primary_unplug_pending;
3679}
3680
3681static const TypeInfo virtio_net_info = {
3682    .name = TYPE_VIRTIO_NET,
3683    .parent = TYPE_VIRTIO_DEVICE,
3684    .instance_size = sizeof(VirtIONet),
3685    .instance_init = virtio_net_instance_init,
3686    .class_init = virtio_net_class_init,
3687};
3688
3689static void virtio_register_types(void)
3690{
3691    type_register_static(&virtio_net_info);
3692}
3693
3694type_init(virtio_register_types)
3695