/* qemu/hw/net/virtio-net.c */
   1/*
   2 * Virtio Network Device
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/atomic.h"
  16#include "qemu/iov.h"
  17#include "qemu/main-loop.h"
  18#include "qemu/module.h"
  19#include "hw/virtio/virtio.h"
  20#include "net/net.h"
  21#include "net/checksum.h"
  22#include "net/tap.h"
  23#include "qemu/error-report.h"
  24#include "qemu/timer.h"
  25#include "qemu/option.h"
  26#include "qemu/option_int.h"
  27#include "qemu/config-file.h"
  28#include "qapi/qmp/qdict.h"
  29#include "hw/virtio/virtio-net.h"
  30#include "net/vhost_net.h"
  31#include "net/announce.h"
  32#include "hw/virtio/virtio-bus.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-net.h"
  35#include "hw/qdev-properties.h"
  36#include "qapi/qapi-types-migration.h"
  37#include "qapi/qapi-events-migration.h"
  38#include "hw/virtio/virtio-access.h"
  39#include "migration/misc.h"
  40#include "standard-headers/linux/ethtool.h"
  41#include "sysemu/sysemu.h"
  42#include "trace.h"
  43#include "monitor/qdev.h"
  44#include "hw/pci/pci.h"
  45#include "net_rx_pkt.h"
  46#include "hw/virtio/vhost.h"
  47
  48#define VIRTIO_NET_VM_VERSION    11
  49
  50#define MAC_TABLE_ENTRIES    64
  51#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  52
  53/* previously fixed value */
  54#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  55#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  56
  57/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
  58#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  59#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  60
  61#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  62
  63#define VIRTIO_NET_TCP_FLAG         0x3F
  64#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  65
  66/* IPv4 max payload, 16 bits in the header */
  67#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  68#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  69
  70/* header length value in ip header without option */
  71#define VIRTIO_NET_IP4_HEADER_LENGTH 5
  72
  73#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  74#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  75
  76/* Purge coalesced packets timer interval, This value affects the performance
  77   a lot, and should be tuned carefully, '300000'(300us) is the recommended
  78   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
  79   tso/gso/gro 'off'. */
  80#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  81
  82#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  83                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  84                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  85                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  86                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  87                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  88                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  89                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  90                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  91
/*
 * Config-space sizing table: each entry maps a feature bit (or set of
 * bits) to the end offset of the last virtio_net_config field that the
 * feature requires.  Used to size the config space visible to the guest
 * according to the negotiated feature set.  Terminated by an empty entry.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
 107
 108static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 109{
 110    VirtIONet *n = qemu_get_nic_opaque(nc);
 111
 112    return &n->vqs[nc->queue_index];
 113}
 114
 115static int vq2q(int queue_index)
 116{
 117    return queue_index / 2;
 118}
 119
 120/* TODO
 121 * - we could suppress RX interrupt if we were so inclined.
 122 */
 123
/*
 * Device config-space read.  Build a virtio_net_config in guest
 * endianness from device state and copy the first n->config_size bytes
 * into @config.  For a vhost-vdpa peer, the backend's own config is
 * fetched and used instead, since the real device owns that state.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without the RSS feature only a single indirection entry is offered. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
 170
 171static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 172{
 173    VirtIONet *n = VIRTIO_NET(vdev);
 174    struct virtio_net_config netcfg = {};
 175    NetClientState *nc = qemu_get_queue(n->nic);
 176
 177    memcpy(&netcfg, config, n->config_size);
 178
 179    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 180        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 181        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 182        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 183        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 184    }
 185
 186    /*
 187     * Is this VDPA? No peer means not VDPA: there's no way to
 188     * disconnect/reconnect a VDPA peer.
 189     */
 190    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 191        vhost_net_set_config(get_vhost_net(nc->peer),
 192                             (uint8_t *)&netcfg, 0, n->config_size,
 193                             VHOST_SET_CONFIG_TYPE_MASTER);
 194      }
 195}
 196
 197static bool virtio_net_started(VirtIONet *n, uint8_t status)
 198{
 199    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 200    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 201        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 202}
 203
/*
 * Set VIRTIO_NET_S_ANNOUNCE so the guest re-announces its presence
 * (e.g. gratuitous ARP after migration) and raise a config interrupt.
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
 212
/*
 * Announce-timer callback: consume one announcement round and prod the
 * guest.  NOTE(review): rescheduling for the remaining rounds is
 * presumably done by the shared announce_timer machinery - not visible
 * in this file section.
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
 221
 222static void virtio_net_announce(NetClientState *nc)
 223{
 224    VirtIONet *n = qemu_get_nic_opaque(nc);
 225    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 226
 227    /*
 228     * Make sure the virtio migration announcement timer isn't running
 229     * If it is, let it trigger announcement so that we do not cause
 230     * confusion.
 231     */
 232    if (n->announce_timer.round) {
 233        return;
 234    }
 235
 236    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 237        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 238            virtio_net_announce_notify(n);
 239    }
 240}
 241
/*
 * Start or stop the vhost datapath so it tracks the desired device
 * state.  No-op when the peer has no vhost backend, or when vhost is
 * already in the wanted state.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* Net clients beyond the data queue pairs back the control vq. */
    int cvq = n->max_ncs - n->max_queue_pairs;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the desired state (started xor stopped)? */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't byte-swap headers for us; stay in userspace. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Mark started before the call so callbacks see consistent state. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
 300
 301static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 302                                          NetClientState *peer,
 303                                          bool enable)
 304{
 305    if (virtio_is_big_endian(vdev)) {
 306        return qemu_set_vnet_be(peer, enable);
 307    } else {
 308        return qemu_set_vnet_le(peer, enable);
 309    }
 310}
 311
/*
 * Apply vnet header endianness to the first @queue_pairs peers.  If a
 * peer fails while enabling, roll back the ones already configured so
 * the backends stay consistent.
 *
 * Returns true when QEMU must byte-swap vnet headers itself (i.e. the
 * backends could not take on the requested endianness).
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* Undo every peer configured before the failing one. */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
 330
/*
 * React to device start/stop transitions by (re)configuring backend
 * vnet header endianness; see virtio_net_set_vnet_endian().
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
 353
 354static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 355{
 356    unsigned int dropped = virtqueue_drop_all(vq);
 357    if (dropped) {
 358        virtio_notify(vdev, vq);
 359    }
 360}
 361
/*
 * VirtIODevice status callback.  Propagate @status to the endianness
 * handling and to vhost, then start/stop the userspace per-queue TX
 * machinery (timer or bottom half) to match the new state.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond the currently active set count as stopped. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* The userspace datapath runs only while vhost does not. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        /* Nothing below matters unless a TX flush is pending. */
        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
 418
 419static void virtio_net_set_link_status(NetClientState *nc)
 420{
 421    VirtIONet *n = qemu_get_nic_opaque(nc);
 422    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 423    uint16_t old_status = n->status;
 424
 425    if (nc->link_down)
 426        n->status &= ~VIRTIO_NET_S_LINK_UP;
 427    else
 428        n->status |= VIRTIO_NET_S_LINK_UP;
 429
 430    if (n->status != old_status)
 431        virtio_notify_config(vdev);
 432
 433    virtio_net_set_status(vdev, vdev->status);
 434}
 435
/*
 * Emit the NIC_RX_FILTER_CHANGED QMP event for this NIC, then suppress
 * further events until the next query-rx-filter re-enables them.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
 450
/*
 * Build a QAPI intList of every VLAN id set in the n->vlans bitmap
 * (one bit per id, 32 ids per word).  The caller owns the list.
 */
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        /* The n->vlans[i] test short-circuits whole empty 32-bit words. */
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}
 467
/*
 * QMP query-rx-filter callback: snapshot the current RX filter state
 * (promisc/unicast/multicast modes, MAC tables, VLAN table) into a
 * freshly allocated RxFilterInfo.  The caller owns the result.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * NOTE(review): n->nobcast is set when the guest asked for *no*
     * broadcast, so exposing it directly as broadcast_allowed looks
     * inverted -- confirm against QMP consumers before changing.
     */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Entries below first_multi are unicast; the rest are multicast. */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    /* Without CTRL_VLAN the device cannot filter, so all VLANs pass. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
 530
/*
 * VirtIODevice reset callback: return RX filtering, queue-pair count,
 * announce state, MAC/VLAN tables and pending async TX to power-on
 * defaults.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
 569
 570static void peer_test_vnet_hdr(VirtIONet *n)
 571{
 572    NetClientState *nc = qemu_get_queue(n->nic);
 573    if (!nc->peer) {
 574        return;
 575    }
 576
 577    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 578}
 579
/* Cached result from peer_test_vnet_hdr(): backend handles vnet headers. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
 584
 585static int peer_has_ufo(VirtIONet *n)
 586{
 587    if (!peer_has_vnet_hdr(n))
 588        return 0;
 589
 590    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 591
 592    return n->has_ufo;
 593}
 594
/*
 * Recompute guest/host vnet header sizes after feature negotiation.
 * @version_1 selects the virtio-1 header layout (extended when
 * @hash_report is set); the legacy layout depends on @mergeable_rx_bufs.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        /* Let the backend consume the same header the guest provides. */
        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
 624
 625static int virtio_net_max_tx_queue_size(VirtIONet *n)
 626{
 627    NetClientState *peer = n->nic_conf.peers.ncs[0];
 628
 629    /*
 630     * Backends other than vhost-user don't support max queue size.
 631     */
 632    if (!peer) {
 633        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 634    }
 635
 636    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
 637        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 638    }
 639
 640    return VIRTQUEUE_MAX_SIZE;
 641}
 642
/*
 * Enable the peer backend queue at @index; returns 0 on success.
 * vhost-user needs its vring enabled explicitly (done even though we
 * may return early below); past that, only tap supports per-queue
 * enabling, and single-queue devices never have anything to re-enable.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}
 665
 666static int peer_detach(VirtIONet *n, int index)
 667{
 668    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 669
 670    if (!nc->peer) {
 671        return 0;
 672    }
 673
 674    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 675        vhost_set_vring_enable(nc->peer, 0);
 676    }
 677
 678    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 679        return 0;
 680    }
 681
 682    return tap_disable(nc->peer);
 683}
 684
 685static void virtio_net_set_queue_pairs(VirtIONet *n)
 686{
 687    int i;
 688    int r;
 689
 690    if (n->nic->peer_deleted) {
 691        return;
 692    }
 693
 694    for (i = 0; i < n->max_queue_pairs; i++) {
 695        if (i < n->curr_queue_pairs) {
 696            r = peer_attach(n, i);
 697            assert(!r);
 698        } else {
 699            r = peer_detach(n, i);
 700            assert(!r);
 701        }
 702    }
 703}
 704
 705static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 706
/*
 * VirtIODevice get_features callback: start from the device's host
 * feature set and strip anything the peer backend cannot support.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* All offloads require the backend to understand vnet headers. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Non-vhost peers take the feature set as-is from here on. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* vhost RSS requires the eBPF steering program to be loadable. */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally re-offer MTU even when the vhost backend dropped it. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
 754
 755static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 756{
 757    uint64_t features = 0;
 758
 759    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 760     * but also these: */
 761    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 762    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 763    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 764    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 765    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 766
 767    return features;
 768}
 769
 770static void virtio_net_apply_guest_offloads(VirtIONet *n)
 771{
 772    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 773            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 774            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 775            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 776            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 777            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 778}
 779
 780static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 781{
 782    static const uint64_t guest_offloads_mask =
 783        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 784        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 785        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 786        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 787        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 788
 789    return guest_offloads_mask & features;
 790}
 791
/* Guest offloads permitted by the feature set the guest negotiated. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
 797
/* Bus-walk context: the failover virtio-net and its discovered primary. */
typedef struct {
    VirtIONet *n;       /* device whose failover pair we are looking for */
    DeviceState *dev;   /* matching primary, filled in by the walk */
} FailoverDevice;
 802
/**
 * Set the failover primary device
 *
 * @dev: device currently visited by the qbus walk
 * @opaque: FailoverDevice to fill in (the doc previously said
 *          "FailoverId" and listed a non-existent @errp - corrected)
 *
 * Returns 1 (stopping the walk) once a PCI device whose
 * failover_pair_id matches our netclient name is found, 0 otherwise.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    /* Only PCI devices can be failover primaries. */
    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}
 827
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the matching DeviceState, or NULL if none is plugged.
 * (The doc previously listed a non-existent @errp - corrected.)
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
 844
/*
 * Hot-plug the stashed failover primary device, unless a matching one
 * is already present.  On hot-plug failure the stored device options
 * are dropped so we do not retry with the same bad options.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        /* A primary is already plugged; nothing to do. */
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        /* The bus keeps its own reference; drop ours. */
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
 873
/*
 * VirtIODevice set_features callback: commit the negotiated feature set
 * to the device - queue-pair count, vnet header layout, RSC/RSS state,
 * guest offloads, vhost backends, VLAN filtering and failover.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Drop MTU if the backend could not honour it (unless bypassed). */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN the device must pass every VLAN through. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            warn_report_err(err);
        }
    }
}
 933
 934static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 935                                     struct iovec *iov, unsigned int iov_cnt)
 936{
 937    uint8_t on;
 938    size_t s;
 939    NetClientState *nc = qemu_get_queue(n->nic);
 940
 941    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 942    if (s != sizeof(on)) {
 943        return VIRTIO_NET_ERR;
 944    }
 945
 946    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 947        n->promisc = on;
 948    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 949        n->allmulti = on;
 950    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 951        n->alluni = on;
 952    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 953        n->nomulti = on;
 954    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 955        n->nouni = on;
 956    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 957        n->nobcast = on;
 958    } else {
 959        return VIRTIO_NET_ERR;
 960    }
 961
 962    rxfilter_notify(nc);
 963
 964    return VIRTIO_NET_OK;
 965}
 966
 967static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 968                                     struct iovec *iov, unsigned int iov_cnt)
 969{
 970    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 971    uint64_t offloads;
 972    size_t s;
 973
 974    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 975        return VIRTIO_NET_ERR;
 976    }
 977
 978    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 979    if (s != sizeof(offloads)) {
 980        return VIRTIO_NET_ERR;
 981    }
 982
 983    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 984        uint64_t supported_offloads;
 985
 986        offloads = virtio_ldq_p(vdev, &offloads);
 987
 988        if (!n->has_vnet_hdr) {
 989            return VIRTIO_NET_ERR;
 990        }
 991
 992        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
 993            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
 994        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
 995            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
 996        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
 997
 998        supported_offloads = virtio_net_supported_guest_offloads(n);
 999        if (offloads & ~supported_offloads) {
1000            return VIRTIO_NET_ERR;
1001        }
1002
1003        n->curr_guest_offloads = offloads;
1004        virtio_net_apply_guest_offloads(n);
1005
1006        return VIRTIO_NET_OK;
1007    } else {
1008        return VIRTIO_NET_ERR;
1009    }
1010}
1011
/*
 * Handle VIRTIO_NET_CTRL_MAC commands.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET replaces the device MAC with the 6-byte
 * payload.  VIRTIO_NET_CTRL_MAC_TABLE_SET reloads the RX filter table:
 * the payload is two virtio_net_ctrl_mac lists (unicast first, then
 * multicast).  A list larger than MAC_TABLE_ENTRIES sets the matching
 * overflow flag instead, which makes receive_filter() accept all
 * traffic of that class.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Build the new table locally; commit only if parsing succeeds */
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First list: unicast entries */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* too many unicast entries: fall back to pass-all unicast */
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Second list: multicast entries */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume exactly the rest of the payload */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* too many multicast entries: fall back to pass-all multicast */
        multi_overflow = 1;
    }

    /* Commit the fully parsed table */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1107
1108static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1109                                        struct iovec *iov, unsigned int iov_cnt)
1110{
1111    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1112    uint16_t vid;
1113    size_t s;
1114    NetClientState *nc = qemu_get_queue(n->nic);
1115
1116    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1117    vid = virtio_lduw_p(vdev, &vid);
1118    if (s != sizeof(vid)) {
1119        return VIRTIO_NET_ERR;
1120    }
1121
1122    if (vid >= MAX_VLAN)
1123        return VIRTIO_NET_ERR;
1124
1125    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1126        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1127    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1128        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1129    else
1130        return VIRTIO_NET_ERR;
1131
1132    rxfilter_notify(nc);
1133
1134    return VIRTIO_NET_OK;
1135}
1136
1137static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1138                                      struct iovec *iov, unsigned int iov_cnt)
1139{
1140    trace_virtio_net_handle_announce(n->announce_timer.round);
1141    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1142        n->status & VIRTIO_NET_S_ANNOUNCE) {
1143        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1144        if (n->announce_timer.round) {
1145            qemu_announce_timer_step(&n->announce_timer);
1146        }
1147        return VIRTIO_NET_OK;
1148    } else {
1149        return VIRTIO_NET_ERR;
1150    }
1151}
1152
1153static void virtio_net_detach_epbf_rss(VirtIONet *n);
1154
1155static void virtio_net_disable_rss(VirtIONet *n)
1156{
1157    if (n->rss_data.enabled) {
1158        trace_virtio_net_rss_disable();
1159    }
1160    n->rss_data.enabled = false;
1161
1162    virtio_net_detach_epbf_rss(n);
1163}
1164
1165static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1166{
1167    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1168    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1169        return false;
1170    }
1171
1172    return nc->info->set_steering_ebpf(nc, prog_fd);
1173}
1174
1175static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1176                                   struct EBPFRSSConfig *config)
1177{
1178    config->redirect = data->redirect;
1179    config->populate_hash = data->populate_hash;
1180    config->hash_types = data->hash_types;
1181    config->indirections_len = data->indirections_len;
1182    config->default_queue = data->default_queue;
1183}
1184
1185static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1186{
1187    struct EBPFRSSConfig config = {};
1188
1189    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1190        return false;
1191    }
1192
1193    rss_data_to_rss_config(&n->rss_data, &config);
1194
1195    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1196                          n->rss_data.indirections_table, n->rss_data.key)) {
1197        return false;
1198    }
1199
1200    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1201        return false;
1202    }
1203
1204    return true;
1205}
1206
/*
 * Detach any steering eBPF program from the backend (fd -1 clears it).
 * Note: "epbf" in the name is a historical typo for "ebpf"; kept as-is
 * to avoid churning callers.
 */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1211
/*
 * Probe whether the backend supports steering eBPF (by attaching fd -1,
 * which is a no-op clear) and, if so, load the RSS program.
 * Returns true when the program is loaded and ready to use.
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}
1221
/* Detach the steering program from the backend and free the eBPF state. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1227
1228static uint16_t virtio_net_handle_rss(VirtIONet *n,
1229                                      struct iovec *iov,
1230                                      unsigned int iov_cnt,
1231                                      bool do_rss)
1232{
1233    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1234    struct virtio_net_rss_config cfg;
1235    size_t s, offset = 0, size_get;
1236    uint16_t queue_pairs, i;
1237    struct {
1238        uint16_t us;
1239        uint8_t b;
1240    } QEMU_PACKED temp;
1241    const char *err_msg = "";
1242    uint32_t err_value = 0;
1243
1244    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1245        err_msg = "RSS is not negotiated";
1246        goto error;
1247    }
1248    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1249        err_msg = "Hash report is not negotiated";
1250        goto error;
1251    }
1252    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1253    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1254    if (s != size_get) {
1255        err_msg = "Short command buffer";
1256        err_value = (uint32_t)s;
1257        goto error;
1258    }
1259    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1260    n->rss_data.indirections_len =
1261        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1262    n->rss_data.indirections_len++;
1263    if (!do_rss) {
1264        n->rss_data.indirections_len = 1;
1265    }
1266    if (!is_power_of_2(n->rss_data.indirections_len)) {
1267        err_msg = "Invalid size of indirection table";
1268        err_value = n->rss_data.indirections_len;
1269        goto error;
1270    }
1271    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1272        err_msg = "Too large indirection table";
1273        err_value = n->rss_data.indirections_len;
1274        goto error;
1275    }
1276    n->rss_data.default_queue = do_rss ?
1277        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1278    if (n->rss_data.default_queue >= n->max_queue_pairs) {
1279        err_msg = "Invalid default queue";
1280        err_value = n->rss_data.default_queue;
1281        goto error;
1282    }
1283    offset += size_get;
1284    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1285    g_free(n->rss_data.indirections_table);
1286    n->rss_data.indirections_table = g_malloc(size_get);
1287    if (!n->rss_data.indirections_table) {
1288        err_msg = "Can't allocate indirections table";
1289        err_value = n->rss_data.indirections_len;
1290        goto error;
1291    }
1292    s = iov_to_buf(iov, iov_cnt, offset,
1293                   n->rss_data.indirections_table, size_get);
1294    if (s != size_get) {
1295        err_msg = "Short indirection table buffer";
1296        err_value = (uint32_t)s;
1297        goto error;
1298    }
1299    for (i = 0; i < n->rss_data.indirections_len; ++i) {
1300        uint16_t val = n->rss_data.indirections_table[i];
1301        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1302    }
1303    offset += size_get;
1304    size_get = sizeof(temp);
1305    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1306    if (s != size_get) {
1307        err_msg = "Can't get queue_pairs";
1308        err_value = (uint32_t)s;
1309        goto error;
1310    }
1311    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1312    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1313        err_msg = "Invalid number of queue_pairs";
1314        err_value = queue_pairs;
1315        goto error;
1316    }
1317    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1318        err_msg = "Invalid key size";
1319        err_value = temp.b;
1320        goto error;
1321    }
1322    if (!temp.b && n->rss_data.hash_types) {
1323        err_msg = "No key provided";
1324        err_value = 0;
1325        goto error;
1326    }
1327    if (!temp.b && !n->rss_data.hash_types) {
1328        virtio_net_disable_rss(n);
1329        return queue_pairs;
1330    }
1331    offset += size_get;
1332    size_get = temp.b;
1333    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1334    if (s != size_get) {
1335        err_msg = "Can get key buffer";
1336        err_value = (uint32_t)s;
1337        goto error;
1338    }
1339    n->rss_data.enabled = true;
1340
1341    if (!n->rss_data.populate_hash) {
1342        if (!virtio_net_attach_epbf_rss(n)) {
1343            /* EBPF must be loaded for vhost */
1344            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1345                warn_report("Can't load eBPF RSS for vhost");
1346                goto error;
1347            }
1348            /* fallback to software RSS */
1349            warn_report("Can't load eBPF RSS - fallback to software RSS");
1350            n->rss_data.enabled_software_rss = true;
1351        }
1352    } else {
1353        /* use software RSS for hash populating */
1354        /* and detach eBPF if was loaded before */
1355        virtio_net_detach_epbf_rss(n);
1356        n->rss_data.enabled_software_rss = true;
1357    }
1358
1359    trace_virtio_net_rss_enable(n->rss_data.hash_types,
1360                                n->rss_data.indirections_len,
1361                                temp.b);
1362    return queue_pairs;
1363error:
1364    trace_virtio_net_rss_error(err_msg, err_value);
1365    virtio_net_disable_rss(n);
1366    return 0;
1367}
1368
/*
 * Handle VIRTIO_NET_CTRL_MQ commands: hash-report configuration, RSS
 * configuration, or an explicit queue-pair count from the guest.  On
 * success the device and its backend are reconfigured for the new
 * number of queue pairs.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;

    /* any MQ command invalidates the currently programmed RSS state */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* reject counts outside the spec limits or beyond device capacity */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1413
/*
 * Control virtqueue handler: pop each request, dispatch on the command
 * class, and write back a one-byte VIRTIO_NET_OK/VIRTIO_NET_ERR status
 * into the request's in-buffer.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        /* need room for the status byte and at least a command header */
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        /* work on a copy: iov_discard_front() modifies the vector */
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
1466
1467/* RX */
1468
1469static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1470{
1471    VirtIONet *n = VIRTIO_NET(vdev);
1472    int queue_index = vq2q(virtio_get_queue_index(vq));
1473
1474    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1475}
1476
1477static bool virtio_net_can_receive(NetClientState *nc)
1478{
1479    VirtIONet *n = qemu_get_nic_opaque(nc);
1480    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1481    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1482
1483    if (!vdev->vm_running) {
1484        return false;
1485    }
1486
1487    if (nc->queue_index >= n->curr_queue_pairs) {
1488        return false;
1489    }
1490
1491    if (!virtio_queue_ready(q->rx_vq) ||
1492        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1493        return false;
1494    }
1495
1496    return true;
1497}
1498
/*
 * Return 1 when the RX virtqueue can hold a packet of @bufsize bytes,
 * 0 otherwise.  Also manages virtqueue notifications: they are enabled
 * while we are out of buffers (so the guest's refill kicks us) and
 * disabled while we can make progress without being notified.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    /* with mergeable buffers the packet may span several descriptors,
     * so check total available bytes rather than queue emptiness alone */
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1521
/*
 * Convert the 16-bit fields of a virtio_net_hdr between host and the
 * device's byte order (virtio_tswap16s is a no-op when they match).
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1529
1530/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1531 * it never finds out that the packets don't have valid checksums.  This
1532 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1533 * fix this with Xen but it hasn't appeared in an upstream release of
1534 * dhclient yet.
1535 *
1536 * To avoid breaking existing guests, we catch udp packets and add
1537 * checksums.  This is terrible but it's better than hacking the guest
1538 * kernels.
1539 *
1540 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1541 * we should provide a mechanism to disable it to avoid polluting the host
1542 * cache.
1543 */
/*
 * Detect DHCP replies missing their UDP checksum and fill it in, so
 * guests running old dhclient still see valid packets (see the comment
 * block above for the full story).
 *
 * NOTE(review): the fixed byte offsets assume an untagged Ethernet
 * frame with a 20-byte IPv4 header (no IP options) — confirm tagged
 * or option-bearing frames cannot match here.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1556
/*
 * Write the virtio-net header for an incoming packet into the guest's
 * scatter/gather list.  When the backend supplies a vnet header in
 * @buf it is fixed up in place (dhclient workaround, optional endian
 * swap) and copied; otherwise a zeroed GSO_NONE header is synthesized.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1578
/*
 * Decide whether an incoming frame passes the device RX filter.
 * Returns 1 to accept the packet, 0 to drop it.  Checks, in order:
 * promiscuous mode, the VLAN filter bitmap, then broadcast/multicast/
 * unicast policy flags and the MAC filter table.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    /* skip the backend's vnet header; ptr now points at the Ethernet frame */
    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        /* 802.1Q tagged: low 12 bits of the TCI are the VLAN id */
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* multicast entries live in macs[first_multi..in_use) */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* unicast entries live in macs[0..first_multi) */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
1629
1630static uint8_t virtio_net_get_hash_type(bool isip4,
1631                                        bool isip6,
1632                                        bool isudp,
1633                                        bool istcp,
1634                                        uint32_t types)
1635{
1636    if (isip4) {
1637        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1638            return NetPktRssIpV4Tcp;
1639        }
1640        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1641            return NetPktRssIpV4Udp;
1642        }
1643        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1644            return NetPktRssIpV4;
1645        }
1646    } else if (isip6) {
1647        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1648                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1649
1650        if (istcp && (types & mask)) {
1651            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1652                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1653        }
1654        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1655        if (isudp && (types & mask)) {
1656            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1657                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1658        }
1659        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1660        if (types & mask) {
1661            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1662                NetPktRssIpV6Ex : NetPktRssIpV6;
1663        }
1664    }
1665    return 0xff;
1666}
1667
/*
 * Store the calculated hash and its report type into the packet's
 * in-place virtio_net_hdr_v1_hash header.
 * NOTE(review): the fields are written in host byte order with no
 * virtio_stl/stw conversion — confirm this is correct for big-endian
 * hosts against the virtio spec.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1675
/*
 * Classify an incoming packet for software RSS.
 *
 * Computes the hash over the fields selected by the negotiated hash
 * types, optionally writes the hash report into the packet header, and
 * looks up the target queue in the indirection table.
 *
 * Returns the queue index the packet should be redirected to, or -1
 * when it should stay on the current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* maps NetPktRss* values to VIRTIO_NET_HASH_REPORT_* codes */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* fragments carry no usable L4 header; never hash over TCP/UDP ports */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* no applicable hash type: report NONE, use the default queue */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so this masks the hash */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1728
/*
 * Deliver one packet from the backend into the guest's RX virtqueue.
 *
 * Applies software RSS (possibly re-entering once on another subqueue
 * with no_rss = true to prevent a second redirect), checks buffer
 * availability and the RX filter, then copies the packet — prefixed
 * with its virtio-net header — across as many descriptor chains as
 * mergeable RX buffers allow.
 *
 * Returns the packet size on success or drop, 0 when the queue has no
 * room (the caller will retry after the guest refills), or -1 on error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* fill descriptor chains until the whole packet has been copied */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* remember where num_buffers lives; it is patched once
                 * the total number of chains used is known */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* copy the remaining hash-report header bytes as well */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* patch num_buffers in the first chain's header */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        g_free(elems[j]);
    }

    return err;
}
1870
/*
 * Non-RSC receive entry point: hold the RCU read lock for the duration of
 * the call and hand the packet to virtio_net_receive_rcu().  The final
 * 'false' argument presumably means "do not bypass RSS steering" — confirm
 * against the virtio_net_receive_rcu() definition (outside this view).
 */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
1878
/*
 * Parse an IPv4/TCP packet into a VirtioNetRscUnit: locate the IP and TCP
 * headers behind the virtio-net and ethernet headers and record the header
 * lengths and TCP payload size.  The caller must already have checked that
 * the buffer is long enough (see virtio_net_rsc_receive4()).
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL is in 32-bit words; << 2 converts to bytes */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* data offset lives in the top 4 bits of th_offset_flags, in 32-bit
     * words: (x >> 12) * 4 == x >> 10.  htons() doubles as ntohs() here. */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* ip_len covers the IP header too, so subtract both header lengths */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
1895
/*
 * IPv6 counterpart of virtio_net_rsc_extract_unit4(): fill a
 * VirtioNetRscUnit from an IPv6/TCP packet.  The IPv6 header has a fixed
 * size (extension headers are rejected by the sanity check), so the TCP
 * header sits immediately after it.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    /* TCP data offset: top 4 bits, in 32-bit words ((x >> 12) * 4) */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6,
       ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
1914
/*
 * Deliver one cached segment to the guest and free it.
 * For a coalesced segment the RSC fields of the virtio-net header are
 * filled in (segment count, dup-ack count, gso_type per chain protocol).
 * Returns virtio_net_do_receive()'s result, i.e. 0 when the packet could
 * not be delivered.
 *
 * NOTE(review): seg->buf is cast to struct virtio_net_hdr_v1, which assumes
 * guest_hdr_len >= sizeof(struct virtio_net_hdr_v1) whenever RSC is
 * negotiated — confirm against feature negotiation.
 */
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    /* the segment is consumed regardless of the delivery result */
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
1943
/*
 * Chain drain-timer callback: flush every buffered segment to the guest.
 * Segments whose delivery fails are counted but still removed by
 * virtio_net_rsc_drain_seg(); if anything remains buffered afterwards the
 * timer is re-armed for another pass.
 */
static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}
1962
/*
 * Tear down all RSC state: free every buffered segment of every chain,
 * delete each chain's drain timer and free the chain itself.  Buffered
 * packets are discarded, not delivered.
 */
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}
1980
/*
 * Buffer a packet on the chain as a new segment so later packets of the
 * same TCP flow can be coalesced into it.  The backing buffer is sized for
 * the maximum coalesced result (headers + VIRTIO_NET_MAX_TCP_PAYLOAD), not
 * just for this packet.  The seg's unit pointers are re-extracted so they
 * point into the copied buffer rather than the caller's.
 */
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        /* chains are only created for ETH_P_IP/ETH_P_IPV6 */
        g_assert_not_reached();
    }
}
2013
2014static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2015                                         VirtioNetRscSeg *seg,
2016                                         const uint8_t *buf,
2017                                         struct tcp_header *n_tcp,
2018                                         struct tcp_header *o_tcp)
2019{
2020    uint32_t nack, oack;
2021    uint16_t nwin, owin;
2022
2023    nack = htonl(n_tcp->th_ack);
2024    nwin = htons(n_tcp->th_win);
2025    oack = htonl(o_tcp->th_ack);
2026    owin = htons(o_tcp->th_win);
2027
2028    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2029        chain->stat.ack_out_of_win++;
2030        return RSC_FINAL;
2031    } else if (nack == oack) {
2032        /* duplicated ack or window probe */
2033        if (nwin == owin) {
2034            /* duplicated ack, add dup ack count due to whql test up to 1 */
2035            chain->stat.dup_ack++;
2036            return RSC_FINAL;
2037        } else {
2038            /* Coalesce window update */
2039            o_tcp->th_win = n_tcp->th_win;
2040            chain->stat.win_update++;
2041            return RSC_COALESCE;
2042        }
2043    } else {
2044        /* pure ack, go to 'C', finalize*/
2045        chain->stat.pure_ack++;
2046        return RSC_FINAL;
2047    }
2048}
2049
/*
 * Try to merge the new packet (n_unit) into the cached segment (seg).
 * Returns RSC_COALESCE when the payload was appended, RSC_FINAL when the
 * cached segment must be flushed first (out of window/order, oversize, dup
 * ack, pure ack).  Sequence arithmetic is unsigned, so wrap-around is
 * handled by the (nseq - oseq) comparisons.
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* start of the new packet's TCP payload */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        /* would the merged IP payload exceed the per-protocol limit? */
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* carry the newest ack number and window into the merged segment */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* append the new payload right after the cached data */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2114
2115static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2116                                        VirtioNetRscSeg *seg,
2117                                        const uint8_t *buf, size_t size,
2118                                        VirtioNetRscUnit *unit)
2119{
2120    struct ip_header *ip1, *ip2;
2121
2122    ip1 = (struct ip_header *)(unit->ip);
2123    ip2 = (struct ip_header *)(seg->unit.ip);
2124    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2125        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2126        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2127        chain->stat.no_match++;
2128        return RSC_NO_MATCH;
2129    }
2130
2131    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2132}
2133
2134static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2135                                        VirtioNetRscSeg *seg,
2136                                        const uint8_t *buf, size_t size,
2137                                        VirtioNetRscUnit *unit)
2138{
2139    struct ip6_header *ip1, *ip2;
2140
2141    ip1 = (struct ip6_header *)(unit->ip);
2142    ip2 = (struct ip6_header *)(seg->unit.ip);
2143    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2144        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2145        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2146        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2147            chain->stat.no_match++;
2148            return RSC_NO_MATCH;
2149    }
2150
2151    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2152}
2153
2154/* Packets with 'SYN' should bypass, other flag should be sent after drain
2155 * to prevent out of order */
2156static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2157                                         struct tcp_header *tcp)
2158{
2159    uint16_t tcp_hdr;
2160    uint16_t tcp_flag;
2161
2162    tcp_flag = htons(tcp->th_offset_flags);
2163    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2164    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2165    if (tcp_flag & TH_SYN) {
2166        chain->stat.tcp_syn++;
2167        return RSC_BYPASS;
2168    }
2169
2170    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2171        chain->stat.tcp_ctrl_drain++;
2172        return RSC_FINAL;
2173    }
2174
2175    if (tcp_hdr > sizeof(struct tcp_header)) {
2176        chain->stat.tcp_all_opt++;
2177        return RSC_FINAL;
2178    }
2179
2180    return RSC_CANDIDATE;
2181}
2182
/*
 * Try to coalesce the packet with every cached segment on the chain.
 * - Empty cache: buffer the packet and arm the drain timer.
 * - RSC_FINAL from a match: flush that segment, then deliver the packet.
 * - RSC_NO_MATCH: keep scanning; if nothing matches, buffer the packet.
 * - Otherwise the packet was merged; mark the segment coalesced.
 * Returns the number of bytes consumed (size), or 0 if a flush failed.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2228
2229/* Drain a connection data, this is to avoid out of order segments */
2230static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2231                                        NetClientState *nc,
2232                                        const uint8_t *buf, size_t size,
2233                                        uint16_t ip_start, uint16_t ip_size,
2234                                        uint16_t tcp_port)
2235{
2236    VirtioNetRscSeg *seg, *nseg;
2237    uint32_t ppair1, ppair2;
2238
2239    ppair1 = *(uint32_t *)(buf + tcp_port);
2240    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2241        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2242        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2243            || (ppair1 != ppair2)) {
2244            continue;
2245        }
2246        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2247            chain->stat.drain_failed++;
2248        }
2249
2250        break;
2251    }
2252
2253    return virtio_net_do_receive(nc, buf, size);
2254}
2255
/*
 * Check whether an IPv4 packet is eligible for coalescing.  Anything with
 * options, fragmentation, ECN, a non-TCP protocol or an implausible length
 * is bypassed (delivered as-is).  Returns RSC_CANDIDATE or RSC_BYPASS.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    /* NOTE(review): non-v4 packets are counted in the ip_option stat —
     * looks like stat reuse rather than a dedicated counter; confirm. */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment: any packet without the DF
     * flag set is treated as potentially fragmented */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* total length must cover IP+TCP headers and fit within the buffer
     * after the virtio-net and ethernet headers */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2301
/*
 * RSC receive path for IPv4: validate the packet, then either deliver it
 * directly (bypass), drain its flow first (control segments), or attempt
 * coalescing.
 */
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    /* too short to hold virtio-net + eth + ip + tcp headers */
    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* the '+ 12' is the offset of ip_src within the IPv4 header:
         * drain matches on saddr+daddr (8 bytes) and the TCP port pair */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
2336
/*
 * Check whether an IPv6 packet is eligible for coalescing.  Extension
 * headers are implicitly rejected by requiring the next-header field to be
 * TCP.  Returns RSC_CANDIDATE or RSC_BYPASS.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* NOTE(review): ip6_un1_flow is a 32-bit field; masking 0xF0 of the
     * host-order value picks the version nibble only on little-endian
     * hosts — verify behavior on big-endian builds. */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* v6 payload length excludes the IP header itself */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2370
2371static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2372                                      const uint8_t *buf, size_t size)
2373{
2374    int32_t ret;
2375    uint16_t hdr_len;
2376    VirtioNetRscChain *chain;
2377    VirtioNetRscUnit unit;
2378
2379    chain = (VirtioNetRscChain *)opq;
2380    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2381
2382    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2383        + sizeof(tcp_header))) {
2384        return virtio_net_do_receive(nc, buf, size);
2385    }
2386
2387    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2388    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2389                                                 unit.ip, buf, size)) {
2390        return virtio_net_do_receive(nc, buf, size);
2391    }
2392
2393    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2394    if (ret == RSC_BYPASS) {
2395        return virtio_net_do_receive(nc, buf, size);
2396    } else if (ret == RSC_FINAL) {
2397        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2398                ((hdr_len + sizeof(struct eth_header)) + 8),
2399                VIRTIO_NET_IP6_ADDR_SIZE,
2400                hdr_len + sizeof(struct eth_header)
2401                + sizeof(struct ip6_header));
2402    }
2403
2404    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2405}
2406
/*
 * Find the RSC chain for an ethertype, creating it on first use.
 * Only ETH_P_IP and ETH_P_IPV6 have chains; anything else returns NULL.
 * A new chain gets its per-protocol payload limit, gso_type, drain timer
 * and empty buffer list.
 */
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
2442
2443static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2444                                      const uint8_t *buf,
2445                                      size_t size)
2446{
2447    uint16_t proto;
2448    VirtioNetRscChain *chain;
2449    struct eth_header *eth;
2450    VirtIONet *n;
2451
2452    n = qemu_get_nic_opaque(nc);
2453    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2454        return virtio_net_do_receive(nc, buf, size);
2455    }
2456
2457    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2458    proto = htons(eth->h_proto);
2459
2460    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2461    if (chain) {
2462        chain->stat.received++;
2463        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2464            return virtio_net_rsc_receive4(chain, nc, buf, size);
2465        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2466            return virtio_net_rsc_receive6(chain, nc, buf, size);
2467        }
2468    }
2469    return virtio_net_do_receive(nc, buf, size);
2470}
2471
2472static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2473                                  size_t size)
2474{
2475    VirtIONet *n = qemu_get_nic_opaque(nc);
2476    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2477        return virtio_net_rsc_receive(nc, buf, size);
2478    } else {
2479        return virtio_net_do_receive(nc, buf, size);
2480    }
2481}
2482
2483static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2484
/*
 * Completion callback for an asynchronous qemu_sendv_packet_async() TX:
 * retire the in-flight element, re-enable TX notifications (disabled while
 * the async send was pending) and resume flushing the queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2500
/* TX */
/*
 * Flush up to tx_burst packets from the TX virtqueue to the backend.
 * Returns the number of packets sent, -EBUSY when an asynchronous send is
 * in flight (completion will resume flushing), or -EINVAL when the device
 * has been marked broken via virtio_error().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* a previous async send is still pending; wait for its completion */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* byteswap the header into a local copy, then rebuild the
                 * iovec with sg2[0] pointing at the swapped header and the
                 * rest referencing the original payload */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* payload did not fit in sg2; drop the packet */
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            /* keep the first host_hdr_len bytes, skip the remainder of the
             * guest header, then append the payload */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* backend queued the send asynchronously: park the element and
             * stop until virtio_net_tx_complete() fires */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2596
/*
 * TX virtqueue kick handler for the timer-based TX mode: the first kick
 * arms the timer to batch packets for tx_timeout ns; a kick while the
 * timer is pending flushes immediately and cancels the timer.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* second kick before the timer fired: flush right away */
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
2627
/*
 * TX virtqueue kick handler for the bottom-half TX mode: schedule the BH
 * to do the actual flush, suppressing further notifications until it runs.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* a BH is already scheduled; nothing to do */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2649
/*
 * Timer callback for the timer-based TX mode: re-enable notifications and
 * flush whatever accumulated during the batching window.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2672
/*
 * Bottom-half callback for the BH-based TX mode: flush the queue, and if a
 * full burst went out assume more is coming and reschedule immediately.
 * Otherwise re-enable notifications and catch any packets that raced in
 * while notifications were off.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2721
2722static void virtio_net_add_queue(VirtIONet *n, int index)
2723{
2724    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2725
2726    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2727                                           virtio_net_handle_rx);
2728
2729    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2730        n->vqs[index].tx_vq =
2731            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2732                             virtio_net_handle_tx_timer);
2733        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2734                                              virtio_net_tx_timer,
2735                                              &n->vqs[index]);
2736    } else {
2737        n->vqs[index].tx_vq =
2738            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2739                             virtio_net_handle_tx_bh);
2740        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2741    }
2742
2743    n->vqs[index].tx_waiting = 0;
2744    n->vqs[index].n = n;
2745}
2746
/*
 * Tear down the RX/TX virtqueue pair at @index, releasing whichever TX
 * flush mechanism (timer or bottom half) was created for it.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* Drop packets still queued towards the disappearing subqueue. */
    qemu_purge_queued_packets(nc);

    /* RX occupies virtqueue slot index * 2, TX slot index * 2 + 1. */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2766
/*
 * Grow or shrink the virtqueue set to @new_max_queue_pairs RX/TX pairs.
 * The ctrl vq always lives in the last slot, so it must be removed first
 * and re-added last whenever the count changes.
 */
static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    /* At least one RX/TX pair plus ctrl vq; count is always odd. */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2801
2802static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2803{
2804    int max = multiqueue ? n->max_queue_pairs : 1;
2805
2806    n->multiqueue = multiqueue;
2807    virtio_net_change_num_queue_pairs(n, max);
2808
2809    virtio_net_set_queue_pairs(n);
2810}
2811
/*
 * Device-section post_load: reconcile the incoming migration state with
 * the local configuration (merge buffers, MAC table, offloads, link
 * state, announce timer, RSS) before the guest resumes.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    /* Without ctrl guest offloads the guest cannot change them: reset
     * to what the peer supports. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume a guest-announce sequence that was interrupted mid-round. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Hash population forces the software path; otherwise try eBPF. */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2894
/* Top-level virtio post_load hook; runs after virtio_load completes. */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
2910
/*
 * Per-VirtIONetQueue wire format: only the tx_waiting flag is migrated
 * for each queue.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2919
2920static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2921{
2922    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2923}
2924
2925static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2926{
2927    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2928                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2929}
2930
2931static bool mac_table_fits(void *opaque, int version_id)
2932{
2933    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2934}
2935
/* VMState field test: complement of mac_table_fits (selects discard path). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
2940
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;             /* device this scratch state belongs to */
    VirtIONetQueue *vqs_1;              /* &parent->vqs[1]: queues after the first */
    uint16_t        curr_queue_pairs_1; /* curr_queue_pairs - 1 (0 when none) */
    uint8_t         has_ufo;            /* UFO capability flag, as on the wire */
    uint32_t        has_vnet_hdr;       /* vnet header capability flag, as on the wire */
};
2951
2952/* The 2nd and subsequent tx_waiting flags are loaded later than
2953 * the 1st entry in the queue_pairs and only if there's more than one
2954 * entry.  We use the tmp mechanism to calculate a temporary
2955 * pointer and count and also validate the count.
2956 */
2957
2958static int virtio_net_tx_waiting_pre_save(void *opaque)
2959{
2960    struct VirtIONetMigTmp *tmp = opaque;
2961
2962    tmp->vqs_1 = tmp->parent->vqs + 1;
2963    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2964    if (tmp->parent->curr_queue_pairs == 0) {
2965        tmp->curr_queue_pairs_1 = 0;
2966    }
2967
2968    return 0;
2969}
2970
2971static int virtio_net_tx_waiting_pre_load(void *opaque)
2972{
2973    struct VirtIONetMigTmp *tmp = opaque;
2974
2975    /* Reuse the pointer setup from save */
2976    virtio_net_tx_waiting_pre_save(opaque);
2977
2978    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
2979        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
2980            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
2981
2982        return -EINVAL;
2983    }
2984
2985    return 0; /* all good */
2986}
2987
/*
 * WITH_TMP wrapper that migrates tx_waiting for queues 1..N-1; the
 * pointer and element count are prepared by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3000
3001/* the 'has_ufo' flag is just tested; if the incoming stream has the
3002 * flag set we need to check that we have it
3003 */
3004static int virtio_net_ufo_post_load(void *opaque, int version_id)
3005{
3006    struct VirtIONetMigTmp *tmp = opaque;
3007
3008    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3009        error_report("virtio-net: saved image requires TUN_F_UFO support");
3010        return -EINVAL;
3011    }
3012
3013    return 0;
3014}
3015
3016static int virtio_net_ufo_pre_save(void *opaque)
3017{
3018    struct VirtIONetMigTmp *tmp = opaque;
3019
3020    tmp->has_ufo = tmp->parent->has_ufo;
3021
3022    return 0;
3023}
3024
/* WITH_TMP wrapper carrying the single has_ufo capability byte. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3034
3035/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3036 * flag set we need to check that we have it
3037 */
3038static int virtio_net_vnet_post_load(void *opaque, int version_id)
3039{
3040    struct VirtIONetMigTmp *tmp = opaque;
3041
3042    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3043        error_report("virtio-net: saved image requires vnet_hdr=on");
3044        return -EINVAL;
3045    }
3046
3047    return 0;
3048}
3049
3050static int virtio_net_vnet_pre_save(void *opaque)
3051{
3052    struct VirtIONetMigTmp *tmp = opaque;
3053
3054    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3055
3056    return 0;
3057}
3058
/* WITH_TMP wrapper carrying the has_vnet_hdr capability flag. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3068
3069static bool virtio_net_rss_needed(void *opaque)
3070{
3071    return VIRTIO_NET(opaque)->rss_data.enabled;
3072}
3073
/* Optional subsection with the RSS configuration (see virtio_net_rss_needed). */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Indirection table is allocated on load, sized by its length field. */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3094
/*
 * Device-section wire format.  Field order is migration ABI: do not
 * reorder or remove entries, only append guarded ones.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue 0 only; the rest follow in the WITH_TMP below. */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* Multiqueue fields are only on the wire when more than one pair. */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3150
/* NetClientState callbacks wiring the generic NIC layer to virtio-net. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3160
/*
 * Ask the vhost backend whether virtqueue @idx has a pending guest
 * notification.  Only valid while vhost is running (asserted).
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3168
/*
 * Mask/unmask guest notifications for virtqueue @idx in the vhost
 * backend.  Only valid while vhost is running (asserted).
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
3178
/*
 * Derive n->config_size from @host_features.  VIRTIO_NET_F_MAC is added
 * to the local copy only (the caller's feature set is not modified), so
 * the computed size always covers the MAC field.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
3186
3187void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3188                                   const char *type)
3189{
3190    /*
3191     * The name can be NULL, the netclient name will be type.x.
3192     */
3193    assert(type != NULL);
3194
3195    g_free(n->netclient_name);
3196    g_free(n->netclient_type);
3197    n->netclient_name = g_strdup(name);
3198    n->netclient_type = g_strdup(type);
3199}
3200
3201static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3202{
3203    HotplugHandler *hotplug_ctrl;
3204    PCIDevice *pci_dev;
3205    Error *err = NULL;
3206
3207    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3208    if (hotplug_ctrl) {
3209        pci_dev = PCI_DEVICE(dev);
3210        pci_dev->partially_hotplugged = true;
3211        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3212        if (err) {
3213            error_report_err(err);
3214            return false;
3215        }
3216    } else {
3217        return false;
3218    }
3219    return true;
3220}
3221
/*
 * Plug the failover primary device back in (e.g. after a failed
 * migration).  A no-op success if the device was never partially
 * unplugged.  Returns false and sets @errp on error.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    /* Reattach to the bus and make the device visible again. */
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3254
/*
 * React to migration state transitions for the failover primary:
 * unplug it when migration setup starts, replug it if migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            /* Keep the primary's state out of the migration stream. */
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3284
3285static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3286{
3287    MigrationState *s = data;
3288    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3289    virtio_net_handle_migration_primary(n, s);
3290}
3291
/*
 * DeviceListener hide_device hook: decide whether a device being created
 * with failover_pair_id matching this NIC should stay hidden (i.e. not
 * realized yet).  Also records the primary's options for later replug.
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    /* Only devices declaring a failover pairing are of interest. */
    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    /* Ignore primaries paired with a different standby NIC. */
    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3342
/*
 * Realize the virtio-net device: validate configuration, size the
 * virtqueues, create the NIC frontend and allocate all runtime state.
 * On validation failure, sets @errp (and undoes virtio_init where
 * needed) and returns.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /* duplex/speed: either both advertised via SPEED_DUPLEX or unknown. */
    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    /* Failover support: hide the primary until STANDBY is negotiated. */
    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    /* Each pair needs two vqs plus one ctrl vq within VIRTIO_QUEUE_MAX. */
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* Unknown tx= values are tolerated; virtio_net_add_queue uses BH then. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    /* Ctrl vq goes last, after all RX/TX pairs. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* Host header length depends on whether the peer supports vnet_hdr. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    /* Push our MAC down to a vhost-vdpa backend, which owns the config. */
   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
3519
/*
 * Unrealize: release everything allocated in realize, in reverse
 * dependency order (stop backend first, queues before the vq array,
 * ctrl vq after the per-pair queues).
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    /* Without multiqueue only pair 0 was ever created. */
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3563
/* QOM instance_init: defaults that realize may later adjust. */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}
3579
3580static int virtio_net_pre_save(void *opaque)
3581{
3582    VirtIONet *n = opaque;
3583
3584    /* At this point, backend must be stopped, otherwise
3585     * it might keep writing to memory. */
3586    assert(!n->vhost_started);
3587
3588    return 0;
3589}
3590
3591static bool primary_unplug_pending(void *opaque)
3592{
3593    DeviceState *dev = opaque;
3594    DeviceState *primary;
3595    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3596    VirtIONet *n = VIRTIO_NET(vdev);
3597
3598    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3599        return false;
3600    }
3601    primary = failover_find_primary_device(n);
3602    return primary ? primary->pending_deleted_event : false;
3603}
3604
3605static bool dev_unplug_pending(void *opaque)
3606{
3607    DeviceState *dev = opaque;
3608    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3609
3610    return vdc->primary_unplug_pending(dev);
3611}
3612
/*
 * Outer migration descriptor for the virtio-net device.  The actual
 * device payload is carried by the VMSTATE_VIRTIO_DEVICE field (which
 * defers to vdc->vmsd, i.e. vmstate_virtio_net_device).
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    /* Lets migration wait for a pending failover-primary unplug. */
    .dev_unplug_pending = dev_unplug_pending,
};
3624
/*
 * qdev properties: most map one-to-one onto VIRTIO_NET_F_* feature
 * bits in host_features; the remainder configure the NIC backend,
 * queue sizes and link parameters.
 */
static Property virtio_net_properties[] = {
    /* Checksum and segmentation offload feature bits. */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    /* Control virtqueue and the commands it carries. */
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue, RSS and hash reporting (off by default). */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    /* Receive segment coalescing. */
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    /* Backend NIC, TX policy and queue sizing. */
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Link speed/duplex reported to the guest, and failover support. */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3691
3692static void virtio_net_class_init(ObjectClass *klass, void *data)
3693{
3694    DeviceClass *dc = DEVICE_CLASS(klass);
3695    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3696
3697    device_class_set_props(dc, virtio_net_properties);
3698    dc->vmsd = &vmstate_virtio_net;
3699    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3700    vdc->realize = virtio_net_device_realize;
3701    vdc->unrealize = virtio_net_device_unrealize;
3702    vdc->get_config = virtio_net_get_config;
3703    vdc->set_config = virtio_net_set_config;
3704    vdc->get_features = virtio_net_get_features;
3705    vdc->set_features = virtio_net_set_features;
3706    vdc->bad_features = virtio_net_bad_features;
3707    vdc->reset = virtio_net_reset;
3708    vdc->set_status = virtio_net_set_status;
3709    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3710    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3711    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3712    vdc->post_load = virtio_net_post_load_virtio;
3713    vdc->vmsd = &vmstate_virtio_net_device;
3714    vdc->primary_unplug_pending = primary_unplug_pending;
3715}
3716
/* QOM type registration record for TYPE_VIRTIO_NET. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};
3724
/* Register the virtio-net QOM type at module-init time. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)
3731