qemu/hw/net/virtio-net.c
<<
>>
Prefs
   1/*
   2 * Virtio Network Device
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/atomic.h"
  16#include "qemu/iov.h"
  17#include "qemu/main-loop.h"
  18#include "qemu/module.h"
  19#include "hw/virtio/virtio.h"
  20#include "net/net.h"
  21#include "net/checksum.h"
  22#include "net/tap.h"
  23#include "qemu/error-report.h"
  24#include "qemu/timer.h"
  25#include "qemu/option.h"
  26#include "qemu/option_int.h"
  27#include "qemu/config-file.h"
  28#include "qapi/qmp/qdict.h"
  29#include "hw/virtio/virtio-net.h"
  30#include "net/vhost_net.h"
  31#include "net/announce.h"
  32#include "hw/virtio/virtio-bus.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-net.h"
  35#include "hw/qdev-properties.h"
  36#include "qapi/qapi-types-migration.h"
  37#include "qapi/qapi-events-migration.h"
  38#include "hw/virtio/virtio-access.h"
  39#include "migration/misc.h"
  40#include "standard-headers/linux/ethtool.h"
  41#include "sysemu/sysemu.h"
  42#include "trace.h"
  43#include "monitor/qdev.h"
  44#include "hw/pci/pci.h"
  45#include "net_rx_pkt.h"
  46#include "hw/virtio/vhost.h"
  47#include "sysemu/qtest.h"
  48
/* NOTE(review): presumably the migration stream version - confirm at use site */
#define VIRTIO_NET_VM_VERSION    11

/* Capacity of the MAC filter table, counted in ETH_ALEN-byte entries */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* Default RX/TX virtqueue sizes; this used to be a fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/*
 * For now, only queue sizes at or above the default are allowed;
 * with virtio-1, the guest can downsize.
 */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

/*
 * NOTE(review): these look like masks for the TCP flag bits and the TCP
 * header-length field - confirm against the code that uses them.
 */
#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* Header length value in an IP header without options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge timer interval for coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Hash types reported in the supported_hash_types config field */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  92
  93static const VirtIOFeature feature_sizes[] = {
  94    {.flags = 1ULL << VIRTIO_NET_F_MAC,
  95     .end = endof(struct virtio_net_config, mac)},
  96    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
  97     .end = endof(struct virtio_net_config, status)},
  98    {.flags = 1ULL << VIRTIO_NET_F_MQ,
  99     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 100    {.flags = 1ULL << VIRTIO_NET_F_MTU,
 101     .end = endof(struct virtio_net_config, mtu)},
 102    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
 103     .end = endof(struct virtio_net_config, duplex)},
 104    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
 105     .end = endof(struct virtio_net_config, supported_hash_types)},
 106    {}
 107};
 108
 109static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 110{
 111    VirtIONet *n = qemu_get_nic_opaque(nc);
 112
 113    return &n->vqs[nc->queue_index];
 114}
 115
 116static int vq2q(int queue_index)
 117{
 118    return queue_index / 2;
 119}
 120
 121/* TODO
 122 * - we could suppress RX interrupt if we were so inclined.
 123 */
 124
 125static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 126{
 127    VirtIONet *n = VIRTIO_NET(vdev);
 128    struct virtio_net_config netcfg;
 129    NetClientState *nc = qemu_get_queue(n->nic);
 130    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 131
 132    int ret = 0;
 133    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 134    virtio_stw_p(vdev, &netcfg.status, n->status);
 135    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
 136    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 137    memcpy(netcfg.mac, n->mac, ETH_ALEN);
 138    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 139    netcfg.duplex = n->net_conf.duplex;
 140    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 141    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 142                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 143                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 144    virtio_stl_p(vdev, &netcfg.supported_hash_types,
 145                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
 146    memcpy(config, &netcfg, n->config_size);
 147
 148    /*
 149     * Is this VDPA? No peer means not VDPA: there's no way to
 150     * disconnect/reconnect a VDPA peer.
 151     */
 152    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 153        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 154                                   n->config_size);
 155        if (ret != -1) {
 156            /*
 157             * Some NIC/kernel combinations present 0 as the mac address.  As
 158             * that is not a legal address, try to proceed with the
 159             * address from the QEMU command line in the hope that the
 160             * address has been configured correctly elsewhere - just not
 161             * reported by the device.
 162             */
 163            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 164                info_report("Zero hardware mac address detected. Ignoring.");
 165                memcpy(netcfg.mac, n->mac, ETH_ALEN);
 166            }
 167            memcpy(config, &netcfg, n->config_size);
 168        }
 169    }
 170}
 171
 172static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 173{
 174    VirtIONet *n = VIRTIO_NET(vdev);
 175    struct virtio_net_config netcfg = {};
 176    NetClientState *nc = qemu_get_queue(n->nic);
 177
 178    memcpy(&netcfg, config, n->config_size);
 179
 180    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 181        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 182        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 183        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 184        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 185    }
 186
 187    /*
 188     * Is this VDPA? No peer means not VDPA: there's no way to
 189     * disconnect/reconnect a VDPA peer.
 190     */
 191    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 192        vhost_net_set_config(get_vhost_net(nc->peer),
 193                             (uint8_t *)&netcfg, 0, n->config_size,
 194                             VHOST_SET_CONFIG_TYPE_MASTER);
 195      }
 196}
 197
 198static bool virtio_net_started(VirtIONet *n, uint8_t status)
 199{
 200    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 201    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 202        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 203}
 204
 205static void virtio_net_announce_notify(VirtIONet *net)
 206{
 207    VirtIODevice *vdev = VIRTIO_DEVICE(net);
 208    trace_virtio_net_announce_notify();
 209
 210    net->status |= VIRTIO_NET_S_ANNOUNCE;
 211    virtio_notify_config(vdev);
 212}
 213
 214static void virtio_net_announce_timer(void *opaque)
 215{
 216    VirtIONet *n = opaque;
 217    trace_virtio_net_announce_timer(n->announce_timer.round);
 218
 219    n->announce_timer.round--;
 220    virtio_net_announce_notify(n);
 221}
 222
 223static void virtio_net_announce(NetClientState *nc)
 224{
 225    VirtIONet *n = qemu_get_nic_opaque(nc);
 226    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 227
 228    /*
 229     * Make sure the virtio migration announcement timer isn't running
 230     * If it is, let it trigger announcement so that we do not cause
 231     * confusion.
 232     */
 233    if (n->announce_timer.round) {
 234        return;
 235    }
 236
 237    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 238        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 239            virtio_net_announce_notify(n);
 240    }
 241}
 242
 243static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 244{
 245    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 246    NetClientState *nc = qemu_get_queue(n->nic);
 247    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 248    int cvq = n->max_ncs - n->max_queue_pairs;
 249
 250    if (!get_vhost_net(nc->peer)) {
 251        return;
 252    }
 253
 254    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 255        !!n->vhost_started) {
 256        return;
 257    }
 258    if (!n->vhost_started) {
 259        int r, i;
 260
 261        if (n->needs_vnet_hdr_swap) {
 262            error_report("backend does not support %s vnet headers; "
 263                         "falling back on userspace virtio",
 264                         virtio_is_big_endian(vdev) ? "BE" : "LE");
 265            return;
 266        }
 267
 268        /* Any packets outstanding? Purge them to avoid touching rings
 269         * when vhost is running.
 270         */
 271        for (i = 0;  i < queue_pairs; i++) {
 272            NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 273
 274            /* Purge both directions: TX and RX. */
 275            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 276            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 277        }
 278
 279        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 280            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 281            if (r < 0) {
 282                error_report("%uBytes MTU not supported by the backend",
 283                             n->net_conf.mtu);
 284
 285                return;
 286            }
 287        }
 288
 289        n->vhost_started = 1;
 290        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
 291        if (r < 0) {
 292            error_report("unable to start vhost net: %d: "
 293                         "falling back on userspace virtio", -r);
 294            n->vhost_started = 0;
 295        }
 296    } else {
 297        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
 298        n->vhost_started = 0;
 299    }
 300}
 301
 302static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 303                                          NetClientState *peer,
 304                                          bool enable)
 305{
 306    if (virtio_is_big_endian(vdev)) {
 307        return qemu_set_vnet_be(peer, enable);
 308    } else {
 309        return qemu_set_vnet_le(peer, enable);
 310    }
 311}
 312
 313static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 314                                       int queue_pairs, bool enable)
 315{
 316    int i;
 317
 318    for (i = 0; i < queue_pairs; i++) {
 319        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 320            enable) {
 321            while (--i >= 0) {
 322                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 323            }
 324
 325            return true;
 326        }
 327    }
 328
 329    return false;
 330}
 331
 332static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 333{
 334    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 335    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 336
 337    if (virtio_net_started(n, status)) {
 338        /* Before using the device, we tell the network backend about the
 339         * endianness to use when parsing vnet headers. If the backend
 340         * can't do it, we fallback onto fixing the headers in the core
 341         * virtio-net code.
 342         */
 343        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 344                                                            queue_pairs, true);
 345    } else if (virtio_net_started(n, vdev->status)) {
 346        /* After using the device, we need to reset the network backend to
 347         * the default (guest native endianness), otherwise the guest may
 348         * lose network connectivity if it is rebooted into a different
 349         * endianness.
 350         */
 351        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
 352    }
 353}
 354
 355static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 356{
 357    unsigned int dropped = virtqueue_drop_all(vq);
 358    if (dropped) {
 359        virtio_notify(vdev, vq);
 360    }
 361}
 362
 363static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 364{
 365    VirtIONet *n = VIRTIO_NET(vdev);
 366    VirtIONetQueue *q;
 367    int i;
 368    uint8_t queue_status;
 369
 370    virtio_net_vnet_endian_status(n, status);
 371    virtio_net_vhost_status(n, status);
 372
 373    for (i = 0; i < n->max_queue_pairs; i++) {
 374        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 375        bool queue_started;
 376        q = &n->vqs[i];
 377
 378        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
 379            queue_status = 0;
 380        } else {
 381            queue_status = status;
 382        }
 383        queue_started =
 384            virtio_net_started(n, queue_status) && !n->vhost_started;
 385
 386        if (queue_started) {
 387            qemu_flush_queued_packets(ncs);
 388        }
 389
 390        if (!q->tx_waiting) {
 391            continue;
 392        }
 393
 394        if (queue_started) {
 395            if (q->tx_timer) {
 396                timer_mod(q->tx_timer,
 397                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 398            } else {
 399                qemu_bh_schedule(q->tx_bh);
 400            }
 401        } else {
 402            if (q->tx_timer) {
 403                timer_del(q->tx_timer);
 404            } else {
 405                qemu_bh_cancel(q->tx_bh);
 406            }
 407            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 408                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 409                vdev->vm_running) {
 410                /* if tx is waiting we are likely have some packets in tx queue
 411                 * and disabled notification */
 412                q->tx_waiting = 0;
 413                virtio_queue_set_notification(q->tx_vq, 1);
 414                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 415            }
 416        }
 417    }
 418}
 419
 420static void virtio_net_set_link_status(NetClientState *nc)
 421{
 422    VirtIONet *n = qemu_get_nic_opaque(nc);
 423    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 424    uint16_t old_status = n->status;
 425
 426    if (nc->link_down)
 427        n->status &= ~VIRTIO_NET_S_LINK_UP;
 428    else
 429        n->status |= VIRTIO_NET_S_LINK_UP;
 430
 431    if (n->status != old_status)
 432        virtio_notify_config(vdev);
 433
 434    virtio_net_set_status(vdev, vdev->status);
 435}
 436
 437static void rxfilter_notify(NetClientState *nc)
 438{
 439    VirtIONet *n = qemu_get_nic_opaque(nc);
 440
 441    if (nc->rxfilter_notify_enabled) {
 442        char *path = object_get_canonical_path(OBJECT(n->qdev));
 443        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 444                                              n->netclient_name, path);
 445        g_free(path);
 446
 447        /* disable event notification to avoid events flooding */
 448        nc->rxfilter_notify_enabled = 0;
 449    }
 450}
 451
 452static intList *get_vlan_table(VirtIONet *n)
 453{
 454    intList *list;
 455    int i, j;
 456
 457    list = NULL;
 458    for (i = 0; i < MAX_VLAN >> 5; i++) {
 459        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 460            if (n->vlans[i] & (1U << j)) {
 461                QAPI_LIST_PREPEND(list, (i << 5) + j);
 462            }
 463        }
 464    }
 465
 466    return list;
 467}
 468
 469static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 470{
 471    VirtIONet *n = qemu_get_nic_opaque(nc);
 472    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 473    RxFilterInfo *info;
 474    strList *str_list;
 475    int i;
 476
 477    info = g_malloc0(sizeof(*info));
 478    info->name = g_strdup(nc->name);
 479    info->promiscuous = n->promisc;
 480
 481    if (n->nouni) {
 482        info->unicast = RX_STATE_NONE;
 483    } else if (n->alluni) {
 484        info->unicast = RX_STATE_ALL;
 485    } else {
 486        info->unicast = RX_STATE_NORMAL;
 487    }
 488
 489    if (n->nomulti) {
 490        info->multicast = RX_STATE_NONE;
 491    } else if (n->allmulti) {
 492        info->multicast = RX_STATE_ALL;
 493    } else {
 494        info->multicast = RX_STATE_NORMAL;
 495    }
 496
 497    info->broadcast_allowed = n->nobcast;
 498    info->multicast_overflow = n->mac_table.multi_overflow;
 499    info->unicast_overflow = n->mac_table.uni_overflow;
 500
 501    info->main_mac = qemu_mac_strdup_printf(n->mac);
 502
 503    str_list = NULL;
 504    for (i = 0; i < n->mac_table.first_multi; i++) {
 505        QAPI_LIST_PREPEND(str_list,
 506                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 507    }
 508    info->unicast_table = str_list;
 509
 510    str_list = NULL;
 511    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 512        QAPI_LIST_PREPEND(str_list,
 513                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 514    }
 515    info->multicast_table = str_list;
 516    info->vlan_table = get_vlan_table(n);
 517
 518    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 519        info->vlan = RX_STATE_ALL;
 520    } else if (!info->vlan_table) {
 521        info->vlan = RX_STATE_NONE;
 522    } else {
 523        info->vlan = RX_STATE_NORMAL;
 524    }
 525
 526    /* enable event notification after query */
 527    nc->rxfilter_notify_enabled = 1;
 528
 529    return info;
 530}
 531
 532static void virtio_net_reset(VirtIODevice *vdev)
 533{
 534    VirtIONet *n = VIRTIO_NET(vdev);
 535    int i;
 536
 537    /* Reset back to compatibility mode */
 538    n->promisc = 1;
 539    n->allmulti = 0;
 540    n->alluni = 0;
 541    n->nomulti = 0;
 542    n->nouni = 0;
 543    n->nobcast = 0;
 544    /* multiqueue is disabled by default */
 545    n->curr_queue_pairs = 1;
 546    timer_del(n->announce_timer.tm);
 547    n->announce_timer.round = 0;
 548    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 549
 550    /* Flush any MAC and VLAN filter table state */
 551    n->mac_table.in_use = 0;
 552    n->mac_table.first_multi = 0;
 553    n->mac_table.multi_overflow = 0;
 554    n->mac_table.uni_overflow = 0;
 555    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 556    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 557    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 558    memset(n->vlans, 0, MAX_VLAN >> 3);
 559
 560    /* Flush any async TX */
 561    for (i = 0;  i < n->max_queue_pairs; i++) {
 562        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 563
 564        if (nc->peer) {
 565            qemu_flush_or_purge_queued_packets(nc->peer, true);
 566            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 567        }
 568    }
 569}
 570
 571static void peer_test_vnet_hdr(VirtIONet *n)
 572{
 573    NetClientState *nc = qemu_get_queue(n->nic);
 574    if (!nc->peer) {
 575        return;
 576    }
 577
 578    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 579}
 580
 581static int peer_has_vnet_hdr(VirtIONet *n)
 582{
 583    return n->has_vnet_hdr;
 584}
 585
 586static int peer_has_ufo(VirtIONet *n)
 587{
 588    if (!peer_has_vnet_hdr(n))
 589        return 0;
 590
 591    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 592
 593    return n->has_ufo;
 594}
 595
 596static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 597                                       int version_1, int hash_report)
 598{
 599    int i;
 600    NetClientState *nc;
 601
 602    n->mergeable_rx_bufs = mergeable_rx_bufs;
 603
 604    if (version_1) {
 605        n->guest_hdr_len = hash_report ?
 606            sizeof(struct virtio_net_hdr_v1_hash) :
 607            sizeof(struct virtio_net_hdr_mrg_rxbuf);
 608        n->rss_data.populate_hash = !!hash_report;
 609    } else {
 610        n->guest_hdr_len = n->mergeable_rx_bufs ?
 611            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 612            sizeof(struct virtio_net_hdr);
 613    }
 614
 615    for (i = 0; i < n->max_queue_pairs; i++) {
 616        nc = qemu_get_subqueue(n->nic, i);
 617
 618        if (peer_has_vnet_hdr(n) &&
 619            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 620            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 621            n->host_hdr_len = n->guest_hdr_len;
 622        }
 623    }
 624}
 625
 626static int virtio_net_max_tx_queue_size(VirtIONet *n)
 627{
 628    NetClientState *peer = n->nic_conf.peers.ncs[0];
 629
 630    /*
 631     * Backends other than vhost-user or vhost-vdpa don't support max queue
 632     * size.
 633     */
 634    if (!peer) {
 635        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 636    }
 637
 638    switch(peer->info->type) {
 639    case NET_CLIENT_DRIVER_VHOST_USER:
 640    case NET_CLIENT_DRIVER_VHOST_VDPA:
 641        return VIRTQUEUE_MAX_SIZE;
 642    default:
 643        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 644    };
 645}
 646
 647static int peer_attach(VirtIONet *n, int index)
 648{
 649    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 650
 651    if (!nc->peer) {
 652        return 0;
 653    }
 654
 655    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 656        vhost_set_vring_enable(nc->peer, 1);
 657    }
 658
 659    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 660        return 0;
 661    }
 662
 663    if (n->max_queue_pairs == 1) {
 664        return 0;
 665    }
 666
 667    return tap_enable(nc->peer);
 668}
 669
 670static int peer_detach(VirtIONet *n, int index)
 671{
 672    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 673
 674    if (!nc->peer) {
 675        return 0;
 676    }
 677
 678    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 679        vhost_set_vring_enable(nc->peer, 0);
 680    }
 681
 682    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 683        return 0;
 684    }
 685
 686    return tap_disable(nc->peer);
 687}
 688
 689static void virtio_net_set_queue_pairs(VirtIONet *n)
 690{
 691    int i;
 692    int r;
 693
 694    if (n->nic->peer_deleted) {
 695        return;
 696    }
 697
 698    for (i = 0; i < n->max_queue_pairs; i++) {
 699        if (i < n->curr_queue_pairs) {
 700            r = peer_attach(n, i);
 701            assert(!r);
 702        } else {
 703            r = peer_detach(n, i);
 704            assert(!r);
 705        }
 706    }
 707}
 708
/*
 * Forward declaration: virtio_net_set_features() below calls this before
 * its definition appears.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 710
 711static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 712                                        Error **errp)
 713{
 714    VirtIONet *n = VIRTIO_NET(vdev);
 715    NetClientState *nc = qemu_get_queue(n->nic);
 716
 717    /* Firstly sync all virtio-net possible supported features */
 718    features |= n->host_features;
 719
 720    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 721
 722    if (!peer_has_vnet_hdr(n)) {
 723        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 724        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 725        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 726        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 727
 728        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 729        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 730        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 731        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 732
 733        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 734    }
 735
 736    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 737        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 738        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 739    }
 740
 741    if (!get_vhost_net(nc->peer)) {
 742        return features;
 743    }
 744
 745    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 746        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 747    }
 748    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 749    vdev->backend_features = features;
 750
 751    if (n->mtu_bypass_backend &&
 752            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 753        features |= (1ULL << VIRTIO_NET_F_MTU);
 754    }
 755
 756    return features;
 757}
 758
 759static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 760{
 761    uint64_t features = 0;
 762
 763    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 764     * but also these: */
 765    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 766    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 767    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 768    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 769    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 770
 771    return features;
 772}
 773
 774static void virtio_net_apply_guest_offloads(VirtIONet *n)
 775{
 776    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 777            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 778            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 779            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 780            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 781            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 782}
 783
 784static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 785{
 786    static const uint64_t guest_offloads_mask =
 787        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 788        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 789        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 790        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 791        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 792
 793    return guest_offloads_mask & features;
 794}
 795
 796static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 797{
 798    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 799    return virtio_net_guest_offloads_by_features(vdev->guest_features);
 800}
 801
/*
 * Search context handed to failover_set_primary() while walking the
 * device tree.
 */
typedef struct {
    VirtIONet *n;       /* device whose netclient_name is being matched */
    DeviceState *dev;   /* set to the matching device, if one is found */
} FailoverDevice;
 806
 807/**
 808 * Set the failover primary device
 809 *
 810 * @opaque: FailoverId to setup
 811 * @opts: opts for device we are handling
 812 * @errp: returns an error if this function fails
 813 */
 814static int failover_set_primary(DeviceState *dev, void *opaque)
 815{
 816    FailoverDevice *fdev = opaque;
 817    PCIDevice *pci_dev = (PCIDevice *)
 818        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
 819
 820    if (!pci_dev) {
 821        return 0;
 822    }
 823
 824    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
 825        fdev->dev = dev;
 826        return 1;
 827    }
 828
 829    return 0;
 830}
 831
 832/**
 833 * Find the primary device for this failover virtio-net
 834 *
 835 * @n: VirtIONet device
 836 * @errp: returns an error if this function fails
 837 */
 838static DeviceState *failover_find_primary_device(VirtIONet *n)
 839{
 840    FailoverDevice fdev = {
 841        .n = n,
 842    };
 843
 844    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
 845                       NULL, NULL, &fdev);
 846    return fdev.dev;
 847}
 848
 849static void failover_add_primary(VirtIONet *n, Error **errp)
 850{
 851    Error *err = NULL;
 852    DeviceState *dev = failover_find_primary_device(n);
 853
 854    if (dev) {
 855        return;
 856    }
 857
 858    if (!n->primary_opts) {
 859        error_setg(errp, "Primary device not found");
 860        error_append_hint(errp, "Virtio-net failover will not work. Make "
 861                          "sure primary device has parameter"
 862                          " failover_pair_id=%s\n", n->netclient_name);
 863        return;
 864    }
 865
 866    dev = qdev_device_add_from_qdict(n->primary_opts,
 867                                     n->primary_opts_from_json,
 868                                     &err);
 869    if (err) {
 870        qobject_unref(n->primary_opts);
 871        n->primary_opts = NULL;
 872    } else {
 873        object_unref(OBJECT(dev));
 874    }
 875    error_propagate(errp, err);
 876}
 877
 878static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 879{
 880    VirtIONet *n = VIRTIO_NET(vdev);
 881    Error *err = NULL;
 882    int i;
 883
 884    if (n->mtu_bypass_backend &&
 885            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 886        features &= ~(1ULL << VIRTIO_NET_F_MTU);
 887    }
 888
 889    virtio_net_set_multiqueue(n,
 890                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 891                              virtio_has_feature(features, VIRTIO_NET_F_MQ));
 892
 893    virtio_net_set_mrg_rx_bufs(n,
 894                               virtio_has_feature(features,
 895                                                  VIRTIO_NET_F_MRG_RXBUF),
 896                               virtio_has_feature(features,
 897                                                  VIRTIO_F_VERSION_1),
 898                               virtio_has_feature(features,
 899                                                  VIRTIO_NET_F_HASH_REPORT));
 900
 901    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 902        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 903    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 904        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 905    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 906
 907    if (n->has_vnet_hdr) {
 908        n->curr_guest_offloads =
 909            virtio_net_guest_offloads_by_features(features);
 910        virtio_net_apply_guest_offloads(n);
 911    }
 912
 913    for (i = 0;  i < n->max_queue_pairs; i++) {
 914        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 915
 916        if (!get_vhost_net(nc->peer)) {
 917            continue;
 918        }
 919        vhost_net_ack_features(get_vhost_net(nc->peer), features);
 920    }
 921
 922    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 923        memset(n->vlans, 0, MAX_VLAN >> 3);
 924    } else {
 925        memset(n->vlans, 0xff, MAX_VLAN >> 3);
 926    }
 927
 928    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
 929        qapi_event_send_failover_negotiated(n->netclient_name);
 930        qatomic_set(&n->failover_primary_hidden, false);
 931        failover_add_primary(n, &err);
 932        if (err) {
 933            if (!qtest_enabled()) {
 934                warn_report_err(err);
 935            } else {
 936                error_free(err);
 937            }
 938        }
 939    }
 940}
 941
 942static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 943                                     struct iovec *iov, unsigned int iov_cnt)
 944{
 945    uint8_t on;
 946    size_t s;
 947    NetClientState *nc = qemu_get_queue(n->nic);
 948
 949    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 950    if (s != sizeof(on)) {
 951        return VIRTIO_NET_ERR;
 952    }
 953
 954    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 955        n->promisc = on;
 956    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 957        n->allmulti = on;
 958    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 959        n->alluni = on;
 960    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 961        n->nomulti = on;
 962    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 963        n->nouni = on;
 964    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 965        n->nobcast = on;
 966    } else {
 967        return VIRTIO_NET_ERR;
 968    }
 969
 970    rxfilter_notify(nc);
 971
 972    return VIRTIO_NET_OK;
 973}
 974
 975static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 976                                     struct iovec *iov, unsigned int iov_cnt)
 977{
 978    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 979    uint64_t offloads;
 980    size_t s;
 981
 982    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 983        return VIRTIO_NET_ERR;
 984    }
 985
 986    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 987    if (s != sizeof(offloads)) {
 988        return VIRTIO_NET_ERR;
 989    }
 990
 991    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 992        uint64_t supported_offloads;
 993
 994        offloads = virtio_ldq_p(vdev, &offloads);
 995
 996        if (!n->has_vnet_hdr) {
 997            return VIRTIO_NET_ERR;
 998        }
 999
1000        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1001            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1002        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1003            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1004        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1005
1006        supported_offloads = virtio_net_supported_guest_offloads(n);
1007        if (offloads & ~supported_offloads) {
1008            return VIRTIO_NET_ERR;
1009        }
1010
1011        n->curr_guest_offloads = offloads;
1012        virtio_net_apply_guest_offloads(n);
1013
1014        return VIRTIO_NET_OK;
1015    } else {
1016        return VIRTIO_NET_ERR;
1017    }
1018}
1019
1020static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1021                                 struct iovec *iov, unsigned int iov_cnt)
1022{
1023    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1024    struct virtio_net_ctrl_mac mac_data;
1025    size_t s;
1026    NetClientState *nc = qemu_get_queue(n->nic);
1027
1028    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1029        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1030            return VIRTIO_NET_ERR;
1031        }
1032        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1033        assert(s == sizeof(n->mac));
1034        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1035        rxfilter_notify(nc);
1036
1037        return VIRTIO_NET_OK;
1038    }
1039
1040    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1041        return VIRTIO_NET_ERR;
1042    }
1043
1044    int in_use = 0;
1045    int first_multi = 0;
1046    uint8_t uni_overflow = 0;
1047    uint8_t multi_overflow = 0;
1048    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1049
1050    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1051                   sizeof(mac_data.entries));
1052    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1053    if (s != sizeof(mac_data.entries)) {
1054        goto error;
1055    }
1056    iov_discard_front(&iov, &iov_cnt, s);
1057
1058    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1059        goto error;
1060    }
1061
1062    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1063        s = iov_to_buf(iov, iov_cnt, 0, macs,
1064                       mac_data.entries * ETH_ALEN);
1065        if (s != mac_data.entries * ETH_ALEN) {
1066            goto error;
1067        }
1068        in_use += mac_data.entries;
1069    } else {
1070        uni_overflow = 1;
1071    }
1072
1073    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1074
1075    first_multi = in_use;
1076
1077    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1078                   sizeof(mac_data.entries));
1079    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1080    if (s != sizeof(mac_data.entries)) {
1081        goto error;
1082    }
1083
1084    iov_discard_front(&iov, &iov_cnt, s);
1085
1086    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1087        goto error;
1088    }
1089
1090    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1091        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1092                       mac_data.entries * ETH_ALEN);
1093        if (s != mac_data.entries * ETH_ALEN) {
1094            goto error;
1095        }
1096        in_use += mac_data.entries;
1097    } else {
1098        multi_overflow = 1;
1099    }
1100
1101    n->mac_table.in_use = in_use;
1102    n->mac_table.first_multi = first_multi;
1103    n->mac_table.uni_overflow = uni_overflow;
1104    n->mac_table.multi_overflow = multi_overflow;
1105    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1106    g_free(macs);
1107    rxfilter_notify(nc);
1108
1109    return VIRTIO_NET_OK;
1110
1111error:
1112    g_free(macs);
1113    return VIRTIO_NET_ERR;
1114}
1115
1116static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1117                                        struct iovec *iov, unsigned int iov_cnt)
1118{
1119    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1120    uint16_t vid;
1121    size_t s;
1122    NetClientState *nc = qemu_get_queue(n->nic);
1123
1124    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1125    vid = virtio_lduw_p(vdev, &vid);
1126    if (s != sizeof(vid)) {
1127        return VIRTIO_NET_ERR;
1128    }
1129
1130    if (vid >= MAX_VLAN)
1131        return VIRTIO_NET_ERR;
1132
1133    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1134        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1135    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1136        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1137    else
1138        return VIRTIO_NET_ERR;
1139
1140    rxfilter_notify(nc);
1141
1142    return VIRTIO_NET_OK;
1143}
1144
1145static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1146                                      struct iovec *iov, unsigned int iov_cnt)
1147{
1148    trace_virtio_net_handle_announce(n->announce_timer.round);
1149    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1150        n->status & VIRTIO_NET_S_ANNOUNCE) {
1151        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1152        if (n->announce_timer.round) {
1153            qemu_announce_timer_step(&n->announce_timer);
1154        }
1155        return VIRTIO_NET_OK;
1156    } else {
1157        return VIRTIO_NET_ERR;
1158    }
1159}
1160
1161static void virtio_net_detach_epbf_rss(VirtIONet *n);
1162
/*
 * Turn RSS off: emit the rss_disable trace event if RSS was enabled, clear
 * the enabled flag and detach the steering eBPF program from the backend.
 * The enabled_software_rss flag is not modified here.
 */
static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}
1172
1173static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1174{
1175    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1176    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1177        return false;
1178    }
1179
1180    return nc->info->set_steering_ebpf(nc, prog_fd);
1181}
1182
/*
 * Copy the scalar RSS settings (redirect, populate_hash, hash_types,
 * indirections_len, default_queue) from @data into @config.  The
 * indirection table and key are not part of this copy.
 */
static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}
1192
1193static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1194{
1195    struct EBPFRSSConfig config = {};
1196
1197    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1198        return false;
1199    }
1200
1201    rss_data_to_rss_config(&n->rss_data, &config);
1202
1203    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1204                          n->rss_data.indirections_table, n->rss_data.key)) {
1205        return false;
1206    }
1207
1208    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1209        return false;
1210    }
1211
1212    return true;
1213}
1214
/*
 * Detach the steering eBPF program by passing fd -1 to the backend.  The
 * result of the backend call is ignored.
 */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1219
/*
 * Load the RSS eBPF program.  The backend is first probed by passing it
 * fd -1; if that call fails the load is skipped and false is returned.
 * Otherwise returns the result of ebpf_rss_load().
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}
1229
/*
 * Detach the steering program from the backend (fd -1), then unload the
 * RSS eBPF program.
 */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1235
1236static uint16_t virtio_net_handle_rss(VirtIONet *n,
1237                                      struct iovec *iov,
1238                                      unsigned int iov_cnt,
1239                                      bool do_rss)
1240{
1241    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1242    struct virtio_net_rss_config cfg;
1243    size_t s, offset = 0, size_get;
1244    uint16_t queue_pairs, i;
1245    struct {
1246        uint16_t us;
1247        uint8_t b;
1248    } QEMU_PACKED temp;
1249    const char *err_msg = "";
1250    uint32_t err_value = 0;
1251
1252    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1253        err_msg = "RSS is not negotiated";
1254        goto error;
1255    }
1256    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1257        err_msg = "Hash report is not negotiated";
1258        goto error;
1259    }
1260    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1261    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1262    if (s != size_get) {
1263        err_msg = "Short command buffer";
1264        err_value = (uint32_t)s;
1265        goto error;
1266    }
1267    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1268    n->rss_data.indirections_len =
1269        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1270    n->rss_data.indirections_len++;
1271    if (!do_rss) {
1272        n->rss_data.indirections_len = 1;
1273    }
1274    if (!is_power_of_2(n->rss_data.indirections_len)) {
1275        err_msg = "Invalid size of indirection table";
1276        err_value = n->rss_data.indirections_len;
1277        goto error;
1278    }
1279    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1280        err_msg = "Too large indirection table";
1281        err_value = n->rss_data.indirections_len;
1282        goto error;
1283    }
1284    n->rss_data.default_queue = do_rss ?
1285        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1286    if (n->rss_data.default_queue >= n->max_queue_pairs) {
1287        err_msg = "Invalid default queue";
1288        err_value = n->rss_data.default_queue;
1289        goto error;
1290    }
1291    offset += size_get;
1292    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1293    g_free(n->rss_data.indirections_table);
1294    n->rss_data.indirections_table = g_malloc(size_get);
1295    if (!n->rss_data.indirections_table) {
1296        err_msg = "Can't allocate indirections table";
1297        err_value = n->rss_data.indirections_len;
1298        goto error;
1299    }
1300    s = iov_to_buf(iov, iov_cnt, offset,
1301                   n->rss_data.indirections_table, size_get);
1302    if (s != size_get) {
1303        err_msg = "Short indirection table buffer";
1304        err_value = (uint32_t)s;
1305        goto error;
1306    }
1307    for (i = 0; i < n->rss_data.indirections_len; ++i) {
1308        uint16_t val = n->rss_data.indirections_table[i];
1309        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1310    }
1311    offset += size_get;
1312    size_get = sizeof(temp);
1313    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1314    if (s != size_get) {
1315        err_msg = "Can't get queue_pairs";
1316        err_value = (uint32_t)s;
1317        goto error;
1318    }
1319    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1320    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1321        err_msg = "Invalid number of queue_pairs";
1322        err_value = queue_pairs;
1323        goto error;
1324    }
1325    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1326        err_msg = "Invalid key size";
1327        err_value = temp.b;
1328        goto error;
1329    }
1330    if (!temp.b && n->rss_data.hash_types) {
1331        err_msg = "No key provided";
1332        err_value = 0;
1333        goto error;
1334    }
1335    if (!temp.b && !n->rss_data.hash_types) {
1336        virtio_net_disable_rss(n);
1337        return queue_pairs;
1338    }
1339    offset += size_get;
1340    size_get = temp.b;
1341    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1342    if (s != size_get) {
1343        err_msg = "Can get key buffer";
1344        err_value = (uint32_t)s;
1345        goto error;
1346    }
1347    n->rss_data.enabled = true;
1348
1349    if (!n->rss_data.populate_hash) {
1350        if (!virtio_net_attach_epbf_rss(n)) {
1351            /* EBPF must be loaded for vhost */
1352            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1353                warn_report("Can't load eBPF RSS for vhost");
1354                goto error;
1355            }
1356            /* fallback to software RSS */
1357            warn_report("Can't load eBPF RSS - fallback to software RSS");
1358            n->rss_data.enabled_software_rss = true;
1359        }
1360    } else {
1361        /* use software RSS for hash populating */
1362        /* and detach eBPF if was loaded before */
1363        virtio_net_detach_epbf_rss(n);
1364        n->rss_data.enabled_software_rss = true;
1365    }
1366
1367    trace_virtio_net_rss_enable(n->rss_data.hash_types,
1368                                n->rss_data.indirections_len,
1369                                temp.b);
1370    return queue_pairs;
1371error:
1372    trace_virtio_net_rss_error(err_msg, err_value);
1373    virtio_net_disable_rss(n);
1374    return 0;
1375}
1376
1377static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1378                                struct iovec *iov, unsigned int iov_cnt)
1379{
1380    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1381    uint16_t queue_pairs;
1382
1383    virtio_net_disable_rss(n);
1384    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1385        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1386        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1387    }
1388    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1389        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1390    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1391        struct virtio_net_ctrl_mq mq;
1392        size_t s;
1393        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1394            return VIRTIO_NET_ERR;
1395        }
1396        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1397        if (s != sizeof(mq)) {
1398            return VIRTIO_NET_ERR;
1399        }
1400        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1401
1402    } else {
1403        return VIRTIO_NET_ERR;
1404    }
1405
1406    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1407        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1408        queue_pairs > n->max_queue_pairs ||
1409        !n->multiqueue) {
1410        return VIRTIO_NET_ERR;
1411    }
1412
1413    n->curr_queue_pairs = queue_pairs;
1414    /* stop the backend before changing the number of queue_pairs to avoid handling a
1415     * disabled queue */
1416    virtio_net_set_status(vdev, vdev->status);
1417    virtio_net_set_queue_pairs(n);
1418
1419    return VIRTIO_NET_OK;
1420}
1421
1422static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1423{
1424    VirtIONet *n = VIRTIO_NET(vdev);
1425    struct virtio_net_ctrl_hdr ctrl;
1426    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1427    VirtQueueElement *elem;
1428    size_t s;
1429    struct iovec *iov, *iov2;
1430    unsigned int iov_cnt;
1431
1432    for (;;) {
1433        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1434        if (!elem) {
1435            break;
1436        }
1437        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1438            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1439            virtio_error(vdev, "virtio-net ctrl missing headers");
1440            virtqueue_detach_element(vq, elem, 0);
1441            g_free(elem);
1442            break;
1443        }
1444
1445        iov_cnt = elem->out_num;
1446        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1447        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1448        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1449        if (s != sizeof(ctrl)) {
1450            status = VIRTIO_NET_ERR;
1451        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1452            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1453        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1454            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1455        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1456            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1457        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1458            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1459        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1460            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1461        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1462            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1463        }
1464
1465        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1466        assert(s == sizeof(status));
1467
1468        virtqueue_push(vq, elem, sizeof(status));
1469        virtio_notify(vdev, vq);
1470        g_free(iov2);
1471        g_free(elem);
1472    }
1473}
1474
1475/* RX */
1476
/*
 * RX virtqueue handler: map @vq to its queue-pair index and flush any
 * packets queued for the corresponding NIC subqueue.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}
1484
1485static bool virtio_net_can_receive(NetClientState *nc)
1486{
1487    VirtIONet *n = qemu_get_nic_opaque(nc);
1488    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1489    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1490
1491    if (!vdev->vm_running) {
1492        return false;
1493    }
1494
1495    if (nc->queue_index >= n->curr_queue_pairs) {
1496        return false;
1497    }
1498
1499    if (!virtio_queue_ready(q->rx_vq) ||
1500        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1501        return false;
1502    }
1503
1504    return true;
1505}
1506
/*
 * Check whether the RX virtqueue of @q can take a packet of @bufsize bytes.
 *
 * The queue must be non-empty and, with mergeable RX buffers, must offer at
 * least @bufsize bytes.  Returns 1 (with guest notifications turned off)
 * when buffers are available, 0 (with notifications left on) when not.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        /* Ask to be notified when the guest adds buffers. */
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    /* Buffers are available: notifications are not needed for now. */
    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1529
/*
 * Byte-swap, in place, the four 16-bit fields of @hdr (hdr_len, gso_size,
 * csum_start, csum_offset) using the device's virtio_tswap16s() helper.
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1537
1538/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1539 * it never finds out that the packets don't have valid checksums.  This
1540 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1541 * fix this with Xen but it hasn't appeared in an upstream release of
1542 * dhclient yet.
1543 *
1544 * To avoid breaking existing guests, we catch udp packets and add
1545 * checksums.  This is terrible but it's better than hacking the guest
1546 * kernels.
1547 *
1548 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1549 * we should provide a mechanism to disable it to avoid polluting the host
1550 * cache.
1551 */
1552static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1553                                        uint8_t *buf, size_t size)
1554{
1555    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1556        (size > 27 && size < 1500) && /* normal sized MTU */
1557        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1558        (buf[23] == 17) && /* ip.protocol == UDP */
1559        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1560        net_checksum_calculate(buf, size, CSUM_UDP);
1561        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1562    }
1563}
1564
1565static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1566                           const void *buf, size_t size)
1567{
1568    if (n->has_vnet_hdr) {
1569        /* FIXME this cast is evil */
1570        void *wbuf = (void *)buf;
1571        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1572                                    size - n->host_hdr_len);
1573
1574        if (n->needs_vnet_hdr_swap) {
1575            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1576        }
1577        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1578    } else {
1579        struct virtio_net_hdr hdr = {
1580            .flags = 0,
1581            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1582        };
1583        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1584    }
1585}
1586
1587static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1588{
1589    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1590    static const uint8_t vlan[] = {0x81, 0x00};
1591    uint8_t *ptr = (uint8_t *)buf;
1592    int i;
1593
1594    if (n->promisc)
1595        return 1;
1596
1597    ptr += n->host_hdr_len;
1598
1599    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1600        int vid = lduw_be_p(ptr + 14) & 0xfff;
1601        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1602            return 0;
1603    }
1604
1605    if (ptr[0] & 1) { // multicast
1606        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1607            return !n->nobcast;
1608        } else if (n->nomulti) {
1609            return 0;
1610        } else if (n->allmulti || n->mac_table.multi_overflow) {
1611            return 1;
1612        }
1613
1614        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1615            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1616                return 1;
1617            }
1618        }
1619    } else { // unicast
1620        if (n->nouni) {
1621            return 0;
1622        } else if (n->alluni || n->mac_table.uni_overflow) {
1623            return 1;
1624        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1625            return 1;
1626        }
1627
1628        for (i = 0; i < n->mac_table.first_multi; i++) {
1629            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1630                return 1;
1631            }
1632        }
1633    }
1634
1635    return 0;
1636}
1637
1638static uint8_t virtio_net_get_hash_type(bool isip4,
1639                                        bool isip6,
1640                                        bool isudp,
1641                                        bool istcp,
1642                                        uint32_t types)
1643{
1644    if (isip4) {
1645        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1646            return NetPktRssIpV4Tcp;
1647        }
1648        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1649            return NetPktRssIpV4Udp;
1650        }
1651        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1652            return NetPktRssIpV4;
1653        }
1654    } else if (isip6) {
1655        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1656                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1657
1658        if (istcp && (types & mask)) {
1659            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1660                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1661        }
1662        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1663        if (isudp && (types & mask)) {
1664            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1665                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1666        }
1667        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1668        if (types & mask) {
1669            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1670                NetPktRssIpV6Ex : NetPktRssIpV6;
1671        }
1672    }
1673    return 0xff;
1674}
1675
/*
 * Store @hash and @report in the hash_value / hash_report fields of the
 * virtio_net_hdr_v1_hash header located at @buf.  The const qualifier of
 * @buf is cast away to do so; the values are stored as given, without any
 * byte-order conversion here.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1683
1684static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1685                                  size_t size)
1686{
1687    VirtIONet *n = qemu_get_nic_opaque(nc);
1688    unsigned int index = nc->queue_index, new_index = index;
1689    struct NetRxPkt *pkt = n->rx_pkt;
1690    uint8_t net_hash_type;
1691    uint32_t hash;
1692    bool isip4, isip6, isudp, istcp;
1693    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1694        VIRTIO_NET_HASH_REPORT_IPv4,
1695        VIRTIO_NET_HASH_REPORT_TCPv4,
1696        VIRTIO_NET_HASH_REPORT_TCPv6,
1697        VIRTIO_NET_HASH_REPORT_IPv6,
1698        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1699        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1700        VIRTIO_NET_HASH_REPORT_UDPv4,
1701        VIRTIO_NET_HASH_REPORT_UDPv6,
1702        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1703    };
1704
1705    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1706                             size - n->host_hdr_len);
1707    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1708    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1709        istcp = isudp = false;
1710    }
1711    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1712        istcp = isudp = false;
1713    }
1714    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1715                                             n->rss_data.hash_types);
1716    if (net_hash_type > NetPktRssIpV6UdpEx) {
1717        if (n->rss_data.populate_hash) {
1718            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1719        }
1720        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1721    }
1722
1723    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1724
1725    if (n->rss_data.populate_hash) {
1726        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1727    }
1728
1729    if (n->rss_data.redirect) {
1730        new_index = hash & (n->rss_data.indirections_len - 1);
1731        new_index = n->rss_data.indirections_table[new_index];
1732    }
1733
1734    return (index == new_index) ? -1 : new_index;
1735}
1736
1737static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1738                                      size_t size, bool no_rss)
1739{
1740    VirtIONet *n = qemu_get_nic_opaque(nc);
1741    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1742    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1743    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1744    size_t lens[VIRTQUEUE_MAX_SIZE];
1745    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1746    struct virtio_net_hdr_mrg_rxbuf mhdr;
1747    unsigned mhdr_cnt = 0;
1748    size_t offset, i, guest_offset, j;
1749    ssize_t err;
1750
1751    if (!virtio_net_can_receive(nc)) {
1752        return -1;
1753    }
1754
1755    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1756        int index = virtio_net_process_rss(nc, buf, size);
1757        if (index >= 0) {
1758            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1759            return virtio_net_receive_rcu(nc2, buf, size, true);
1760        }
1761    }
1762
1763    /* hdr_len refers to the header we supply to the guest */
1764    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1765        return 0;
1766    }
1767
1768    if (!receive_filter(n, buf, size))
1769        return size;
1770
1771    offset = i = 0;
1772
1773    while (offset < size) {
1774        VirtQueueElement *elem;
1775        int len, total;
1776        const struct iovec *sg;
1777
1778        total = 0;
1779
1780        if (i == VIRTQUEUE_MAX_SIZE) {
1781            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1782            err = size;
1783            goto err;
1784        }
1785
1786        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1787        if (!elem) {
1788            if (i) {
1789                virtio_error(vdev, "virtio-net unexpected empty queue: "
1790                             "i %zd mergeable %d offset %zd, size %zd, "
1791                             "guest hdr len %zd, host hdr len %zd "
1792                             "guest features 0x%" PRIx64,
1793                             i, n->mergeable_rx_bufs, offset, size,
1794                             n->guest_hdr_len, n->host_hdr_len,
1795                             vdev->guest_features);
1796            }
1797            err = -1;
1798            goto err;
1799        }
1800
1801        if (elem->in_num < 1) {
1802            virtio_error(vdev,
1803                         "virtio-net receive queue contains no in buffers");
1804            virtqueue_detach_element(q->rx_vq, elem, 0);
1805            g_free(elem);
1806            err = -1;
1807            goto err;
1808        }
1809
1810        sg = elem->in_sg;
1811        if (i == 0) {
1812            assert(offset == 0);
1813            if (n->mergeable_rx_bufs) {
1814                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1815                                    sg, elem->in_num,
1816                                    offsetof(typeof(mhdr), num_buffers),
1817                                    sizeof(mhdr.num_buffers));
1818            }
1819
1820            receive_header(n, sg, elem->in_num, buf, size);
1821            if (n->rss_data.populate_hash) {
1822                offset = sizeof(mhdr);
1823                iov_from_buf(sg, elem->in_num, offset,
1824                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1825            }
1826            offset = n->host_hdr_len;
1827            total += n->guest_hdr_len;
1828            guest_offset = n->guest_hdr_len;
1829        } else {
1830            guest_offset = 0;
1831        }
1832
1833        /* copy in packet.  ugh */
1834        len = iov_from_buf(sg, elem->in_num, guest_offset,
1835                           buf + offset, size - offset);
1836        total += len;
1837        offset += len;
1838        /* If buffers can't be merged, at this point we
1839         * must have consumed the complete packet.
1840         * Otherwise, drop it. */
1841        if (!n->mergeable_rx_bufs && offset < size) {
1842            virtqueue_unpop(q->rx_vq, elem, total);
1843            g_free(elem);
1844            err = size;
1845            goto err;
1846        }
1847
1848        elems[i] = elem;
1849        lens[i] = total;
1850        i++;
1851    }
1852
1853    if (mhdr_cnt) {
1854        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1855        iov_from_buf(mhdr_sg, mhdr_cnt,
1856                     0,
1857                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1858    }
1859
1860    for (j = 0; j < i; j++) {
1861        /* signal other side */
1862        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1863        g_free(elems[j]);
1864    }
1865
1866    virtqueue_flush(q->rx_vq, i);
1867    virtio_notify(vdev, q->rx_vq);
1868
1869    return size;
1870
1871err:
1872    for (j = 0; j < i; j++) {
1873        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1874        g_free(elems[j]);
1875    }
1876
1877    return err;
1878}
1879
1880static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1881                                  size_t size)
1882{
1883    RCU_READ_LOCK_GUARD();
1884
1885    return virtio_net_receive_rcu(nc, buf, size, false);
1886}
1887
1888static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1889                                         const uint8_t *buf,
1890                                         VirtioNetRscUnit *unit)
1891{
1892    uint16_t ip_hdrlen;
1893    struct ip_header *ip;
1894
1895    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1896                              + sizeof(struct eth_header));
1897    unit->ip = (void *)ip;
1898    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1899    unit->ip_plen = &ip->ip_len;
1900    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1901    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1902    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1903}
1904
1905static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1906                                         const uint8_t *buf,
1907                                         VirtioNetRscUnit *unit)
1908{
1909    struct ip6_header *ip6;
1910
1911    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1912                                 + sizeof(struct eth_header));
1913    unit->ip = ip6;
1914    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1915    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1916                                        + sizeof(struct ip6_header));
1917    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1918
1919    /* There is a difference between payload lenght in ipv4 and v6,
1920       ip header is excluded in ipv6 */
1921    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1922}
1923
1924static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1925                                       VirtioNetRscSeg *seg)
1926{
1927    int ret;
1928    struct virtio_net_hdr_v1 *h;
1929
1930    h = (struct virtio_net_hdr_v1 *)seg->buf;
1931    h->flags = 0;
1932    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1933
1934    if (seg->is_coalesced) {
1935        h->rsc.segments = seg->packets;
1936        h->rsc.dup_acks = seg->dup_ack;
1937        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1938        if (chain->proto == ETH_P_IP) {
1939            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1940        } else {
1941            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1942        }
1943    }
1944
1945    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1946    QTAILQ_REMOVE(&chain->buffers, seg, next);
1947    g_free(seg->buf);
1948    g_free(seg);
1949
1950    return ret;
1951}
1952
1953static void virtio_net_rsc_purge(void *opq)
1954{
1955    VirtioNetRscSeg *seg, *rn;
1956    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1957
1958    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1959        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1960            chain->stat.purge_failed++;
1961            continue;
1962        }
1963    }
1964
1965    chain->stat.timer++;
1966    if (!QTAILQ_EMPTY(&chain->buffers)) {
1967        timer_mod(chain->drain_timer,
1968              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1969    }
1970}
1971
1972static void virtio_net_rsc_cleanup(VirtIONet *n)
1973{
1974    VirtioNetRscChain *chain, *rn_chain;
1975    VirtioNetRscSeg *seg, *rn_seg;
1976
1977    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1978        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1979            QTAILQ_REMOVE(&chain->buffers, seg, next);
1980            g_free(seg->buf);
1981            g_free(seg);
1982        }
1983
1984        timer_free(chain->drain_timer);
1985        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1986        g_free(chain);
1987    }
1988}
1989
1990static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1991                                     NetClientState *nc,
1992                                     const uint8_t *buf, size_t size)
1993{
1994    uint16_t hdr_len;
1995    VirtioNetRscSeg *seg;
1996
1997    hdr_len = chain->n->guest_hdr_len;
1998    seg = g_new(VirtioNetRscSeg, 1);
1999    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2000        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2001    memcpy(seg->buf, buf, size);
2002    seg->size = size;
2003    seg->packets = 1;
2004    seg->dup_ack = 0;
2005    seg->is_coalesced = 0;
2006    seg->nc = nc;
2007
2008    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2009    chain->stat.cache++;
2010
2011    switch (chain->proto) {
2012    case ETH_P_IP:
2013        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2014        break;
2015    case ETH_P_IPV6:
2016        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2017        break;
2018    default:
2019        g_assert_not_reached();
2020    }
2021}
2022
2023static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2024                                         VirtioNetRscSeg *seg,
2025                                         const uint8_t *buf,
2026                                         struct tcp_header *n_tcp,
2027                                         struct tcp_header *o_tcp)
2028{
2029    uint32_t nack, oack;
2030    uint16_t nwin, owin;
2031
2032    nack = htonl(n_tcp->th_ack);
2033    nwin = htons(n_tcp->th_win);
2034    oack = htonl(o_tcp->th_ack);
2035    owin = htons(o_tcp->th_win);
2036
2037    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2038        chain->stat.ack_out_of_win++;
2039        return RSC_FINAL;
2040    } else if (nack == oack) {
2041        /* duplicated ack or window probe */
2042        if (nwin == owin) {
2043            /* duplicated ack, add dup ack count due to whql test up to 1 */
2044            chain->stat.dup_ack++;
2045            return RSC_FINAL;
2046        } else {
2047            /* Coalesce window update */
2048            o_tcp->th_win = n_tcp->th_win;
2049            chain->stat.win_update++;
2050            return RSC_COALESCE;
2051        }
2052    } else {
2053        /* pure ack, go to 'C', finalize*/
2054        chain->stat.pure_ack++;
2055        return RSC_FINAL;
2056    }
2057}
2058
2059static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2060                                            VirtioNetRscSeg *seg,
2061                                            const uint8_t *buf,
2062                                            VirtioNetRscUnit *n_unit)
2063{
2064    void *data;
2065    uint16_t o_ip_len;
2066    uint32_t nseq, oseq;
2067    VirtioNetRscUnit *o_unit;
2068
2069    o_unit = &seg->unit;
2070    o_ip_len = htons(*o_unit->ip_plen);
2071    nseq = htonl(n_unit->tcp->th_seq);
2072    oseq = htonl(o_unit->tcp->th_seq);
2073
2074    /* out of order or retransmitted. */
2075    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2076        chain->stat.data_out_of_win++;
2077        return RSC_FINAL;
2078    }
2079
2080    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2081    if (nseq == oseq) {
2082        if ((o_unit->payload == 0) && n_unit->payload) {
2083            /* From no payload to payload, normal case, not a dup ack or etc */
2084            chain->stat.data_after_pure_ack++;
2085            goto coalesce;
2086        } else {
2087            return virtio_net_rsc_handle_ack(chain, seg, buf,
2088                                             n_unit->tcp, o_unit->tcp);
2089        }
2090    } else if ((nseq - oseq) != o_unit->payload) {
2091        /* Not a consistent packet, out of order */
2092        chain->stat.data_out_of_order++;
2093        return RSC_FINAL;
2094    } else {
2095coalesce:
2096        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2097            chain->stat.over_size++;
2098            return RSC_FINAL;
2099        }
2100
2101        /* Here comes the right data, the payload length in v4/v6 is different,
2102           so use the field value to update and record the new data len */
2103        o_unit->payload += n_unit->payload; /* update new data len */
2104
2105        /* update field in ip header */
2106        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2107
2108        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2109           for windows guest, while this may change the behavior for linux
2110           guest (only if it uses RSC feature). */
2111        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2112
2113        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2114        o_unit->tcp->th_win = n_unit->tcp->th_win;
2115
2116        memmove(seg->buf + seg->size, data, n_unit->payload);
2117        seg->size += n_unit->payload;
2118        seg->packets++;
2119        chain->stat.coalesced++;
2120        return RSC_COALESCE;
2121    }
2122}
2123
2124static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2125                                        VirtioNetRscSeg *seg,
2126                                        const uint8_t *buf, size_t size,
2127                                        VirtioNetRscUnit *unit)
2128{
2129    struct ip_header *ip1, *ip2;
2130
2131    ip1 = (struct ip_header *)(unit->ip);
2132    ip2 = (struct ip_header *)(seg->unit.ip);
2133    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2134        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2135        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2136        chain->stat.no_match++;
2137        return RSC_NO_MATCH;
2138    }
2139
2140    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2141}
2142
2143static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2144                                        VirtioNetRscSeg *seg,
2145                                        const uint8_t *buf, size_t size,
2146                                        VirtioNetRscUnit *unit)
2147{
2148    struct ip6_header *ip1, *ip2;
2149
2150    ip1 = (struct ip6_header *)(unit->ip);
2151    ip2 = (struct ip6_header *)(seg->unit.ip);
2152    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2153        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2154        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2155        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2156            chain->stat.no_match++;
2157            return RSC_NO_MATCH;
2158    }
2159
2160    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2161}
2162
2163/* Packets with 'SYN' should bypass, other flag should be sent after drain
2164 * to prevent out of order */
2165static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2166                                         struct tcp_header *tcp)
2167{
2168    uint16_t tcp_hdr;
2169    uint16_t tcp_flag;
2170
2171    tcp_flag = htons(tcp->th_offset_flags);
2172    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2173    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2174    if (tcp_flag & TH_SYN) {
2175        chain->stat.tcp_syn++;
2176        return RSC_BYPASS;
2177    }
2178
2179    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2180        chain->stat.tcp_ctrl_drain++;
2181        return RSC_FINAL;
2182    }
2183
2184    if (tcp_hdr > sizeof(struct tcp_header)) {
2185        chain->stat.tcp_all_opt++;
2186        return RSC_FINAL;
2187    }
2188
2189    return RSC_CANDIDATE;
2190}
2191
2192static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2193                                         NetClientState *nc,
2194                                         const uint8_t *buf, size_t size,
2195                                         VirtioNetRscUnit *unit)
2196{
2197    int ret;
2198    VirtioNetRscSeg *seg, *nseg;
2199
2200    if (QTAILQ_EMPTY(&chain->buffers)) {
2201        chain->stat.empty_cache++;
2202        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2203        timer_mod(chain->drain_timer,
2204              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2205        return size;
2206    }
2207
2208    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2209        if (chain->proto == ETH_P_IP) {
2210            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2211        } else {
2212            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2213        }
2214
2215        if (ret == RSC_FINAL) {
2216            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2217                /* Send failed */
2218                chain->stat.final_failed++;
2219                return 0;
2220            }
2221
2222            /* Send current packet */
2223            return virtio_net_do_receive(nc, buf, size);
2224        } else if (ret == RSC_NO_MATCH) {
2225            continue;
2226        } else {
2227            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2228            seg->is_coalesced = 1;
2229            return size;
2230        }
2231    }
2232
2233    chain->stat.no_match_cache++;
2234    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2235    return size;
2236}
2237
2238/* Drain a connection data, this is to avoid out of order segments */
2239static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2240                                        NetClientState *nc,
2241                                        const uint8_t *buf, size_t size,
2242                                        uint16_t ip_start, uint16_t ip_size,
2243                                        uint16_t tcp_port)
2244{
2245    VirtioNetRscSeg *seg, *nseg;
2246    uint32_t ppair1, ppair2;
2247
2248    ppair1 = *(uint32_t *)(buf + tcp_port);
2249    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2250        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2251        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2252            || (ppair1 != ppair2)) {
2253            continue;
2254        }
2255        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2256            chain->stat.drain_failed++;
2257        }
2258
2259        break;
2260    }
2261
2262    return virtio_net_do_receive(nc, buf, size);
2263}
2264
2265static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2266                                            struct ip_header *ip,
2267                                            const uint8_t *buf, size_t size)
2268{
2269    uint16_t ip_len;
2270
2271    /* Not an ipv4 packet */
2272    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2273        chain->stat.ip_option++;
2274        return RSC_BYPASS;
2275    }
2276
2277    /* Don't handle packets with ip option */
2278    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2279        chain->stat.ip_option++;
2280        return RSC_BYPASS;
2281    }
2282
2283    if (ip->ip_p != IPPROTO_TCP) {
2284        chain->stat.bypass_not_tcp++;
2285        return RSC_BYPASS;
2286    }
2287
2288    /* Don't handle packets with ip fragment */
2289    if (!(htons(ip->ip_off) & IP_DF)) {
2290        chain->stat.ip_frag++;
2291        return RSC_BYPASS;
2292    }
2293
2294    /* Don't handle packets with ecn flag */
2295    if (IPTOS_ECN(ip->ip_tos)) {
2296        chain->stat.ip_ecn++;
2297        return RSC_BYPASS;
2298    }
2299
2300    ip_len = htons(ip->ip_len);
2301    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2302        || ip_len > (size - chain->n->guest_hdr_len -
2303                     sizeof(struct eth_header))) {
2304        chain->stat.ip_hacked++;
2305        return RSC_BYPASS;
2306    }
2307
2308    return RSC_CANDIDATE;
2309}
2310
2311static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2312                                      NetClientState *nc,
2313                                      const uint8_t *buf, size_t size)
2314{
2315    int32_t ret;
2316    uint16_t hdr_len;
2317    VirtioNetRscUnit unit;
2318
2319    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2320
2321    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2322        + sizeof(struct tcp_header))) {
2323        chain->stat.bypass_not_tcp++;
2324        return virtio_net_do_receive(nc, buf, size);
2325    }
2326
2327    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2328    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2329        != RSC_CANDIDATE) {
2330        return virtio_net_do_receive(nc, buf, size);
2331    }
2332
2333    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2334    if (ret == RSC_BYPASS) {
2335        return virtio_net_do_receive(nc, buf, size);
2336    } else if (ret == RSC_FINAL) {
2337        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2338                ((hdr_len + sizeof(struct eth_header)) + 12),
2339                VIRTIO_NET_IP4_ADDR_SIZE,
2340                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2341    }
2342
2343    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2344}
2345
2346static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2347                                            struct ip6_header *ip6,
2348                                            const uint8_t *buf, size_t size)
2349{
2350    uint16_t ip_len;
2351
2352    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2353        != IP_HEADER_VERSION_6) {
2354        return RSC_BYPASS;
2355    }
2356
2357    /* Both option and protocol is checked in this */
2358    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2359        chain->stat.bypass_not_tcp++;
2360        return RSC_BYPASS;
2361    }
2362
2363    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2364    if (ip_len < sizeof(struct tcp_header) ||
2365        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2366                  - sizeof(struct ip6_header))) {
2367        chain->stat.ip_hacked++;
2368        return RSC_BYPASS;
2369    }
2370
2371    /* Don't handle packets with ecn flag */
2372    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2373        chain->stat.ip_ecn++;
2374        return RSC_BYPASS;
2375    }
2376
2377    return RSC_CANDIDATE;
2378}
2379
2380static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2381                                      const uint8_t *buf, size_t size)
2382{
2383    int32_t ret;
2384    uint16_t hdr_len;
2385    VirtioNetRscChain *chain;
2386    VirtioNetRscUnit unit;
2387
2388    chain = (VirtioNetRscChain *)opq;
2389    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2390
2391    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2392        + sizeof(tcp_header))) {
2393        return virtio_net_do_receive(nc, buf, size);
2394    }
2395
2396    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2397    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2398                                                 unit.ip, buf, size)) {
2399        return virtio_net_do_receive(nc, buf, size);
2400    }
2401
2402    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2403    if (ret == RSC_BYPASS) {
2404        return virtio_net_do_receive(nc, buf, size);
2405    } else if (ret == RSC_FINAL) {
2406        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2407                ((hdr_len + sizeof(struct eth_header)) + 8),
2408                VIRTIO_NET_IP6_ADDR_SIZE,
2409                hdr_len + sizeof(struct eth_header)
2410                + sizeof(struct ip6_header));
2411    }
2412
2413    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2414}
2415
2416static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2417                                                      NetClientState *nc,
2418                                                      uint16_t proto)
2419{
2420    VirtioNetRscChain *chain;
2421
2422    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2423        return NULL;
2424    }
2425
2426    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2427        if (chain->proto == proto) {
2428            return chain;
2429        }
2430    }
2431
2432    chain = g_malloc(sizeof(*chain));
2433    chain->n = n;
2434    chain->proto = proto;
2435    if (proto == (uint16_t)ETH_P_IP) {
2436        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2437        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2438    } else {
2439        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2440        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2441    }
2442    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2443                                      virtio_net_rsc_purge, chain);
2444    memset(&chain->stat, 0, sizeof(chain->stat));
2445
2446    QTAILQ_INIT(&chain->buffers);
2447    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2448
2449    return chain;
2450}
2451
2452static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2453                                      const uint8_t *buf,
2454                                      size_t size)
2455{
2456    uint16_t proto;
2457    VirtioNetRscChain *chain;
2458    struct eth_header *eth;
2459    VirtIONet *n;
2460
2461    n = qemu_get_nic_opaque(nc);
2462    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2463        return virtio_net_do_receive(nc, buf, size);
2464    }
2465
2466    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2467    proto = htons(eth->h_proto);
2468
2469    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2470    if (chain) {
2471        chain->stat.received++;
2472        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2473            return virtio_net_rsc_receive4(chain, nc, buf, size);
2474        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2475            return virtio_net_rsc_receive6(chain, nc, buf, size);
2476        }
2477    }
2478    return virtio_net_do_receive(nc, buf, size);
2479}
2480
2481static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2482                                  size_t size)
2483{
2484    VirtIONet *n = qemu_get_nic_opaque(nc);
2485    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2486        return virtio_net_rsc_receive(nc, buf, size);
2487    } else {
2488        return virtio_net_do_receive(nc, buf, size);
2489    }
2490}
2491
2492static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2493
2494static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2495{
2496    VirtIONet *n = qemu_get_nic_opaque(nc);
2497    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2498    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2499
2500    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2501    virtio_notify(vdev, q->tx_vq);
2502
2503    g_free(q->async_tx.elem);
2504    q->async_tx.elem = NULL;
2505
2506    virtio_queue_set_notification(q->tx_vq, 1);
2507    virtio_net_flush_tx(q);
2508}
2509
2510/* TX */
2511static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2512{
2513    VirtIONet *n = q->n;
2514    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2515    VirtQueueElement *elem;
2516    int32_t num_packets = 0;
2517    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2518    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2519        return num_packets;
2520    }
2521
2522    if (q->async_tx.elem) {
2523        virtio_queue_set_notification(q->tx_vq, 0);
2524        return num_packets;
2525    }
2526
2527    for (;;) {
2528        ssize_t ret;
2529        unsigned int out_num;
2530        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2531        struct virtio_net_hdr_mrg_rxbuf mhdr;
2532
2533        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2534        if (!elem) {
2535            break;
2536        }
2537
2538        out_num = elem->out_num;
2539        out_sg = elem->out_sg;
2540        if (out_num < 1) {
2541            virtio_error(vdev, "virtio-net header not in first element");
2542            virtqueue_detach_element(q->tx_vq, elem, 0);
2543            g_free(elem);
2544            return -EINVAL;
2545        }
2546
2547        if (n->has_vnet_hdr) {
2548            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2549                n->guest_hdr_len) {
2550                virtio_error(vdev, "virtio-net header incorrect");
2551                virtqueue_detach_element(q->tx_vq, elem, 0);
2552                g_free(elem);
2553                return -EINVAL;
2554            }
2555            if (n->needs_vnet_hdr_swap) {
2556                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2557                sg2[0].iov_base = &mhdr;
2558                sg2[0].iov_len = n->guest_hdr_len;
2559                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2560                                   out_sg, out_num,
2561                                   n->guest_hdr_len, -1);
2562                if (out_num == VIRTQUEUE_MAX_SIZE) {
2563                    goto drop;
2564                }
2565                out_num += 1;
2566                out_sg = sg2;
2567            }
2568        }
2569        /*
2570         * If host wants to see the guest header as is, we can
2571         * pass it on unchanged. Otherwise, copy just the parts
2572         * that host is interested in.
2573         */
2574        assert(n->host_hdr_len <= n->guest_hdr_len);
2575        if (n->host_hdr_len != n->guest_hdr_len) {
2576            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2577                                       out_sg, out_num,
2578                                       0, n->host_hdr_len);
2579            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2580                             out_sg, out_num,
2581                             n->guest_hdr_len, -1);
2582            out_num = sg_num;
2583            out_sg = sg;
2584        }
2585
2586        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2587                                      out_sg, out_num, virtio_net_tx_complete);
2588        if (ret == 0) {
2589            virtio_queue_set_notification(q->tx_vq, 0);
2590            q->async_tx.elem = elem;
2591            return -EBUSY;
2592        }
2593
2594drop:
2595        virtqueue_push(q->tx_vq, elem, 0);
2596        virtio_notify(vdev, q->tx_vq);
2597        g_free(elem);
2598
2599        if (++num_packets >= n->tx_burst) {
2600            break;
2601        }
2602    }
2603    return num_packets;
2604}
2605
2606static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2607{
2608    VirtIONet *n = VIRTIO_NET(vdev);
2609    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2610
2611    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2612        virtio_net_drop_tx_queue_data(vdev, vq);
2613        return;
2614    }
2615
2616    /* This happens when device was stopped but VCPU wasn't. */
2617    if (!vdev->vm_running) {
2618        q->tx_waiting = 1;
2619        return;
2620    }
2621
2622    if (q->tx_waiting) {
2623        virtio_queue_set_notification(vq, 1);
2624        timer_del(q->tx_timer);
2625        q->tx_waiting = 0;
2626        if (virtio_net_flush_tx(q) == -EINVAL) {
2627            return;
2628        }
2629    } else {
2630        timer_mod(q->tx_timer,
2631                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2632        q->tx_waiting = 1;
2633        virtio_queue_set_notification(vq, 0);
2634    }
2635}
2636
2637static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2638{
2639    VirtIONet *n = VIRTIO_NET(vdev);
2640    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2641
2642    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2643        virtio_net_drop_tx_queue_data(vdev, vq);
2644        return;
2645    }
2646
2647    if (unlikely(q->tx_waiting)) {
2648        return;
2649    }
2650    q->tx_waiting = 1;
2651    /* This happens when device was stopped but VCPU wasn't. */
2652    if (!vdev->vm_running) {
2653        return;
2654    }
2655    virtio_queue_set_notification(vq, 0);
2656    qemu_bh_schedule(q->tx_bh);
2657}
2658
2659static void virtio_net_tx_timer(void *opaque)
2660{
2661    VirtIONetQueue *q = opaque;
2662    VirtIONet *n = q->n;
2663    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2664    /* This happens when device was stopped but BH wasn't. */
2665    if (!vdev->vm_running) {
2666        /* Make sure tx waiting is set, so we'll run when restarted. */
2667        assert(q->tx_waiting);
2668        return;
2669    }
2670
2671    q->tx_waiting = 0;
2672
2673    /* Just in case the driver is not ready on more */
2674    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2675        return;
2676    }
2677
2678    virtio_queue_set_notification(q->tx_vq, 1);
2679    virtio_net_flush_tx(q);
2680}
2681
2682static void virtio_net_tx_bh(void *opaque)
2683{
2684    VirtIONetQueue *q = opaque;
2685    VirtIONet *n = q->n;
2686    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2687    int32_t ret;
2688
2689    /* This happens when device was stopped but BH wasn't. */
2690    if (!vdev->vm_running) {
2691        /* Make sure tx waiting is set, so we'll run when restarted. */
2692        assert(q->tx_waiting);
2693        return;
2694    }
2695
2696    q->tx_waiting = 0;
2697
2698    /* Just in case the driver is not ready on more */
2699    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2700        return;
2701    }
2702
2703    ret = virtio_net_flush_tx(q);
2704    if (ret == -EBUSY || ret == -EINVAL) {
2705        return; /* Notification re-enable handled by tx_complete or device
2706                 * broken */
2707    }
2708
2709    /* If we flush a full burst of packets, assume there are
2710     * more coming and immediately reschedule */
2711    if (ret >= n->tx_burst) {
2712        qemu_bh_schedule(q->tx_bh);
2713        q->tx_waiting = 1;
2714        return;
2715    }
2716
2717    /* If less than a full burst, re-enable notification and flush
2718     * anything that may have come in while we weren't looking.  If
2719     * we find something, assume the guest is still active and reschedule */
2720    virtio_queue_set_notification(q->tx_vq, 1);
2721    ret = virtio_net_flush_tx(q);
2722    if (ret == -EINVAL) {
2723        return;
2724    } else if (ret > 0) {
2725        virtio_queue_set_notification(q->tx_vq, 0);
2726        qemu_bh_schedule(q->tx_bh);
2727        q->tx_waiting = 1;
2728    }
2729}
2730
2731static void virtio_net_add_queue(VirtIONet *n, int index)
2732{
2733    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2734
2735    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2736                                           virtio_net_handle_rx);
2737
2738    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2739        n->vqs[index].tx_vq =
2740            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2741                             virtio_net_handle_tx_timer);
2742        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2743                                              virtio_net_tx_timer,
2744                                              &n->vqs[index]);
2745    } else {
2746        n->vqs[index].tx_vq =
2747            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2748                             virtio_net_handle_tx_bh);
2749        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2750    }
2751
2752    n->vqs[index].tx_waiting = 0;
2753    n->vqs[index].n = n;
2754}
2755
2756static void virtio_net_del_queue(VirtIONet *n, int index)
2757{
2758    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2759    VirtIONetQueue *q = &n->vqs[index];
2760    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2761
2762    qemu_purge_queued_packets(nc);
2763
2764    virtio_del_queue(vdev, index * 2);
2765    if (q->tx_timer) {
2766        timer_free(q->tx_timer);
2767        q->tx_timer = NULL;
2768    } else {
2769        qemu_bh_delete(q->tx_bh);
2770        q->tx_bh = NULL;
2771    }
2772    q->tx_waiting = 0;
2773    virtio_del_queue(vdev, index * 2 + 1);
2774}
2775
2776static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2777{
2778    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2779    int old_num_queues = virtio_get_num_queues(vdev);
2780    int new_num_queues = new_max_queue_pairs * 2 + 1;
2781    int i;
2782
2783    assert(old_num_queues >= 3);
2784    assert(old_num_queues % 2 == 1);
2785
2786    if (old_num_queues == new_num_queues) {
2787        return;
2788    }
2789
2790    /*
2791     * We always need to remove and add ctrl vq if
2792     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2793     * and then we only enter one of the following two loops.
2794     */
2795    virtio_del_queue(vdev, old_num_queues - 1);
2796
2797    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2798        /* new_num_queues < old_num_queues */
2799        virtio_net_del_queue(n, i / 2);
2800    }
2801
2802    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2803        /* new_num_queues > old_num_queues */
2804        virtio_net_add_queue(n, i / 2);
2805    }
2806
2807    /* add ctrl_vq last */
2808    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2809}
2810
2811static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2812{
2813    int max = multiqueue ? n->max_queue_pairs : 1;
2814
2815    n->multiqueue = multiqueue;
2816    virtio_net_change_num_queue_pairs(n, max);
2817
2818    virtio_net_set_queue_pairs(n);
2819}
2820
2821static int virtio_net_post_load_device(void *opaque, int version_id)
2822{
2823    VirtIONet *n = opaque;
2824    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2825    int i, link_down;
2826
2827    trace_virtio_net_post_load_device();
2828    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2829                               virtio_vdev_has_feature(vdev,
2830                                                       VIRTIO_F_VERSION_1),
2831                               virtio_vdev_has_feature(vdev,
2832                                                       VIRTIO_NET_F_HASH_REPORT));
2833
2834    /* MAC_TABLE_ENTRIES may be different from the saved image */
2835    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2836        n->mac_table.in_use = 0;
2837    }
2838
2839    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2840        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2841    }
2842
2843    /*
2844     * curr_guest_offloads will be later overwritten by the
2845     * virtio_set_features_nocheck call done from the virtio_load.
2846     * Here we make sure it is preserved and restored accordingly
2847     * in the virtio_net_post_load_virtio callback.
2848     */
2849    n->saved_guest_offloads = n->curr_guest_offloads;
2850
2851    virtio_net_set_queue_pairs(n);
2852
2853    /* Find the first multicast entry in the saved MAC filter */
2854    for (i = 0; i < n->mac_table.in_use; i++) {
2855        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2856            break;
2857        }
2858    }
2859    n->mac_table.first_multi = i;
2860
2861    /* nc.link_down can't be migrated, so infer link_down according
2862     * to link status bit in n->status */
2863    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2864    for (i = 0; i < n->max_queue_pairs; i++) {
2865        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2866    }
2867
2868    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2869        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2870        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2871                                  QEMU_CLOCK_VIRTUAL,
2872                                  virtio_net_announce_timer, n);
2873        if (n->announce_timer.round) {
2874            timer_mod(n->announce_timer.tm,
2875                      qemu_clock_get_ms(n->announce_timer.type));
2876        } else {
2877            qemu_announce_timer_del(&n->announce_timer, false);
2878        }
2879    }
2880
2881    if (n->rss_data.enabled) {
2882        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2883        if (!n->rss_data.populate_hash) {
2884            if (!virtio_net_attach_epbf_rss(n)) {
2885                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2886                    warn_report("Can't post-load eBPF RSS for vhost");
2887                } else {
2888                    warn_report("Can't post-load eBPF RSS - "
2889                                "fallback to software RSS");
2890                    n->rss_data.enabled_software_rss = true;
2891                }
2892            }
2893        }
2894
2895        trace_virtio_net_rss_enable(n->rss_data.hash_types,
2896                                    n->rss_data.indirections_len,
2897                                    sizeof(n->rss_data.key));
2898    } else {
2899        trace_virtio_net_rss_disable();
2900    }
2901    return 0;
2902}
2903
2904static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2905{
2906    VirtIONet *n = VIRTIO_NET(vdev);
2907    /*
2908     * The actual needed state is now in saved_guest_offloads,
2909     * see virtio_net_post_load_device for detail.
2910     * Restore it back and apply the desired offloads.
2911     */
2912    n->curr_guest_offloads = n->saved_guest_offloads;
2913    if (peer_has_vnet_hdr(n)) {
2914        virtio_net_apply_guest_offloads(n);
2915    }
2916
2917    return 0;
2918}
2919
2920/* tx_waiting field of a VirtIONetQueue */
2921static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2922    .name = "virtio-net-queue-tx_waiting",
2923    .fields = (VMStateField[]) {
2924        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2925        VMSTATE_END_OF_LIST()
2926   },
2927};
2928
2929static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2930{
2931    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2932}
2933
2934static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2935{
2936    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2937                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2938}
2939
2940static bool mac_table_fits(void *opaque, int version_id)
2941{
2942    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2943}
2944
/* VMState field test: logical negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }
    return true;
}
2949
2950/* This temporary type is shared by all the WITH_TMP methods
2951 * although only some fields are used by each.
2952 */
2953struct VirtIONetMigTmp {
2954    VirtIONet      *parent;
2955    VirtIONetQueue *vqs_1;
2956    uint16_t        curr_queue_pairs_1;
2957    uint8_t         has_ufo;
2958    uint32_t        has_vnet_hdr;
2959};
2960
2961/* The 2nd and subsequent tx_waiting flags are loaded later than
2962 * the 1st entry in the queue_pairs and only if there's more than one
2963 * entry.  We use the tmp mechanism to calculate a temporary
2964 * pointer and count and also validate the count.
2965 */
2966
2967static int virtio_net_tx_waiting_pre_save(void *opaque)
2968{
2969    struct VirtIONetMigTmp *tmp = opaque;
2970
2971    tmp->vqs_1 = tmp->parent->vqs + 1;
2972    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2973    if (tmp->parent->curr_queue_pairs == 0) {
2974        tmp->curr_queue_pairs_1 = 0;
2975    }
2976
2977    return 0;
2978}
2979
2980static int virtio_net_tx_waiting_pre_load(void *opaque)
2981{
2982    struct VirtIONetMigTmp *tmp = opaque;
2983
2984    /* Reuse the pointer setup from save */
2985    virtio_net_tx_waiting_pre_save(opaque);
2986
2987    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
2988        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
2989            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
2990
2991        return -EINVAL;
2992    }
2993
2994    return 0; /* all good */
2995}
2996
2997static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2998    .name      = "virtio-net-tx_waiting",
2999    .pre_load  = virtio_net_tx_waiting_pre_load,
3000    .pre_save  = virtio_net_tx_waiting_pre_save,
3001    .fields    = (VMStateField[]) {
3002        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3003                                     curr_queue_pairs_1,
3004                                     vmstate_virtio_net_queue_tx_waiting,
3005                                     struct VirtIONetQueue),
3006        VMSTATE_END_OF_LIST()
3007    },
3008};
3009
3010/* the 'has_ufo' flag is just tested; if the incoming stream has the
3011 * flag set we need to check that we have it
3012 */
3013static int virtio_net_ufo_post_load(void *opaque, int version_id)
3014{
3015    struct VirtIONetMigTmp *tmp = opaque;
3016
3017    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3018        error_report("virtio-net: saved image requires TUN_F_UFO support");
3019        return -EINVAL;
3020    }
3021
3022    return 0;
3023}
3024
3025static int virtio_net_ufo_pre_save(void *opaque)
3026{
3027    struct VirtIONetMigTmp *tmp = opaque;
3028
3029    tmp->has_ufo = tmp->parent->has_ufo;
3030
3031    return 0;
3032}
3033
3034static const VMStateDescription vmstate_virtio_net_has_ufo = {
3035    .name      = "virtio-net-ufo",
3036    .post_load = virtio_net_ufo_post_load,
3037    .pre_save  = virtio_net_ufo_pre_save,
3038    .fields    = (VMStateField[]) {
3039        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3040        VMSTATE_END_OF_LIST()
3041    },
3042};
3043
3044/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3045 * flag set we need to check that we have it
3046 */
3047static int virtio_net_vnet_post_load(void *opaque, int version_id)
3048{
3049    struct VirtIONetMigTmp *tmp = opaque;
3050
3051    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3052        error_report("virtio-net: saved image requires vnet_hdr=on");
3053        return -EINVAL;
3054    }
3055
3056    return 0;
3057}
3058
3059static int virtio_net_vnet_pre_save(void *opaque)
3060{
3061    struct VirtIONetMigTmp *tmp = opaque;
3062
3063    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3064
3065    return 0;
3066}
3067
3068static const VMStateDescription vmstate_virtio_net_has_vnet = {
3069    .name      = "virtio-net-vnet",
3070    .post_load = virtio_net_vnet_post_load,
3071    .pre_save  = virtio_net_vnet_pre_save,
3072    .fields    = (VMStateField[]) {
3073        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3074        VMSTATE_END_OF_LIST()
3075    },
3076};
3077
3078static bool virtio_net_rss_needed(void *opaque)
3079{
3080    return VIRTIO_NET(opaque)->rss_data.enabled;
3081}
3082
3083static const VMStateDescription vmstate_virtio_net_rss = {
3084    .name      = "virtio-net-device/rss",
3085    .version_id = 1,
3086    .minimum_version_id = 1,
3087    .needed = virtio_net_rss_needed,
3088    .fields = (VMStateField[]) {
3089        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3090        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3091        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3092        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3093        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3094        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3095        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3096                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3097        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3098                                    rss_data.indirections_len, 0,
3099                                    vmstate_info_uint16, uint16_t),
3100        VMSTATE_END_OF_LIST()
3101    },
3102};
3103
3104static const VMStateDescription vmstate_virtio_net_device = {
3105    .name = "virtio-net-device",
3106    .version_id = VIRTIO_NET_VM_VERSION,
3107    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3108    .post_load = virtio_net_post_load_device,
3109    .fields = (VMStateField[]) {
3110        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3111        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3112                               vmstate_virtio_net_queue_tx_waiting,
3113                               VirtIONetQueue),
3114        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3115        VMSTATE_UINT16(status, VirtIONet),
3116        VMSTATE_UINT8(promisc, VirtIONet),
3117        VMSTATE_UINT8(allmulti, VirtIONet),
3118        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3119
3120        /* Guarded pair: If it fits we load it, else we throw it away
3121         * - can happen if source has a larger MAC table.; post-load
3122         *  sets flags in this case.
3123         */
3124        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3125                                0, mac_table_fits, mac_table.in_use,
3126                                 ETH_ALEN),
3127        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3128                                     mac_table.in_use, ETH_ALEN),
3129
3130        /* Note: This is an array of uint32's that's always been saved as a
3131         * buffer; hold onto your endiannesses; it's actually used as a bitmap
3132         * but based on the uint.
3133         */
3134        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3135        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3136                         vmstate_virtio_net_has_vnet),
3137        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3138        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3139        VMSTATE_UINT8(alluni, VirtIONet),
3140        VMSTATE_UINT8(nomulti, VirtIONet),
3141        VMSTATE_UINT8(nouni, VirtIONet),
3142        VMSTATE_UINT8(nobcast, VirtIONet),
3143        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3144                         vmstate_virtio_net_has_ufo),
3145        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3146                            vmstate_info_uint16_equal, uint16_t),
3147        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3148        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3149                         vmstate_virtio_net_tx_waiting),
3150        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3151                            has_ctrl_guest_offloads),
3152        VMSTATE_END_OF_LIST()
3153   },
3154    .subsections = (const VMStateDescription * []) {
3155        &vmstate_virtio_net_rss,
3156        NULL
3157    }
3158};
3159
3160static NetClientInfo net_virtio_info = {
3161    .type = NET_CLIENT_DRIVER_NIC,
3162    .size = sizeof(NICState),
3163    .can_receive = virtio_net_can_receive,
3164    .receive = virtio_net_receive,
3165    .link_status_changed = virtio_net_set_link_status,
3166    .query_rx_filter = virtio_net_query_rxfilter,
3167    .announce = virtio_net_announce,
3168};
3169
3170static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3171{
3172    VirtIONet *n = VIRTIO_NET(vdev);
3173    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3174    assert(n->vhost_started);
3175    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3176}
3177
3178static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3179                                           bool mask)
3180{
3181    VirtIONet *n = VIRTIO_NET(vdev);
3182    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3183    assert(n->vhost_started);
3184    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3185                             vdev, idx, mask);
3186}
3187
3188static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3189{
3190    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3191
3192    n->config_size = virtio_feature_get_config_size(feature_sizes,
3193                                                    host_features);
3194}
3195
3196void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3197                                   const char *type)
3198{
3199    /*
3200     * The name can be NULL, the netclient name will be type.x.
3201     */
3202    assert(type != NULL);
3203
3204    g_free(n->netclient_name);
3205    g_free(n->netclient_type);
3206    n->netclient_name = g_strdup(name);
3207    n->netclient_type = g_strdup(type);
3208}
3209
3210static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3211{
3212    HotplugHandler *hotplug_ctrl;
3213    PCIDevice *pci_dev;
3214    Error *err = NULL;
3215
3216    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3217    if (hotplug_ctrl) {
3218        pci_dev = PCI_DEVICE(dev);
3219        pci_dev->partially_hotplugged = true;
3220        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3221        if (err) {
3222            error_report_err(err);
3223            return false;
3224        }
3225    } else {
3226        return false;
3227    }
3228    return true;
3229}
3230
3231static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3232                                    Error **errp)
3233{
3234    Error *err = NULL;
3235    HotplugHandler *hotplug_ctrl;
3236    PCIDevice *pdev = PCI_DEVICE(dev);
3237    BusState *primary_bus;
3238
3239    if (!pdev->partially_hotplugged) {
3240        return true;
3241    }
3242    primary_bus = dev->parent_bus;
3243    if (!primary_bus) {
3244        error_setg(errp, "virtio_net: couldn't find primary bus");
3245        return false;
3246    }
3247    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3248    qatomic_set(&n->failover_primary_hidden, false);
3249    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3250    if (hotplug_ctrl) {
3251        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3252        if (err) {
3253            goto out;
3254        }
3255        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3256    }
3257    pdev->partially_hotplugged = false;
3258
3259out:
3260    error_propagate(errp, err);
3261    return !err;
3262}
3263
3264static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3265{
3266    bool should_be_hidden;
3267    Error *err = NULL;
3268    DeviceState *dev = failover_find_primary_device(n);
3269
3270    if (!dev) {
3271        return;
3272    }
3273
3274    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3275
3276    if (migration_in_setup(s) && !should_be_hidden) {
3277        if (failover_unplug_primary(n, dev)) {
3278            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3279            qapi_event_send_unplug_primary(dev->id);
3280            qatomic_set(&n->failover_primary_hidden, true);
3281        } else {
3282            warn_report("couldn't unplug primary device");
3283        }
3284    } else if (migration_has_failed(s)) {
3285        /* We already unplugged the device let's plug it back */
3286        if (!failover_replug_primary(n, dev, &err)) {
3287            if (err) {
3288                error_report_err(err);
3289            }
3290        }
3291    }
3292}
3293
3294static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3295{
3296    MigrationState *s = data;
3297    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3298    virtio_net_handle_migration_primary(n, s);
3299}
3300
3301static bool failover_hide_primary_device(DeviceListener *listener,
3302                                         const QDict *device_opts,
3303                                         bool from_json,
3304                                         Error **errp)
3305{
3306    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3307    const char *standby_id;
3308
3309    if (!device_opts) {
3310        return false;
3311    }
3312
3313    if (!qdict_haskey(device_opts, "failover_pair_id")) {
3314        return false;
3315    }
3316
3317    if (!qdict_haskey(device_opts, "id")) {
3318        error_setg(errp, "Device with failover_pair_id needs to have id");
3319        return false;
3320    }
3321
3322    standby_id = qdict_get_str(device_opts, "failover_pair_id");
3323    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3324        return false;
3325    }
3326
3327    /*
3328     * The hide helper can be called several times for a given device.
3329     * Check there is only one primary for a virtio-net device but
3330     * don't duplicate the qdict several times if it's called for the same
3331     * device.
3332     */
3333    if (n->primary_opts) {
3334        const char *old, *new;
3335        /* devices with failover_pair_id always have an id */
3336        old = qdict_get_str(n->primary_opts, "id");
3337        new = qdict_get_str(device_opts, "id");
3338        if (strcmp(old, new) != 0) {
3339            error_setg(errp, "Cannot attach more than one primary device to "
3340                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
3341            return false;
3342        }
3343    } else {
3344        n->primary_opts = qdict_clone_shallow(device_opts);
3345        n->primary_opts_from_json = from_json;
3346    }
3347
3348    /* failover_primary_hidden is set during feature negotiation */
3349    return qatomic_read(&n->failover_primary_hidden);
3350}
3351
3352static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3353{
3354    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3355    VirtIONet *n = VIRTIO_NET(dev);
3356    NetClientState *nc;
3357    int i;
3358
3359    if (n->net_conf.mtu) {
3360        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3361    }
3362
3363    if (n->net_conf.duplex_str) {
3364        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3365            n->net_conf.duplex = DUPLEX_HALF;
3366        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3367            n->net_conf.duplex = DUPLEX_FULL;
3368        } else {
3369            error_setg(errp, "'duplex' must be 'half' or 'full'");
3370            return;
3371        }
3372        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3373    } else {
3374        n->net_conf.duplex = DUPLEX_UNKNOWN;
3375    }
3376
3377    if (n->net_conf.speed < SPEED_UNKNOWN) {
3378        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3379        return;
3380    }
3381    if (n->net_conf.speed >= 0) {
3382        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3383    }
3384
3385    if (n->failover) {
3386        n->primary_listener.hide_device = failover_hide_primary_device;
3387        qatomic_set(&n->failover_primary_hidden, true);
3388        device_listener_register(&n->primary_listener);
3389        n->migration_state.notify = virtio_net_migration_state_notifier;
3390        add_migration_state_change_notifier(&n->migration_state);
3391        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3392    }
3393
3394    virtio_net_set_config_size(n, n->host_features);
3395    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3396
3397    /*
3398     * We set a lower limit on RX queue size to what it always was.
3399     * Guests that want a smaller ring can always resize it without
3400     * help from us (using virtio 1 and up).
3401     */
3402    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3403        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3404        !is_power_of_2(n->net_conf.rx_queue_size)) {
3405        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3406                   "must be a power of 2 between %d and %d.",
3407                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3408                   VIRTQUEUE_MAX_SIZE);
3409        virtio_cleanup(vdev);
3410        return;
3411    }
3412
3413    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3414        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3415        !is_power_of_2(n->net_conf.tx_queue_size)) {
3416        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3417                   "must be a power of 2 between %d and %d",
3418                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3419                   VIRTQUEUE_MAX_SIZE);
3420        virtio_cleanup(vdev);
3421        return;
3422    }
3423
3424    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3425
3426    /*
3427     * Figure out the datapath queue pairs since the backend could
3428     * provide control queue via peers as well.
3429     */
3430    if (n->nic_conf.peers.queues) {
3431        for (i = 0; i < n->max_ncs; i++) {
3432            if (n->nic_conf.peers.ncs[i]->is_datapath) {
3433                ++n->max_queue_pairs;
3434            }
3435        }
3436    }
3437    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3438
3439    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3440        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3441                   "must be a positive integer less than %d.",
3442                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3443        virtio_cleanup(vdev);
3444        return;
3445    }
3446    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3447    n->curr_queue_pairs = 1;
3448    n->tx_timeout = n->net_conf.txtimer;
3449
3450    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3451                       && strcmp(n->net_conf.tx, "bh")) {
3452        warn_report("virtio-net: "
3453                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3454                    n->net_conf.tx);
3455        error_printf("Defaulting to \"bh\"");
3456    }
3457
3458    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3459                                    n->net_conf.tx_queue_size);
3460
3461    for (i = 0; i < n->max_queue_pairs; i++) {
3462        virtio_net_add_queue(n, i);
3463    }
3464
3465    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3466    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3467    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3468    n->status = VIRTIO_NET_S_LINK_UP;
3469    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3470                              QEMU_CLOCK_VIRTUAL,
3471                              virtio_net_announce_timer, n);
3472    n->announce_timer.round = 0;
3473
3474    if (n->netclient_type) {
3475        /*
3476         * Happen when virtio_net_set_netclient_name has been called.
3477         */
3478        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3479                              n->netclient_type, n->netclient_name, n);
3480    } else {
3481        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3482                              object_get_typename(OBJECT(dev)), dev->id, n);
3483    }
3484
3485    for (i = 0; i < n->max_queue_pairs; i++) {
3486        n->nic->ncs[i].do_not_pad = true;
3487    }
3488
3489    peer_test_vnet_hdr(n);
3490    if (peer_has_vnet_hdr(n)) {
3491        for (i = 0; i < n->max_queue_pairs; i++) {
3492            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3493        }
3494        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3495    } else {
3496        n->host_hdr_len = 0;
3497    }
3498
3499    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3500
3501    n->vqs[0].tx_waiting = 0;
3502    n->tx_burst = n->net_conf.txburst;
3503    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3504    n->promisc = 1; /* for compatibility */
3505
3506    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3507
3508    n->vlans = g_malloc0(MAX_VLAN >> 3);
3509
3510    nc = qemu_get_queue(n->nic);
3511    nc->rxfilter_notify_enabled = 1;
3512
3513   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3514        struct virtio_net_config netcfg = {};
3515        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3516        vhost_net_set_config(get_vhost_net(nc->peer),
3517            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3518    }
3519    QTAILQ_INIT(&n->rsc_chains);
3520    n->qdev = dev;
3521
3522    net_rx_pkt_init(&n->rx_pkt, false);
3523
3524    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3525        virtio_net_load_ebpf(n);
3526    }
3527}
3528
3529static void virtio_net_device_unrealize(DeviceState *dev)
3530{
3531    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3532    VirtIONet *n = VIRTIO_NET(dev);
3533    int i, max_queue_pairs;
3534
3535    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3536        virtio_net_unload_ebpf(n);
3537    }
3538
3539    /* This will stop vhost backend if appropriate. */
3540    virtio_net_set_status(vdev, 0);
3541
3542    g_free(n->netclient_name);
3543    n->netclient_name = NULL;
3544    g_free(n->netclient_type);
3545    n->netclient_type = NULL;
3546
3547    g_free(n->mac_table.macs);
3548    g_free(n->vlans);
3549
3550    if (n->failover) {
3551        qobject_unref(n->primary_opts);
3552        device_listener_unregister(&n->primary_listener);
3553        remove_migration_state_change_notifier(&n->migration_state);
3554    } else {
3555        assert(n->primary_opts == NULL);
3556    }
3557
3558    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3559    for (i = 0; i < max_queue_pairs; i++) {
3560        virtio_net_del_queue(n, i);
3561    }
3562    /* delete also control vq */
3563    virtio_del_queue(vdev, max_queue_pairs * 2);
3564    qemu_announce_timer_del(&n->announce_timer, false);
3565    g_free(n->vqs);
3566    qemu_del_nic(n->nic);
3567    virtio_net_rsc_cleanup(n);
3568    g_free(n->rss_data.indirections_table);
3569    net_rx_pkt_uninit(n->rx_pkt);
3570    virtio_cleanup(vdev);
3571}
3572
3573static void virtio_net_instance_init(Object *obj)
3574{
3575    VirtIONet *n = VIRTIO_NET(obj);
3576
3577    /*
3578     * The default config_size is sizeof(struct virtio_net_config).
3579     * Can be overriden with virtio_net_set_config_size.
3580     */
3581    n->config_size = sizeof(struct virtio_net_config);
3582    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3583                                  "bootindex", "/ethernet-phy@0",
3584                                  DEVICE(n));
3585
3586    ebpf_rss_init(&n->ebpf_rss);
3587}
3588
3589static int virtio_net_pre_save(void *opaque)
3590{
3591    VirtIONet *n = opaque;
3592
3593    /* At this point, backend must be stopped, otherwise
3594     * it might keep writing to memory. */
3595    assert(!n->vhost_started);
3596
3597    return 0;
3598}
3599
3600static bool primary_unplug_pending(void *opaque)
3601{
3602    DeviceState *dev = opaque;
3603    DeviceState *primary;
3604    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3605    VirtIONet *n = VIRTIO_NET(vdev);
3606
3607    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3608        return false;
3609    }
3610    primary = failover_find_primary_device(n);
3611    return primary ? primary->pending_deleted_event : false;
3612}
3613
3614static bool dev_unplug_pending(void *opaque)
3615{
3616    DeviceState *dev = opaque;
3617    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3618
3619    return vdc->primary_unplug_pending(dev);
3620}
3621
3622static const VMStateDescription vmstate_virtio_net = {
3623    .name = "virtio-net",
3624    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3625    .version_id = VIRTIO_NET_VM_VERSION,
3626    .fields = (VMStateField[]) {
3627        VMSTATE_VIRTIO_DEVICE,
3628        VMSTATE_END_OF_LIST()
3629    },
3630    .pre_save = virtio_net_pre_save,
3631    .dev_unplug_pending = dev_unplug_pending,
3632};
3633
3634static Property virtio_net_properties[] = {
3635    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3636                    VIRTIO_NET_F_CSUM, true),
3637    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3638                    VIRTIO_NET_F_GUEST_CSUM, true),
3639    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3640    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3641                    VIRTIO_NET_F_GUEST_TSO4, true),
3642    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3643                    VIRTIO_NET_F_GUEST_TSO6, true),
3644    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3645                    VIRTIO_NET_F_GUEST_ECN, true),
3646    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3647                    VIRTIO_NET_F_GUEST_UFO, true),
3648    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3649                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3650    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3651                    VIRTIO_NET_F_HOST_TSO4, true),
3652    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3653                    VIRTIO_NET_F_HOST_TSO6, true),
3654    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3655                    VIRTIO_NET_F_HOST_ECN, true),
3656    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3657                    VIRTIO_NET_F_HOST_UFO, true),
3658    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3659                    VIRTIO_NET_F_MRG_RXBUF, true),
3660    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3661                    VIRTIO_NET_F_STATUS, true),
3662    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3663                    VIRTIO_NET_F_CTRL_VQ, true),
3664    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3665                    VIRTIO_NET_F_CTRL_RX, true),
3666    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3667                    VIRTIO_NET_F_CTRL_VLAN, true),
3668    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3669                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3670    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3671                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3672    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3673                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3674    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3675    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3676                    VIRTIO_NET_F_RSS, false),
3677    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3678                    VIRTIO_NET_F_HASH_REPORT, false),
3679    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3680                    VIRTIO_NET_F_RSC_EXT, false),
3681    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3682                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3683    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3684    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3685                       TX_TIMER_INTERVAL),
3686    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3687    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3688    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3689                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3690    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3691                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3692    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3693    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3694                     true),
3695    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3696    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3697    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3698    DEFINE_PROP_END_OF_LIST(),
3699};
3700
3701static void virtio_net_class_init(ObjectClass *klass, void *data)
3702{
3703    DeviceClass *dc = DEVICE_CLASS(klass);
3704    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3705
3706    device_class_set_props(dc, virtio_net_properties);
3707    dc->vmsd = &vmstate_virtio_net;
3708    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3709    vdc->realize = virtio_net_device_realize;
3710    vdc->unrealize = virtio_net_device_unrealize;
3711    vdc->get_config = virtio_net_get_config;
3712    vdc->set_config = virtio_net_set_config;
3713    vdc->get_features = virtio_net_get_features;
3714    vdc->set_features = virtio_net_set_features;
3715    vdc->bad_features = virtio_net_bad_features;
3716    vdc->reset = virtio_net_reset;
3717    vdc->set_status = virtio_net_set_status;
3718    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3719    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3720    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3721    vdc->post_load = virtio_net_post_load_virtio;
3722    vdc->vmsd = &vmstate_virtio_net_device;
3723    vdc->primary_unplug_pending = primary_unplug_pending;
3724}
3725
3726static const TypeInfo virtio_net_info = {
3727    .name = TYPE_VIRTIO_NET,
3728    .parent = TYPE_VIRTIO_DEVICE,
3729    .instance_size = sizeof(VirtIONet),
3730    .instance_init = virtio_net_instance_init,
3731    .class_init = virtio_net_class_init,
3732};
3733
3734static void virtio_register_types(void)
3735{
3736    type_register_static(&virtio_net_info);
3737}
3738
3739type_init(virtio_register_types)
3740