qemu/hw/net/virtio-net.c
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects the performance
   a lot and should be tuned carefully; '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

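/*
 * Each queue pair occupies two consecutive virtqueue indexes (RX on the
 * even index, TX on the odd one), so the owning queue pair of a virtqueue
 * is its index divided by two.
 */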
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

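/*
 * Fill in the config space visible to the guest: status, MAC, MTU,
 * speed/duplex and RSS limits. For a vhost-vdpa peer, the backend's own
 * config is queried and used instead, except that an all-zero MAC
 * reported by the backend is replaced by the one from the command line.
 */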
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

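/*
 * Start or stop the vhost backend so that its state tracks the virtio
 * device status and link state. Falls back to userspace virtio if the
 * backend cannot be started (e.g. vnet header endianness or the
 * negotiated MTU is not supported by the backend).
 */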
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

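/*
 * Tell one backend queue which endianness to use when parsing vnet
 * headers, matching the device's current notion of guest endianness.
 */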
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

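/* Drop all buffers the guest queued on a TX virtqueue and notify it. */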
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

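/*
 * Propagate a device status change to every queue pair: flush pending
 * packets for queues that just started, and stop TX timers/bottom halves
 * (dropping queued TX data while the link is down) for queues that
 * stopped.
 */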
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

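/* Convert the VLAN filter bitmap into a QAPI intList of configured VIDs. */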
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

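/*
 * Recompute the guest header length from the negotiated features
 * (virtio 1.0 and hash reporting imply larger headers) and, where the
 * peer supports that length, use the same header length on the host side.
 */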
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

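/*
 * Report the feature set we can offer to the guest: the host features
 * plus the mandatory MAC feature, minus offloads the peer cannot provide,
 * further masked by the vhost backend when one is in use.
 */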
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        virtio_add_feature(&features, VIRTIO_F_RING_RESET);
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked
 * @opaque: the FailoverDevice to fill in when a match is found
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

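/*
 * VIRTIO_NET_CTRL_MAC: either set the primary MAC address, or replace the
 * whole filter table, which arrives as a unicast list followed by a
 * multicast list; entries beyond MAC_TABLE_ENTRIES set the overflow flags.
 */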
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

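/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or hash-report
 * configuration command: validate the indirection table, default queue
 * and hash key, then attach eBPF RSS steering or fall back to software
 * RSS. Returns the number of queue pairs, or 0 on error.
 */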
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

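/*
 * Dispatch one control-virtqueue command to the class-specific handler
 * and write the status byte back into the request's device-writable area.
 */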
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

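/*
 * Check whether the RX virtqueue can take a packet of 'bufsize' bytes,
 * re-enabling guest notifications (and then re-checking, to close the
 * race with the guest adding buffers) when it cannot.
 */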
1594static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1595{
1596    VirtIONet *n = q->n;
1597    if (virtio_queue_empty(q->rx_vq) ||
1598        (n->mergeable_rx_bufs &&
1599         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1600        virtio_queue_set_notification(q->rx_vq, 1);
1601
1602        /* To avoid a race condition where the guest has made some buffers
1603         * available after the above check but before notification was
1604         * enabled, check for available buffers again.
1605         */
1606        if (virtio_queue_empty(q->rx_vq) ||
1607            (n->mergeable_rx_bufs &&
1608             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1609            return 0;
1610        }
1611    }
1612
1613    virtio_queue_set_notification(q->rx_vq, 0);
1614    return 1;
1615}
1616
1617static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1618{
1619    virtio_tswap16s(vdev, &hdr->hdr_len);
1620    virtio_tswap16s(vdev, &hdr->gso_size);
1621    virtio_tswap16s(vdev, &hdr->csum_start);
1622    virtio_tswap16s(vdev, &hdr->csum_offset);
1623}
1624
1625/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1626 * it never finds out that the packets don't have valid checksums.  This
1627 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1628 * fix this with Xen but it hasn't appeared in an upstream release of
1629 * dhclient yet.
1630 *
1631 * To avoid breaking existing guests, we catch udp packets and add
1632 * checksums.  This is terrible but it's better than hacking the guest
1633 * kernels.
1634 *
1635 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1636 * we should provide a mechanism to disable it to avoid polluting the host
1637 * cache.
1638 */
1639static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1640                                        uint8_t *buf, size_t size)
1641{
1642    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1643        (size > 27 && size < 1500) && /* normal sized MTU */
1644        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1645        (buf[23] == 17) && /* ip.protocol == UDP */
1646        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1647        net_checksum_calculate(buf, size, CSUM_UDP);
1648        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1649    }
1650}
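
/*
 * A worked sketch of the magic offsets above, assuming an untagged
 * Ethernet II frame carrying an option-less 20-byte IPv4 header:
 *
 *   buf[12..13]  ethertype                    0x0800 == IPv4
 *   buf[14]      first byte of the IPv4 header
 *   buf[23]      buf[14 + 9], IPv4 protocol   17 == UDP
 *   buf[34..35]  buf[14 + 20], UDP src port   67 == bootps
 *
 * A VLAN tag or IPv4 options would shift some of these offsets, so such
 * frames simply fail the match and keep VIRTIO_NET_HDR_F_NEEDS_CSUM.
 */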
1651
1652static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1653                           const void *buf, size_t size)
1654{
1655    if (n->has_vnet_hdr) {
1656        /* FIXME this cast is evil */
1657        void *wbuf = (void *)buf;
1658        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1659                                    size - n->host_hdr_len);
1660
1661        if (n->needs_vnet_hdr_swap) {
1662            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1663        }
1664        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1665    } else {
1666        struct virtio_net_hdr hdr = {
1667            .flags = 0,
1668            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1669        };
1670        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1671    }
1672}
1673
1674static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1675{
1676    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1677    static const uint8_t vlan[] = {0x81, 0x00};
1678    uint8_t *ptr = (uint8_t *)buf;
1679    int i;
1680
1681    if (n->promisc) {
1682        return 1;
1683    }
1684    ptr += n->host_hdr_len;
1685
1686    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1687        int vid = lduw_be_p(ptr + 14) & 0xfff;
1688        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
1689            return 0;
1690        }
1691    }
1692    if (ptr[0] & 1) { /* multicast */
1693        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1694            return !n->nobcast;
1695        } else if (n->nomulti) {
1696            return 0;
1697        } else if (n->allmulti || n->mac_table.multi_overflow) {
1698            return 1;
1699        }
1700
1701        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1702            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1703                return 1;
1704            }
1705        }
1706    } else { /* unicast */
1707        if (n->nouni) {
1708            return 0;
1709        } else if (n->alluni || n->mac_table.uni_overflow) {
1710            return 1;
1711        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1712            return 1;
1713        }
1714
1715        for (i = 0; i < n->mac_table.first_multi; i++) {
1716            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1717                return 1;
1718            }
1719        }
1720    }
1721
1722    return 0;
1723}
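
/*
 * A worked example of the VLAN bitmap test in receive_filter() above:
 * n->vlans[] is an array of 32-bit words covering VIDs 0..4095, so
 * for, say, vid = 100:
 *
 *   word index: 100 >> 5   == 3
 *   bit index:  100 & 0x1f == 4
 *   test:       n->vlans[3] & (1U << 4)
 */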
1724
1725static uint8_t virtio_net_get_hash_type(bool isip4,
1726                                        bool isip6,
1727                                        bool isudp,
1728                                        bool istcp,
1729                                        uint32_t types)
1730{
1731    if (isip4) {
1732        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1733            return NetPktRssIpV4Tcp;
1734        }
1735        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1736            return NetPktRssIpV4Udp;
1737        }
1738        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1739            return NetPktRssIpV4;
1740        }
1741    } else if (isip6) {
1742        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1743                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1744
1745        if (istcp && (types & mask)) {
1746            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1747                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1748        }
1749        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1750        if (isudp && (types & mask)) {
1751            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1752                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1753        }
1754        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1755        if (types & mask) {
1756            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1757                NetPktRssIpV6Ex : NetPktRssIpV6;
1758        }
1759    }
1760    return 0xff;
1761}
1762
1763static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1764                                   uint32_t hash)
1765{
1766    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1767    hdr->hash_value = hash;
1768    hdr->hash_report = report;
1769}
1770
1771static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1772                                  size_t size)
1773{
1774    VirtIONet *n = qemu_get_nic_opaque(nc);
1775    unsigned int index = nc->queue_index, new_index = index;
1776    struct NetRxPkt *pkt = n->rx_pkt;
1777    uint8_t net_hash_type;
1778    uint32_t hash;
1779    bool isip4, isip6, isudp, istcp;
1780    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1781        VIRTIO_NET_HASH_REPORT_IPv4,
1782        VIRTIO_NET_HASH_REPORT_TCPv4,
1783        VIRTIO_NET_HASH_REPORT_TCPv6,
1784        VIRTIO_NET_HASH_REPORT_IPv6,
1785        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1786        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1787        VIRTIO_NET_HASH_REPORT_UDPv4,
1788        VIRTIO_NET_HASH_REPORT_UDPv6,
1789        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1790    };
1791
1792    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1793                             size - n->host_hdr_len);
1794    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1795    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1796        istcp = isudp = false;
1797    }
1798    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1799        istcp = isudp = false;
1800    }
1801    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1802                                             n->rss_data.hash_types);
1803    if (net_hash_type > NetPktRssIpV6UdpEx) {
1804        if (n->rss_data.populate_hash) {
1805            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1806        }
1807        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1808    }
1809
1810    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1811
1812    if (n->rss_data.populate_hash) {
1813        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1814    }
1815
1816    if (n->rss_data.redirect) {
1817        new_index = hash & (n->rss_data.indirections_len - 1);
1818        new_index = n->rss_data.indirections_table[new_index];
1819    }
1820
1821    return (index == new_index) ? -1 : new_index;
1822}
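
/*
 * A minimal sketch of the redirection math above, e.g. with a table of
 * 128 entries; indirections_len must be a power of two (the control
 * path is expected to enforce this) for the mask to act as a modulo:
 *
 *   unsigned slot  = hash & (128 - 1);              /* 0..127 */
 *   unsigned queue = rss_data.indirections_table[slot];
 */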
1823
1824static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1825                                      size_t size, bool no_rss)
1826{
1827    VirtIONet *n = qemu_get_nic_opaque(nc);
1828    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1829    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1830    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1831    size_t lens[VIRTQUEUE_MAX_SIZE];
1832    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1833    struct virtio_net_hdr_mrg_rxbuf mhdr;
1834    unsigned mhdr_cnt = 0;
1835    size_t offset, i, guest_offset, j;
1836    ssize_t err;
1837
1838    if (!virtio_net_can_receive(nc)) {
1839        return -1;
1840    }
1841
1842    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1843        int index = virtio_net_process_rss(nc, buf, size);
1844        if (index >= 0) {
1845            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1846            return virtio_net_receive_rcu(nc2, buf, size, true);
1847        }
1848    }
1849
1850    /* hdr_len refers to the header we supply to the guest */
1851    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1852        return 0;
1853    }
1854
1855    if (!receive_filter(n, buf, size)) {
1856        return size;
1857    }
1858    offset = i = 0;
1859
1860    while (offset < size) {
1861        VirtQueueElement *elem;
1862        int len, total;
1863        const struct iovec *sg;
1864
1865        total = 0;
1866
1867        if (i == VIRTQUEUE_MAX_SIZE) {
1868            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1869            err = size;
1870            goto err;
1871        }
1872
1873        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1874        if (!elem) {
1875            if (i) {
1876                virtio_error(vdev, "virtio-net unexpected empty queue: "
1877                             "i %zd mergeable %d offset %zd, size %zd, "
1878                             "guest hdr len %zd, host hdr len %zd "
1879                             "guest features 0x%" PRIx64,
1880                             i, n->mergeable_rx_bufs, offset, size,
1881                             n->guest_hdr_len, n->host_hdr_len,
1882                             vdev->guest_features);
1883            }
1884            err = -1;
1885            goto err;
1886        }
1887
1888        if (elem->in_num < 1) {
1889            virtio_error(vdev,
1890                         "virtio-net receive queue contains no in buffers");
1891            virtqueue_detach_element(q->rx_vq, elem, 0);
1892            g_free(elem);
1893            err = -1;
1894            goto err;
1895        }
1896
1897        sg = elem->in_sg;
1898        if (i == 0) {
1899            assert(offset == 0);
1900            if (n->mergeable_rx_bufs) {
1901                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1902                                    sg, elem->in_num,
1903                                    offsetof(typeof(mhdr), num_buffers),
1904                                    sizeof(mhdr.num_buffers));
1905            }
1906
1907            receive_header(n, sg, elem->in_num, buf, size);
1908            if (n->rss_data.populate_hash) {
1909                offset = sizeof(mhdr);
1910                iov_from_buf(sg, elem->in_num, offset,
1911                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1912            }
1913            offset = n->host_hdr_len;
1914            total += n->guest_hdr_len;
1915            guest_offset = n->guest_hdr_len;
1916        } else {
1917            guest_offset = 0;
1918        }
1919
1920        /* copy in packet.  ugh */
1921        len = iov_from_buf(sg, elem->in_num, guest_offset,
1922                           buf + offset, size - offset);
1923        total += len;
1924        offset += len;
1925        /* If buffers can't be merged, at this point we
1926         * must have consumed the complete packet.
1927         * Otherwise, drop it. */
1928        if (!n->mergeable_rx_bufs && offset < size) {
1929            virtqueue_unpop(q->rx_vq, elem, total);
1930            g_free(elem);
1931            err = size;
1932            goto err;
1933        }
1934
1935        elems[i] = elem;
1936        lens[i] = total;
1937        i++;
1938    }
1939
1940    if (mhdr_cnt) {
1941        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1942        iov_from_buf(mhdr_sg, mhdr_cnt,
1943                     0,
1944                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1945    }
1946
1947    for (j = 0; j < i; j++) {
1948        /* signal other side */
1949        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1950        g_free(elems[j]);
1951    }
1952
1953    virtqueue_flush(q->rx_vq, i);
1954    virtio_notify(vdev, q->rx_vq);
1955
1956    return size;
1957
1958err:
1959    for (j = 0; j < i; j++) {
1960        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1961        g_free(elems[j]);
1962    }
1963
1964    return err;
1965}
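
/*
 * The mhdr_sg bookkeeping above exists because num_buffers in the
 * mergeable header is only known once the whole packet has been
 * scattered: iov_copy() records where those two bytes live inside the
 * first element, and they are patched via virtio_stw_p() just before
 * the virtqueue_fill()/virtqueue_flush() pair signals the guest.
 */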
1966
1967static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1968                                  size_t size)
1969{
1970    RCU_READ_LOCK_GUARD();
1971
1972    return virtio_net_receive_rcu(nc, buf, size, false);
1973}
1974
1975static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1976                                         const uint8_t *buf,
1977                                         VirtioNetRscUnit *unit)
1978{
1979    uint16_t ip_hdrlen;
1980    struct ip_header *ip;
1981
1982    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1983                              + sizeof(struct eth_header));
1984    unit->ip = (void *)ip;
1985    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1986    unit->ip_plen = &ip->ip_len;
1987    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1988    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1989    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1990}
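
/*
 * The tcp_hdrlen math above folds two steps into one shift: the TCP
 * data offset sits in the top 4 bits of th_offset_flags and counts
 * 32-bit words, so (flags & 0xF000) >> 12 gives words and a further
 * << 2 gives bytes, hence (flags & 0xF000) >> 10 overall. A plain
 * 20-byte header has offset 5: (5 << 12) >> 10 == 20.
 */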
1991
1992static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1993                                         const uint8_t *buf,
1994                                         VirtioNetRscUnit *unit)
1995{
1996    struct ip6_header *ip6;
1997
1998    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1999                                 + sizeof(struct eth_header));
2000    unit->ip = ip6;
2001    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2002    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2003                                        + sizeof(struct ip6_header));
2004    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2005
2006    /* Unlike IPv4, the IPv6 payload length does not include the IP
2007       header itself, so only the TCP header is subtracted here */
2008    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2009}
2010
2011static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2012                                       VirtioNetRscSeg *seg)
2013{
2014    int ret;
2015    struct virtio_net_hdr_v1 *h;
2016
2017    h = (struct virtio_net_hdr_v1 *)seg->buf;
2018    h->flags = 0;
2019    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2020
2021    if (seg->is_coalesced) {
2022        h->rsc.segments = seg->packets;
2023        h->rsc.dup_acks = seg->dup_ack;
2024        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2025        if (chain->proto == ETH_P_IP) {
2026            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2027        } else {
2028            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2029        }
2030    }
2031
2032    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2033    QTAILQ_REMOVE(&chain->buffers, seg, next);
2034    g_free(seg->buf);
2035    g_free(seg);
2036
2037    return ret;
2038}
2039
2040static void virtio_net_rsc_purge(void *opq)
2041{
2042    VirtioNetRscSeg *seg, *rn;
2043    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2044
2045    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2046        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2047            chain->stat.purge_failed++;
2048            continue;
2049        }
2050    }
2051
2052    chain->stat.timer++;
2053    if (!QTAILQ_EMPTY(&chain->buffers)) {
2054        timer_mod(chain->drain_timer,
2055              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2056    }
2057}
2058
2059static void virtio_net_rsc_cleanup(VirtIONet *n)
2060{
2061    VirtioNetRscChain *chain, *rn_chain;
2062    VirtioNetRscSeg *seg, *rn_seg;
2063
2064    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2065        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2066            QTAILQ_REMOVE(&chain->buffers, seg, next);
2067            g_free(seg->buf);
2068            g_free(seg);
2069        }
2070
2071        timer_free(chain->drain_timer);
2072        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2073        g_free(chain);
2074    }
2075}
2076
2077static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2078                                     NetClientState *nc,
2079                                     const uint8_t *buf, size_t size)
2080{
2081    uint16_t hdr_len;
2082    VirtioNetRscSeg *seg;
2083
2084    hdr_len = chain->n->guest_hdr_len;
2085    seg = g_new(VirtioNetRscSeg, 1);
2086    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2087        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2088    memcpy(seg->buf, buf, size);
2089    seg->size = size;
2090    seg->packets = 1;
2091    seg->dup_ack = 0;
2092    seg->is_coalesced = 0;
2093    seg->nc = nc;
2094
2095    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2096    chain->stat.cache++;
2097
2098    switch (chain->proto) {
2099    case ETH_P_IP:
2100        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2101        break;
2102    case ETH_P_IPV6:
2103        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2104        break;
2105    default:
2106        g_assert_not_reached();
2107    }
2108}
2109
2110static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2111                                         VirtioNetRscSeg *seg,
2112                                         const uint8_t *buf,
2113                                         struct tcp_header *n_tcp,
2114                                         struct tcp_header *o_tcp)
2115{
2116    uint32_t nack, oack;
2117    uint16_t nwin, owin;
2118
2119    nack = htonl(n_tcp->th_ack);
2120    nwin = htons(n_tcp->th_win);
2121    oack = htonl(o_tcp->th_ack);
2122    owin = htons(o_tcp->th_win);
2123
2124    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2125        chain->stat.ack_out_of_win++;
2126        return RSC_FINAL;
2127    } else if (nack == oack) {
2128        /* duplicated ack or window probe */
2129        if (nwin == owin) {
2130            /* duplicated ack; count it, the WHQL test expects at most 1 */
2131            chain->stat.dup_ack++;
2132            return RSC_FINAL;
2133        } else {
2134            /* Coalesce window update */
2135            o_tcp->th_win = n_tcp->th_win;
2136            chain->stat.win_update++;
2137            return RSC_COALESCE;
2138        }
2139    } else {
2140            /* pure ack, finalize */
2141        chain->stat.pure_ack++;
2142        return RSC_FINAL;
2143    }
2144}
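
/*
 * Note that the out-of-window check above relies on unsigned
 * wrap-around: if the new ack is older than the cached one
 * (nack < oack), nack - oack wraps to a huge value, so stale acks are
 * also classified as out of window and finalize the segment.
 */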
2145
2146static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2147                                            VirtioNetRscSeg *seg,
2148                                            const uint8_t *buf,
2149                                            VirtioNetRscUnit *n_unit)
2150{
2151    void *data;
2152    uint16_t o_ip_len;
2153    uint32_t nseq, oseq;
2154    VirtioNetRscUnit *o_unit;
2155
2156    o_unit = &seg->unit;
2157    o_ip_len = htons(*o_unit->ip_plen);
2158    nseq = htonl(n_unit->tcp->th_seq);
2159    oseq = htonl(o_unit->tcp->th_seq);
2160
2161    /* out of order or retransmitted. */
2162    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2163        chain->stat.data_out_of_win++;
2164        return RSC_FINAL;
2165    }
2166
2167    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2168    if (nseq == oseq) {
2169        if ((o_unit->payload == 0) && n_unit->payload) {
2170            /* From no payload to payload: normal case, not a dup ack */
2171            chain->stat.data_after_pure_ack++;
2172            goto coalesce;
2173        } else {
2174            return virtio_net_rsc_handle_ack(chain, seg, buf,
2175                                             n_unit->tcp, o_unit->tcp);
2176        }
2177    } else if ((nseq - oseq) != o_unit->payload) {
2178        /* Not contiguous with the cached payload, out of order */
2179        chain->stat.data_out_of_order++;
2180        return RSC_FINAL;
2181    } else {
2182coalesce:
2183        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2184            chain->stat.over_size++;
2185            return RSC_FINAL;
2186        }
2187
2188        /* In-order data; the payload length field differs between v4 and
2189           v6, so update it via ip_plen and record the new data length */
2190        o_unit->payload += n_unit->payload; /* update new data len */
2191
2192        /* update field in ip header */
2193        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2194
2195        /* Carry over the 'PSH' flag: the WHQL test guide says 'PSH' can be
2196           coalesced for a Windows guest, while this may change the behavior
2197           of a Linux guest (but only if it uses the RSC feature). */
2198        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2199
2200        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2201        o_unit->tcp->th_win = n_unit->tcp->th_win;
2202
2203        memmove(seg->buf + seg->size, data, n_unit->payload);
2204        seg->size += n_unit->payload;
2205        seg->packets++;
2206        chain->stat.coalesced++;
2207        return RSC_COALESCE;
2208    }
2209}
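
/*
 * A worked example of the sequence check above: with a cached segment
 * at oseq = 1000 carrying a 500-byte payload, only nseq = 1500 extends
 * it (nseq - oseq == payload). nseq = 1200 is a retransmit/overlap and
 * nseq = 2000 leaves a hole; both count as data_out_of_order and
 * finalize the segment.
 */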
2210
2211static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2212                                        VirtioNetRscSeg *seg,
2213                                        const uint8_t *buf, size_t size,
2214                                        VirtioNetRscUnit *unit)
2215{
2216    struct ip_header *ip1, *ip2;
2217
2218    ip1 = (struct ip_header *)(unit->ip);
2219    ip2 = (struct ip_header *)(seg->unit.ip);
2220    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2221        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2222        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2223        chain->stat.no_match++;
2224        return RSC_NO_MATCH;
2225    }
2226
2227    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2228}
2229
2230static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2231                                        VirtioNetRscSeg *seg,
2232                                        const uint8_t *buf, size_t size,
2233                                        VirtioNetRscUnit *unit)
2234{
2235    struct ip6_header *ip1, *ip2;
2236
2237    ip1 = (struct ip6_header *)(unit->ip);
2238    ip2 = (struct ip6_header *)(seg->unit.ip);
2239    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2240        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2241        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2242        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2243            chain->stat.no_match++;
2244            return RSC_NO_MATCH;
2245    }
2246
2247    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2248}
2249
2250/* Packets with 'SYN' bypass coalescing; packets with other control flags
2251 * are only sent after a drain, to prevent out-of-order delivery */
2252static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2253                                         struct tcp_header *tcp)
2254{
2255    uint16_t tcp_hdr;
2256    uint16_t tcp_flag;
2257
2258    tcp_flag = htons(tcp->th_offset_flags);
2259    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2260    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2261    if (tcp_flag & TH_SYN) {
2262        chain->stat.tcp_syn++;
2263        return RSC_BYPASS;
2264    }
2265
2266    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2267        chain->stat.tcp_ctrl_drain++;
2268        return RSC_FINAL;
2269    }
2270
2271    if (tcp_hdr > sizeof(struct tcp_header)) {
2272        chain->stat.tcp_all_opt++;
2273        return RSC_FINAL;
2274    }
2275
2276    return RSC_CANDIDATE;
2277}
2278
2279static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2280                                         NetClientState *nc,
2281                                         const uint8_t *buf, size_t size,
2282                                         VirtioNetRscUnit *unit)
2283{
2284    int ret;
2285    VirtioNetRscSeg *seg, *nseg;
2286
2287    if (QTAILQ_EMPTY(&chain->buffers)) {
2288        chain->stat.empty_cache++;
2289        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2290        timer_mod(chain->drain_timer,
2291              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2292        return size;
2293    }
2294
2295    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2296        if (chain->proto == ETH_P_IP) {
2297            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2298        } else {
2299            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2300        }
2301
2302        if (ret == RSC_FINAL) {
2303            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2304                /* Send failed */
2305                chain->stat.final_failed++;
2306                return 0;
2307            }
2308
2309            /* Send current packet */
2310            return virtio_net_do_receive(nc, buf, size);
2311        } else if (ret == RSC_NO_MATCH) {
2312            continue;
2313        } else {
2314            /* Coalesced; flag it so the IPv4 checksum is recalculated */
2315            seg->is_coalesced = 1;
2316            return size;
2317        }
2318    }
2319
2320    chain->stat.no_match_cache++;
2321    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2322    return size;
2323}
2324
2325/* Drain a connection's buffered data; this avoids out-of-order segments */
2326static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2327                                        NetClientState *nc,
2328                                        const uint8_t *buf, size_t size,
2329                                        uint16_t ip_start, uint16_t ip_size,
2330                                        uint16_t tcp_port)
2331{
2332    VirtioNetRscSeg *seg, *nseg;
2333    uint32_t ppair1, ppair2;
2334
2335    ppair1 = *(uint32_t *)(buf + tcp_port);
2336    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2337        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2338        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2339            || (ppair1 != ppair2)) {
2340            continue;
2341        }
2342        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2343            chain->stat.drain_failed++;
2344        }
2345
2346        break;
2347    }
2348
2349    return virtio_net_do_receive(nc, buf, size);
2350}
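
/*
 * The ppair comparison above matches both TCP ports at once: th_sport
 * and th_dport are adjacent 16-bit fields, so one 32-bit load at the
 * source-port offset covers the pair, and byte order does not matter
 * for a pure equality test.
 */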
2351
2352static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2353                                            struct ip_header *ip,
2354                                            const uint8_t *buf, size_t size)
2355{
2356    uint16_t ip_len;
2357
2358    /* Not an ipv4 packet */
2359    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2360        chain->stat.ip_option++;
2361        return RSC_BYPASS;
2362    }
2363
2364    /* Don't handle packets with IP options */
2365    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2366        chain->stat.ip_option++;
2367        return RSC_BYPASS;
2368    }
2369
2370    if (ip->ip_p != IPPROTO_TCP) {
2371        chain->stat.bypass_not_tcp++;
2372        return RSC_BYPASS;
2373    }
2374
2375    /* Don't handle potentially fragmented packets (DF not set) */
2376    if (!(htons(ip->ip_off) & IP_DF)) {
2377        chain->stat.ip_frag++;
2378        return RSC_BYPASS;
2379    }
2380
2381    /* Don't handle packets with ecn flag */
2382    if (IPTOS_ECN(ip->ip_tos)) {
2383        chain->stat.ip_ecn++;
2384        return RSC_BYPASS;
2385    }
2386
2387    ip_len = htons(ip->ip_len);
2388    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2389        || ip_len > (size - chain->n->guest_hdr_len -
2390                     sizeof(struct eth_header))) {
2391        chain->stat.ip_hacked++;
2392        return RSC_BYPASS;
2393    }
2394
2395    return RSC_CANDIDATE;
2396}
2397
2398static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2399                                      NetClientState *nc,
2400                                      const uint8_t *buf, size_t size)
2401{
2402    int32_t ret;
2403    uint16_t hdr_len;
2404    VirtioNetRscUnit unit;
2405
2406    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2407
2408    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2409        + sizeof(struct tcp_header))) {
2410        chain->stat.bypass_not_tcp++;
2411        return virtio_net_do_receive(nc, buf, size);
2412    }
2413
2414    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2415    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2416        != RSC_CANDIDATE) {
2417        return virtio_net_do_receive(nc, buf, size);
2418    }
2419
2420    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2421    if (ret == RSC_BYPASS) {
2422        return virtio_net_do_receive(nc, buf, size);
2423    } else if (ret == RSC_FINAL) {
2424        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2425                ((hdr_len + sizeof(struct eth_header)) + 12),
2426                VIRTIO_NET_IP4_ADDR_SIZE,
2427                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2428    }
2429
2430    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2431}
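
/*
 * In the RSC_FINAL path above, ip_start points 12 bytes into the IPv4
 * header (the saddr field), so saddr + daddr span the 8 bytes of
 * VIRTIO_NET_IP4_ADDR_SIZE and the port pair sits right after the
 * 20-byte header. The IPv6 path below does the analogous thing with an
 * 8-byte offset to ip6_src and VIRTIO_NET_IP6_ADDR_SIZE.
 */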
2432
2433static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2434                                            struct ip6_header *ip6,
2435                                            const uint8_t *buf, size_t size)
2436{
2437    uint16_t ip_len;
2438
2439    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2440        != IP_HEADER_VERSION_6) {
2441        return RSC_BYPASS;
2442    }
2443
2444    /* Both options and protocol are checked by this test */
2445    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2446        chain->stat.bypass_not_tcp++;
2447        return RSC_BYPASS;
2448    }
2449
2450    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2451    if (ip_len < sizeof(struct tcp_header) ||
2452        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2453                  - sizeof(struct ip6_header))) {
2454        chain->stat.ip_hacked++;
2455        return RSC_BYPASS;
2456    }
2457
2458    /* Don't handle packets with ecn flag */
2459    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2460        chain->stat.ip_ecn++;
2461        return RSC_BYPASS;
2462    }
2463
2464    return RSC_CANDIDATE;
2465}
2466
2467static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2468                                      const uint8_t *buf, size_t size)
2469{
2470    int32_t ret;
2471    uint16_t hdr_len;
2472    VirtioNetRscChain *chain;
2473    VirtioNetRscUnit unit;
2474
2475    chain = (VirtioNetRscChain *)opq;
2476    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2477
2478    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2479        + sizeof(struct tcp_header))) {
2480        return virtio_net_do_receive(nc, buf, size);
2481    }
2482
2483    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2484    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2485        != RSC_CANDIDATE) {
2486        return virtio_net_do_receive(nc, buf, size);
2487    }
2488
2489    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2490    if (ret == RSC_BYPASS) {
2491        return virtio_net_do_receive(nc, buf, size);
2492    } else if (ret == RSC_FINAL) {
2493        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2494                ((hdr_len + sizeof(struct eth_header)) + 8),
2495                VIRTIO_NET_IP6_ADDR_SIZE,
2496                hdr_len + sizeof(struct eth_header)
2497                + sizeof(struct ip6_header));
2498    }
2499
2500    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2501}
2502
2503static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2504                                                      NetClientState *nc,
2505                                                      uint16_t proto)
2506{
2507    VirtioNetRscChain *chain;
2508
2509    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2510        return NULL;
2511    }
2512
2513    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2514        if (chain->proto == proto) {
2515            return chain;
2516        }
2517    }
2518
2519    chain = g_malloc(sizeof(*chain));
2520    chain->n = n;
2521    chain->proto = proto;
2522    if (proto == (uint16_t)ETH_P_IP) {
2523        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2524        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2525    } else {
2526        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2527        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2528    }
2529    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2530                                      virtio_net_rsc_purge, chain);
2531    memset(&chain->stat, 0, sizeof(chain->stat));
2532
2533    QTAILQ_INIT(&chain->buffers);
2534    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2535
2536    return chain;
2537}
2538
2539static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2540                                      const uint8_t *buf,
2541                                      size_t size)
2542{
2543    uint16_t proto;
2544    VirtioNetRscChain *chain;
2545    struct eth_header *eth;
2546    VirtIONet *n;
2547
2548    n = qemu_get_nic_opaque(nc);
2549    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2550        return virtio_net_do_receive(nc, buf, size);
2551    }
2552
2553    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2554    proto = htons(eth->h_proto);
2555
2556    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2557    if (chain) {
2558        chain->stat.received++;
2559        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2560            return virtio_net_rsc_receive4(chain, nc, buf, size);
2561        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2562            return virtio_net_rsc_receive6(chain, nc, buf, size);
2563        }
2564    }
2565    return virtio_net_do_receive(nc, buf, size);
2566}
2567
2568static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2569                                  size_t size)
2570{
2571    VirtIONet *n = qemu_get_nic_opaque(nc);
2572    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2573        return virtio_net_rsc_receive(nc, buf, size);
2574    } else {
2575        return virtio_net_do_receive(nc, buf, size);
2576    }
2577}
2578
2579static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2580
2581static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2582{
2583    VirtIONet *n = qemu_get_nic_opaque(nc);
2584    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2585    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2586    int ret;
2587
2588    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2589    virtio_notify(vdev, q->tx_vq);
2590
2591    g_free(q->async_tx.elem);
2592    q->async_tx.elem = NULL;
2593
2594    virtio_queue_set_notification(q->tx_vq, 1);
2595    ret = virtio_net_flush_tx(q);
2596    if (ret >= n->tx_burst) {
2597        /*
2598         * the flush has been stopped by tx_burst;
2599         * we will not receive a notification for the
2600         * remaining part, so re-schedule
2601         */
2602        virtio_queue_set_notification(q->tx_vq, 0);
2603        if (q->tx_bh) {
2604            qemu_bh_schedule(q->tx_bh);
2605        } else {
2606            timer_mod(q->tx_timer,
2607                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2608        }
2609        q->tx_waiting = 1;
2610    }
2611}
2612
2613/* TX */
2614static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2615{
2616    VirtIONet *n = q->n;
2617    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2618    VirtQueueElement *elem;
2619    int32_t num_packets = 0;
2620    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2621    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2622        return num_packets;
2623    }
2624
2625    if (q->async_tx.elem) {
2626        virtio_queue_set_notification(q->tx_vq, 0);
2627        return num_packets;
2628    }
2629
2630    for (;;) {
2631        ssize_t ret;
2632        unsigned int out_num;
2633        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2634        struct virtio_net_hdr_mrg_rxbuf mhdr;
2635
2636        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2637        if (!elem) {
2638            break;
2639        }
2640
2641        out_num = elem->out_num;
2642        out_sg = elem->out_sg;
2643        if (out_num < 1) {
2644            virtio_error(vdev, "virtio-net header not in first element");
2645            virtqueue_detach_element(q->tx_vq, elem, 0);
2646            g_free(elem);
2647            return -EINVAL;
2648        }
2649
2650        if (n->has_vnet_hdr) {
2651            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2652                n->guest_hdr_len) {
2653                virtio_error(vdev, "virtio-net header incorrect");
2654                virtqueue_detach_element(q->tx_vq, elem, 0);
2655                g_free(elem);
2656                return -EINVAL;
2657            }
2658            if (n->needs_vnet_hdr_swap) {
2659                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2660                sg2[0].iov_base = &mhdr;
2661                sg2[0].iov_len = n->guest_hdr_len;
2662                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2663                                   out_sg, out_num,
2664                                   n->guest_hdr_len, -1);
2665                if (out_num == VIRTQUEUE_MAX_SIZE) {
2666                    goto drop;
2667                }
2668                out_num += 1;
2669                out_sg = sg2;
2670            }
2671        }
2672        /*
2673         * If host wants to see the guest header as is, we can
2674         * pass it on unchanged. Otherwise, copy just the parts
2675         * that host is interested in.
2676         */
2677        assert(n->host_hdr_len <= n->guest_hdr_len);
2678        if (n->host_hdr_len != n->guest_hdr_len) {
2679            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2680                                       out_sg, out_num,
2681                                       0, n->host_hdr_len);
2682            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2683                             out_sg, out_num,
2684                             n->guest_hdr_len, -1);
2685            out_num = sg_num;
2686            out_sg = sg;
2687        }
2688
2689        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2690                                      out_sg, out_num, virtio_net_tx_complete);
2691        if (ret == 0) {
2692            virtio_queue_set_notification(q->tx_vq, 0);
2693            q->async_tx.elem = elem;
2694            return -EBUSY;
2695        }
2696
2697drop:
2698        virtqueue_push(q->tx_vq, elem, 0);
2699        virtio_notify(vdev, q->tx_vq);
2700        g_free(elem);
2701
2702        if (++num_packets >= n->tx_burst) {
2703            break;
2704        }
2705    }
2706    return num_packets;
2707}
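
/*
 * Return-value conventions of virtio_net_flush_tx(), relied upon by
 * the timer and bottom-half callers below: -EINVAL means the device is
 * broken, -EBUSY means an async send is in flight (its completion will
 * restart the flush), and a non-negative count reaching n->tx_burst
 * means more work may still be queued.
 */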
2708
2709static void virtio_net_tx_timer(void *opaque);
2710
2711static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2712{
2713    VirtIONet *n = VIRTIO_NET(vdev);
2714    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2715
2716    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2717        virtio_net_drop_tx_queue_data(vdev, vq);
2718        return;
2719    }
2720
2721    /* This happens when the device was stopped but the VCPU wasn't. */
2722    if (!vdev->vm_running) {
2723        q->tx_waiting = 1;
2724        return;
2725    }
2726
2727    if (q->tx_waiting) {
2728        /* We already have queued packets, immediately flush */
2729        timer_del(q->tx_timer);
2730        virtio_net_tx_timer(q);
2731    } else {
2732        /* re-arm timer to flush it (and more) on next tick */
2733        timer_mod(q->tx_timer,
2734                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2735        q->tx_waiting = 1;
2736        virtio_queue_set_notification(vq, 0);
2737    }
2738}
2739
2740static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2741{
2742    VirtIONet *n = VIRTIO_NET(vdev);
2743    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2744
2745    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2746        virtio_net_drop_tx_queue_data(vdev, vq);
2747        return;
2748    }
2749
2750    if (unlikely(q->tx_waiting)) {
2751        return;
2752    }
2753    q->tx_waiting = 1;
2754    /* This happens when the device was stopped but the VCPU wasn't. */
2755    if (!vdev->vm_running) {
2756        return;
2757    }
2758    virtio_queue_set_notification(vq, 0);
2759    qemu_bh_schedule(q->tx_bh);
2760}
2761
2762static void virtio_net_tx_timer(void *opaque)
2763{
2764    VirtIONetQueue *q = opaque;
2765    VirtIONet *n = q->n;
2766    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2767    int ret;
2768
2769    /* This happens when the device was stopped but the BH wasn't. */
2770    if (!vdev->vm_running) {
2771        /* Make sure tx waiting is set, so we'll run when restarted. */
2772        assert(q->tx_waiting);
2773        return;
2774    }
2775
2776    q->tx_waiting = 0;
2777
2778    /* Just in case the driver is not ready any more */
2779    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2780        return;
2781    }
2782
2783    ret = virtio_net_flush_tx(q);
2784    if (ret == -EBUSY || ret == -EINVAL) {
2785        return;
2786    }
2787    /*
2788     * If we flush a full burst of packets, assume there are
2789     * more coming and immediately rearm
2790     */
2791    if (ret >= n->tx_burst) {
2792        q->tx_waiting = 1;
2793        timer_mod(q->tx_timer,
2794                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2795        return;
2796    }
2797    /*
2798     * If less than a full burst, re-enable notification and flush
2799     * anything that may have come in while we weren't looking.  If
2800     * we find something, assume the guest is still active and rearm
2801     */
2802    virtio_queue_set_notification(q->tx_vq, 1);
2803    ret = virtio_net_flush_tx(q);
2804    if (ret > 0) {
2805        virtio_queue_set_notification(q->tx_vq, 0);
2806        q->tx_waiting = 1;
2807        timer_mod(q->tx_timer,
2808                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2809    }
2810}
2811
2812static void virtio_net_tx_bh(void *opaque)
2813{
2814    VirtIONetQueue *q = opaque;
2815    VirtIONet *n = q->n;
2816    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2817    int32_t ret;
2818
2819    /* This happens when the device was stopped but the BH wasn't. */
2820    if (!vdev->vm_running) {
2821        /* Make sure tx waiting is set, so we'll run when restarted. */
2822        assert(q->tx_waiting);
2823        return;
2824    }
2825
2826    q->tx_waiting = 0;
2827
2828    /* Just in case the driver is not ready any more */
2829    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2830        return;
2831    }
2832
2833    ret = virtio_net_flush_tx(q);
2834    if (ret == -EBUSY || ret == -EINVAL) {
2835        return; /* Notification re-enable handled by tx_complete or device
2836                 * broken */
2837    }
2838
2839    /* If we flush a full burst of packets, assume there are
2840     * more coming and immediately reschedule */
2841    if (ret >= n->tx_burst) {
2842        qemu_bh_schedule(q->tx_bh);
2843        q->tx_waiting = 1;
2844        return;
2845    }
2846
2847    /* If less than a full burst, re-enable notification and flush
2848     * anything that may have come in while we weren't looking.  If
2849     * we find something, assume the guest is still active and reschedule */
2850    virtio_queue_set_notification(q->tx_vq, 1);
2851    ret = virtio_net_flush_tx(q);
2852    if (ret == -EINVAL) {
2853        return;
2854    } else if (ret > 0) {
2855        virtio_queue_set_notification(q->tx_vq, 0);
2856        qemu_bh_schedule(q->tx_bh);
2857        q->tx_waiting = 1;
2858    }
2859}
2860
2861static void virtio_net_add_queue(VirtIONet *n, int index)
2862{
2863    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2864
2865    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2866                                           virtio_net_handle_rx);
2867
2868    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2869        n->vqs[index].tx_vq =
2870            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2871                             virtio_net_handle_tx_timer);
2872        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2873                                              virtio_net_tx_timer,
2874                                              &n->vqs[index]);
2875    } else {
2876        n->vqs[index].tx_vq =
2877            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2878                             virtio_net_handle_tx_bh);
2879        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2880    }
2881
2882    n->vqs[index].tx_waiting = 0;
2883    n->vqs[index].n = n;
2884}
2885
2886static void virtio_net_del_queue(VirtIONet *n, int index)
2887{
2888    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2889    VirtIONetQueue *q = &n->vqs[index];
2890    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2891
2892    qemu_purge_queued_packets(nc);
2893
2894    virtio_del_queue(vdev, index * 2);
2895    if (q->tx_timer) {
2896        timer_free(q->tx_timer);
2897        q->tx_timer = NULL;
2898    } else {
2899        qemu_bh_delete(q->tx_bh);
2900        q->tx_bh = NULL;
2901    }
2902    q->tx_waiting = 0;
2903    virtio_del_queue(vdev, index * 2 + 1);
2904}
2905
2906static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2907{
2908    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2909    int old_num_queues = virtio_get_num_queues(vdev);
2910    int new_num_queues = new_max_queue_pairs * 2 + 1;
2911    int i;
2912
2913    assert(old_num_queues >= 3);
2914    assert(old_num_queues % 2 == 1);
2915
2916    if (old_num_queues == new_num_queues) {
2917        return;
2918    }
2919
2920    /*
2921     * We always need to remove and add ctrl vq if
2922     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2923     * and then we only enter one of the following two loops.
2924     */
2925    virtio_del_queue(vdev, old_num_queues - 1);
2926
2927    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2928        /* new_num_queues < old_num_queues */
2929        virtio_net_del_queue(n, i / 2);
2930    }
2931
2932    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2933        /* new_num_queues > old_num_queues */
2934        virtio_net_add_queue(n, i / 2);
2935    }
2936
2937    /* add ctrl_vq last */
2938    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2939}
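
/*
 * A sketch of the virtqueue layout this relies on: queue pairs are
 * laid out as rx0, tx0, rx1, tx1, ... with the ctrl vq always last,
 * so N pairs occupy 2N + 1 queues. Growing from 2 to 3 pairs, for
 * example, means: delete ctrl (index 4), add rx2/tx2 at indexes 4 and
 * 5, then re-add ctrl at index 6.
 */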
2940
2941static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2942{
2943    int max = multiqueue ? n->max_queue_pairs : 1;
2944
2945    n->multiqueue = multiqueue;
2946    virtio_net_change_num_queue_pairs(n, max);
2947
2948    virtio_net_set_queue_pairs(n);
2949}
2950
2951static int virtio_net_post_load_device(void *opaque, int version_id)
2952{
2953    VirtIONet *n = opaque;
2954    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2955    int i, link_down;
2956
2957    trace_virtio_net_post_load_device();
2958    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2959                               virtio_vdev_has_feature(vdev,
2960                                                       VIRTIO_F_VERSION_1),
2961                               virtio_vdev_has_feature(vdev,
2962                                                       VIRTIO_NET_F_HASH_REPORT));
2963
2964    /* MAC_TABLE_ENTRIES may be different from the saved image */
2965    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2966        n->mac_table.in_use = 0;
2967    }
2968
2969    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2970        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2971    }
2972
2973    /*
2974     * curr_guest_offloads will be later overwritten by the
2975     * virtio_set_features_nocheck call done from the virtio_load.
2976     * Here we make sure it is preserved and restored accordingly
2977     * in the virtio_net_post_load_virtio callback.
2978     */
2979    n->saved_guest_offloads = n->curr_guest_offloads;
2980
2981    virtio_net_set_queue_pairs(n);
2982
2983    /* Find the first multicast entry in the saved MAC filter */
2984    for (i = 0; i < n->mac_table.in_use; i++) {
2985        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2986            break;
2987        }
2988    }
2989    n->mac_table.first_multi = i;
2990
2991    /* nc.link_down can't be migrated, so infer link_down from the
2992     * link status bit in n->status */
2993    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2994    for (i = 0; i < n->max_queue_pairs; i++) {
2995        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2996    }
2997
2998    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2999        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3000        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3001                                  QEMU_CLOCK_VIRTUAL,
3002                                  virtio_net_announce_timer, n);
3003        if (n->announce_timer.round) {
3004            timer_mod(n->announce_timer.tm,
3005                      qemu_clock_get_ms(n->announce_timer.type));
3006        } else {
3007            qemu_announce_timer_del(&n->announce_timer, false);
3008        }
3009    }
3010
3011    if (n->rss_data.enabled) {
3012        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3013        if (!n->rss_data.populate_hash) {
3014            if (!virtio_net_attach_epbf_rss(n)) {
3015                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3016                    warn_report("Can't post-load eBPF RSS for vhost");
3017                } else {
3018                    warn_report("Can't post-load eBPF RSS - "
3019                                "fallback to software RSS");
3020                    n->rss_data.enabled_software_rss = true;
3021                }
3022            }
3023        }
3024
3025        trace_virtio_net_rss_enable(n->rss_data.hash_types,
3026                                    n->rss_data.indirections_len,
3027                                    sizeof(n->rss_data.key));
3028    } else {
3029        trace_virtio_net_rss_disable();
3030    }
3031    return 0;
3032}
3033
3034static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3035{
3036    VirtIONet *n = VIRTIO_NET(vdev);
3037    /*
3038     * The actual needed state is now in saved_guest_offloads,
3039     * see virtio_net_post_load_device for detail.
3040     * Restore it back and apply the desired offloads.
3041     */
3042    n->curr_guest_offloads = n->saved_guest_offloads;
3043    if (peer_has_vnet_hdr(n)) {
3044        virtio_net_apply_guest_offloads(n);
3045    }
3046
3047    return 0;
3048}
3049
3050/* tx_waiting field of a VirtIONetQueue */
3051static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3052    .name = "virtio-net-queue-tx_waiting",
3053    .fields = (VMStateField[]) {
3054        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3055        VMSTATE_END_OF_LIST()
3056    },
3057};
3058
3059static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3060{
3061    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3062}
3063
3064static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3065{
3066    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3067                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3068}
3069
3070static bool mac_table_fits(void *opaque, int version_id)
3071{
3072    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3073}
3074
3075static bool mac_table_doesnt_fit(void *opaque, int version_id)
3076{
3077    return !mac_table_fits(opaque, version_id);
3078}
3079
3080/* This temporary type is shared by all the WITH_TMP methods
3081 * although only some fields are used by each.
3082 */
3083struct VirtIONetMigTmp {
3084    VirtIONet      *parent;
3085    VirtIONetQueue *vqs_1;
3086    uint16_t        curr_queue_pairs_1;
3087    uint8_t         has_ufo;
3088    uint32_t        has_vnet_hdr;
3089};
3090
3091/* The 2nd and subsequent tx_waiting flags are loaded later than
3092 * the 1st entry in the queue_pairs and only if there's more than one
3093 * entry.  We use the tmp mechanism to calculate a temporary
3094 * pointer and count and also validate the count.
3095 */
3096
3097static int virtio_net_tx_waiting_pre_save(void *opaque)
3098{
3099    struct VirtIONetMigTmp *tmp = opaque;
3100
3101    tmp->vqs_1 = tmp->parent->vqs + 1;
3102    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3103    if (tmp->parent->curr_queue_pairs == 0) {
3104        tmp->curr_queue_pairs_1 = 0;
3105    }
3106
3107    return 0;
3108}
3109
3110static int virtio_net_tx_waiting_pre_load(void *opaque)
3111{
3112    struct VirtIONetMigTmp *tmp = opaque;
3113
3114    /* Reuse the pointer setup from save */
3115    virtio_net_tx_waiting_pre_save(opaque);
3116
3117    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3118        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3119            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3120
3121        return -EINVAL;
3122    }
3123
3124    return 0; /* all good */
3125}
3126
3127static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3128    .name      = "virtio-net-tx_waiting",
3129    .pre_load  = virtio_net_tx_waiting_pre_load,
3130    .pre_save  = virtio_net_tx_waiting_pre_save,
3131    .fields    = (VMStateField[]) {
3132        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3133                                     curr_queue_pairs_1,
3134                                     vmstate_virtio_net_queue_tx_waiting,
3135                                     struct VirtIONetQueue),
3136        VMSTATE_END_OF_LIST()
3137    },
3138};
3139
3140/* the 'has_ufo' flag is just tested; if the incoming stream has the
3141 * flag set we need to check that we have it
3142 */
3143static int virtio_net_ufo_post_load(void *opaque, int version_id)
3144{
3145    struct VirtIONetMigTmp *tmp = opaque;
3146
3147    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3148        error_report("virtio-net: saved image requires TUN_F_UFO support");
3149        return -EINVAL;
3150    }
3151
3152    return 0;
3153}
3154
3155static int virtio_net_ufo_pre_save(void *opaque)
3156{
3157    struct VirtIONetMigTmp *tmp = opaque;
3158
3159    tmp->has_ufo = tmp->parent->has_ufo;
3160
3161    return 0;
3162}
3163
3164static const VMStateDescription vmstate_virtio_net_has_ufo = {
3165    .name      = "virtio-net-ufo",
3166    .post_load = virtio_net_ufo_post_load,
3167    .pre_save  = virtio_net_ufo_pre_save,
3168    .fields    = (VMStateField[]) {
3169        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3170        VMSTATE_END_OF_LIST()
3171    },
3172};
3173
3174/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3175 * flag set we need to check that we have it
3176 */
3177static int virtio_net_vnet_post_load(void *opaque, int version_id)
3178{
3179    struct VirtIONetMigTmp *tmp = opaque;
3180
3181    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3182        error_report("virtio-net: saved image requires vnet_hdr=on");
3183        return -EINVAL;
3184    }
3185
3186    return 0;
3187}
3188
3189static int virtio_net_vnet_pre_save(void *opaque)
3190{
3191    struct VirtIONetMigTmp *tmp = opaque;
3192
3193    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3194
3195    return 0;
3196}
3197
3198static const VMStateDescription vmstate_virtio_net_has_vnet = {
3199    .name      = "virtio-net-vnet",
3200    .post_load = virtio_net_vnet_post_load,
3201    .pre_save  = virtio_net_vnet_pre_save,
3202    .fields    = (VMStateField[]) {
3203        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3204        VMSTATE_END_OF_LIST()
3205    },
3206};
3207
3208static bool virtio_net_rss_needed(void *opaque)
3209{
3210    return VIRTIO_NET(opaque)->rss_data.enabled;
3211}
3212
3213static const VMStateDescription vmstate_virtio_net_rss = {
3214    .name      = "virtio-net-device/rss",
3215    .version_id = 1,
3216    .minimum_version_id = 1,
3217    .needed = virtio_net_rss_needed,
3218    .fields = (VMStateField[]) {
3219        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3220        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3221        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3222        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3223        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3224        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3225        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3226                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3227        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3228                                    rss_data.indirections_len, 0,
3229                                    vmstate_info_uint16, uint16_t),
3230        VMSTATE_END_OF_LIST()
3231    },
3232};
3233
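/*
 * Main device state. The order and types of the fields below are
 * migration ABI: existing entries must not be reordered or resized,
 * and new optional state belongs in a subsection (see
 * vmstate_virtio_net_rss above).
 */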
3234static const VMStateDescription vmstate_virtio_net_device = {
3235    .name = "virtio-net-device",
3236    .version_id = VIRTIO_NET_VM_VERSION,
3237    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3238    .post_load = virtio_net_post_load_device,
3239    .fields = (VMStateField[]) {
3240        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3241        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3242                               vmstate_virtio_net_queue_tx_waiting,
3243                               VirtIONetQueue),
3244        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3245        VMSTATE_UINT16(status, VirtIONet),
3246        VMSTATE_UINT8(promisc, VirtIONet),
3247        VMSTATE_UINT8(allmulti, VirtIONet),
3248        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3249
3250        /* Guarded pair: if the table fits we load it, else we throw it
3251         * away - this can happen if the source has a larger MAC table;
3252         * post-load sets the overflow flags in that case.
3253         */
3254        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3255                                0, mac_table_fits, mac_table.in_use,
3256                                 ETH_ALEN),
3257        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3258                                     mac_table.in_use, ETH_ALEN),
3259
3260        /* Note: this is an array of uint32_t that has always been saved
3261         * as a raw buffer, so mind the endianness; it is actually used as
3262         * a bitmap built out of uint32_t words.
3263         */
3264        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3265        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3266                         vmstate_virtio_net_has_vnet),
3267        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3268        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3269        VMSTATE_UINT8(alluni, VirtIONet),
3270        VMSTATE_UINT8(nomulti, VirtIONet),
3271        VMSTATE_UINT8(nouni, VirtIONet),
3272        VMSTATE_UINT8(nobcast, VirtIONet),
3273        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3274                         vmstate_virtio_net_has_ufo),
3275        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3276                            vmstate_info_uint16_equal, uint16_t),
3277        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3278        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3279                         vmstate_virtio_net_tx_waiting),
3280        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3281                            has_ctrl_guest_offloads),
3282        VMSTATE_END_OF_LIST()
3283    },
3284    .subsections = (const VMStateDescription * []) {
3285        &vmstate_virtio_net_rss,
3286        NULL
3287    }
3288};
3289
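/*
 * Glue between this device and the generic net layer: these callbacks
 * are invoked by the peer (e.g. a tap backend) to deliver packets,
 * query the rx filter and report link status changes.
 */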
3290static NetClientInfo net_virtio_info = {
3291    .type = NET_CLIENT_DRIVER_NIC,
3292    .size = sizeof(NICState),
3293    .can_receive = virtio_net_can_receive,
3294    .receive = virtio_net_receive,
3295    .link_status_changed = virtio_net_set_link_status,
3296    .query_rx_filter = virtio_net_query_rxfilter,
3297    .announce = virtio_net_announce,
3298};
3299
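/*
 * Virtqueue indices map to queue pairs as rx = 2*n, tx = 2*n + 1 (see
 * vq2q()), with the control vq last. Without VIRTIO_NET_F_MQ there is
 * a single pair, so index 2 can only be the control vq; the checks
 * below reject that index when CTRL_VQ was not negotiated.
 */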
3300static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3301{
3302    VirtIONet *n = VIRTIO_NET(vdev);
3303    NetClientState *nc;
3304    assert(n->vhost_started);
3305    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3306        /* Guard against an invalid feature set and a bogus queue index
3307         * being set by a malicious guest, or slipping in through a buggy
3308         * migration stream.
3309         */
3310        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3311            qemu_log_mask(LOG_GUEST_ERROR,
3312                          "%s: bogus vq index ignored\n", __func__);
3313            return false;
3314        }
3315        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3316    } else {
3317        nc = qemu_get_subqueue(n->nic, vq2q(idx));
3318    }
3319    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3320}
3321
3322static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3323                                           bool mask)
3324{
3325    VirtIONet *n = VIRTIO_NET(vdev);
3326    NetClientState *nc;
3327    assert(n->vhost_started);
3328    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3329        /* Guard against an invalid feature set and a bogus queue index
3330         * being set by a malicious guest, or slipping in through a buggy
3331         * migration stream.
3332         */
3333        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3334            qemu_log_mask(LOG_GUEST_ERROR,
3335                          "%s: bogus vq index ignored\n", __func__);
3336            return;
3337        }
3338        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3339    } else {
3340        nc = qemu_get_subqueue(n->nic, vq2q(idx));
3341    }
3342    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3343                             vdev, idx, mask);
3344}
3345
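/*
 * Size the config space for the negotiated feature set. F_MAC is
 * force-added here because the mac field sits at the start of the
 * config space and must always be present, whatever the user disabled
 * on the command line.
 */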
3346static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3347{
3348    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3349
3350    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3351}
3352
3353void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3354                                   const char *type)
3355{
3356    /*
3357     * The name may be NULL; the netclient name then becomes "type.x".
3358     */
3359    assert(type != NULL);
3360
3361    g_free(n->netclient_name);
3362    g_free(n->netclient_type);
3363    n->netclient_name = g_strdup(name);
3364    n->netclient_type = g_strdup(type);
3365}
3366
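/*
 * Failover support: on migration the primary device (typically a VFIO
 * passthrough NIC paired with this virtio-net "standby" device) is
 * hot-unplugged from the guest, and plugged back in if the migration
 * fails. The helpers below drive that unplug/replug dance.
 */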
3367static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3368{
3369    HotplugHandler *hotplug_ctrl;
3370    PCIDevice *pci_dev;
3371    Error *err = NULL;
3372
3373    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3374    if (hotplug_ctrl) {
3375        pci_dev = PCI_DEVICE(dev);
3376        pci_dev->partially_hotplugged = true;
3377        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3378        if (err) {
3379            error_report_err(err);
3380            return false;
3381        }
3382    } else {
3383        return false;
3384    }
3385    return true;
3386}
3387
3388static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3389                                    Error **errp)
3390{
3391    Error *err = NULL;
3392    HotplugHandler *hotplug_ctrl;
3393    PCIDevice *pdev = PCI_DEVICE(dev);
3394    BusState *primary_bus;
3395
3396    if (!pdev->partially_hotplugged) {
3397        return true;
3398    }
3399    primary_bus = dev->parent_bus;
3400    if (!primary_bus) {
3401        error_setg(errp, "virtio_net: couldn't find primary bus");
3402        return false;
3403    }
3404    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3405    qatomic_set(&n->failover_primary_hidden, false);
3406    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3407    if (hotplug_ctrl) {
3408        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3409        if (err) {
3410            goto out;
3411        }
3412        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3413    }
3414    pdev->partially_hotplugged = false;
3415
3416out:
3417    error_propagate(errp, err);
3418    return !err;
3419}
3420
3421static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3422{
3423    bool should_be_hidden;
3424    Error *err = NULL;
3425    DeviceState *dev = failover_find_primary_device(n);
3426
3427    if (!dev) {
3428        return;
3429    }
3430
3431    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3432
3433    if (migration_in_setup(s) && !should_be_hidden) {
3434        if (failover_unplug_primary(n, dev)) {
3435            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3436            qapi_event_send_unplug_primary(dev->id);
3437            qatomic_set(&n->failover_primary_hidden, true);
3438        } else {
3439            warn_report("couldn't unplug primary device");
3440        }
3441    } else if (migration_has_failed(s)) {
3442        /* We already unplugged the device; let's plug it back. */
3443        if (!failover_replug_primary(n, dev, &err)) {
3444            if (err) {
3445                error_report_err(err);
3446            }
3447        }
3448    }
3449}
3450
3451static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3452{
3453    MigrationState *s = data;
3454    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3455    virtio_net_handle_migration_primary(n, s);
3456}
3457
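/*
 * DeviceListener hook: called for every device created with a
 * failover_pair_id, and returns true while the matching primary should
 * stay hidden from the guest (until the guest acks
 * VIRTIO_NET_F_STANDBY). A pairing on the command line looks roughly
 * like this (illustrative option values; only failover and
 * failover_pair_id are the properties checked here):
 *
 *   -device virtio-net-pci,id=standby0,failover=on,netdev=nd0
 *   -device vfio-pci,host=<bdf>,id=primary0,failover_pair_id=standby0
 */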
3458static bool failover_hide_primary_device(DeviceListener *listener,
3459                                         const QDict *device_opts,
3460                                         bool from_json,
3461                                         Error **errp)
3462{
3463    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3464    const char *standby_id;
3465
3466    if (!device_opts) {
3467        return false;
3468    }
3469
3470    if (!qdict_haskey(device_opts, "failover_pair_id")) {
3471        return false;
3472    }
3473
3474    if (!qdict_haskey(device_opts, "id")) {
3475        error_setg(errp, "Device with failover_pair_id needs to have id");
3476        return false;
3477    }
3478
3479    standby_id = qdict_get_str(device_opts, "failover_pair_id");
3480    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3481        return false;
3482    }
3483
3484    /*
3485     * The hide helper can be called several times for a given device.
3486     * Check that there is only one primary per virtio-net device, but
3487     * don't clone the qdict again when the helper is called repeatedly
3488     * for the same device.
3489     */
3490    if (n->primary_opts) {
3491        const char *old, *new;
3492        /* devices with failover_pair_id always have an id */
3493        old = qdict_get_str(n->primary_opts, "id");
3494        new = qdict_get_str(device_opts, "id");
3495        if (strcmp(old, new) != 0) {
3496            error_setg(errp, "Cannot attach more than one primary device to "
3497                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
3498            return false;
3499        }
3500    } else {
3501        n->primary_opts = qdict_clone_shallow(device_opts);
3502        n->primary_opts_from_json = from_json;
3503    }
3504
3505    /* failover_primary_hidden is set during feature negotiation */
3506    return qatomic_read(&n->failover_primary_hidden);
3507}
3508
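/*
 * Realize: validate the user configuration (duplex, speed, queue
 * sizes, queue pair count), initialise the virtio device and its
 * virtqueues, then create the NIC and hook it up to the peer backend.
 */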
3509static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3510{
3511    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3512    VirtIONet *n = VIRTIO_NET(dev);
3513    NetClientState *nc;
3514    int i;
3515
3516    if (n->net_conf.mtu) {
3517        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3518    }
3519
3520    if (n->net_conf.duplex_str) {
3521        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3522            n->net_conf.duplex = DUPLEX_HALF;
3523        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3524            n->net_conf.duplex = DUPLEX_FULL;
3525        } else {
3526            error_setg(errp, "'duplex' must be 'half' or 'full'");
3527            return;
3528        }
3529        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3530    } else {
3531        n->net_conf.duplex = DUPLEX_UNKNOWN;
3532    }
3533
3534    if (n->net_conf.speed < SPEED_UNKNOWN) {
3535        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3536        return;
3537    }
3538    if (n->net_conf.speed >= 0) {
3539        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3540    }
3541
3542    if (n->failover) {
3543        n->primary_listener.hide_device = failover_hide_primary_device;
3544        qatomic_set(&n->failover_primary_hidden, true);
3545        device_listener_register(&n->primary_listener);
3546        n->migration_state.notify = virtio_net_migration_state_notifier;
3547        add_migration_state_change_notifier(&n->migration_state);
3548        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3549    }
3550
3551    virtio_net_set_config_size(n, n->host_features);
3552    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3553
3554    /*
3555     * The lower limit on the RX queue size is the historical default.
3556     * Guests that want a smaller ring can always resize it without
3557     * help from us (using virtio 1 and up).
3558     */
3559    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3560        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3561        !is_power_of_2(n->net_conf.rx_queue_size)) {
3562        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3563                   "must be a power of 2 between %d and %d",
3564                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3565                   VIRTQUEUE_MAX_SIZE);
3566        virtio_cleanup(vdev);
3567        return;
3568    }
3569
3570    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3571        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3572        !is_power_of_2(n->net_conf.tx_queue_size)) {
3573        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3574                   "must be a power of 2 between %d and %d",
3575                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3576                   VIRTQUEUE_MAX_SIZE);
3577        virtio_cleanup(vdev);
3578        return;
3579    }
3580
3581    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3582
3583    /*
3584     * Figure out the number of datapath queue pairs, since the backend
3585     * could provide the control queue via its peers as well.
3586     */
3587    if (n->nic_conf.peers.queues) {
3588        for (i = 0; i < n->max_ncs; i++) {
3589            if (n->nic_conf.peers.ncs[i]->is_datapath) {
3590                ++n->max_queue_pairs;
3591            }
3592        }
3593    }
3594    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3595
3596    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3597        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3598                   "must be a positive integer less than %d",
3599                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3600        virtio_cleanup(vdev);
3601        return;
3602    }
3603    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3604    n->curr_queue_pairs = 1;
3605    n->tx_timeout = n->net_conf.txtimer;
3606
3607    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3608                       && strcmp(n->net_conf.tx, "bh")) {
3609        warn_report("virtio-net: "
3610                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3611                    n->net_conf.tx);
3612        error_printf("Defaulting to \"bh\"\n");
3613    }
3614
3615    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3616                                    n->net_conf.tx_queue_size);
3617
3618    for (i = 0; i < n->max_queue_pairs; i++) {
3619        virtio_net_add_queue(n, i);
3620    }
3621
3622    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3623    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3624    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3625    n->status = VIRTIO_NET_S_LINK_UP;
3626    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3627                              QEMU_CLOCK_VIRTUAL,
3628                              virtio_net_announce_timer, n);
3629    n->announce_timer.round = 0;
3630
3631    if (n->netclient_type) {
3632        /*
3633         * This happens when virtio_net_set_netclient_name() was called.
3634         */
3635        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3636                              n->netclient_type, n->netclient_name, n);
3637    } else {
3638        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3639                              object_get_typename(OBJECT(dev)), dev->id, n);
3640    }
3641
3642    for (i = 0; i < n->max_queue_pairs; i++) {
3643        n->nic->ncs[i].do_not_pad = true;
3644    }
3645
3646    peer_test_vnet_hdr(n);
3647    if (peer_has_vnet_hdr(n)) {
3648        for (i = 0; i < n->max_queue_pairs; i++) {
3649            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3650        }
3651        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3652    } else {
3653        n->host_hdr_len = 0;
3654    }
3655
3656    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3657
3658    n->vqs[0].tx_waiting = 0;
3659    n->tx_burst = n->net_conf.txburst;
3660    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3661    n->promisc = 1; /* for compatibility */
3662
3663    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3664
3665    n->vlans = g_malloc0(MAX_VLAN >> 3);
3666
3667    nc = qemu_get_queue(n->nic);
3668    nc->rxfilter_notify_enabled = 1;
3669
3670    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3671        struct virtio_net_config netcfg = {};
3672        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3673        vhost_net_set_config(get_vhost_net(nc->peer),
3674            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3675    }
3676    QTAILQ_INIT(&n->rsc_chains);
3677    n->qdev = dev;
3678
3679    net_rx_pkt_init(&n->rx_pkt, false);
3680
3681    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3682        virtio_net_load_ebpf(n);
3683    }
3684}
3685
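/*
 * Unrealize tears down roughly in reverse order of realize: stop any
 * vhost backend first, then release tables, queues, timers and the
 * NIC before the final virtio_cleanup().
 */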
3686static void virtio_net_device_unrealize(DeviceState *dev)
3687{
3688    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3689    VirtIONet *n = VIRTIO_NET(dev);
3690    int i, max_queue_pairs;
3691
3692    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3693        virtio_net_unload_ebpf(n);
3694    }
3695
3696    /* This will stop the vhost backend if appropriate. */
3697    virtio_net_set_status(vdev, 0);
3698
3699    g_free(n->netclient_name);
3700    n->netclient_name = NULL;
3701    g_free(n->netclient_type);
3702    n->netclient_type = NULL;
3703
3704    g_free(n->mac_table.macs);
3705    g_free(n->vlans);
3706
3707    if (n->failover) {
3708        qobject_unref(n->primary_opts);
3709        device_listener_unregister(&n->primary_listener);
3710        remove_migration_state_change_notifier(&n->migration_state);
3711    } else {
3712        assert(n->primary_opts == NULL);
3713    }
3714
3715    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3716    for (i = 0; i < max_queue_pairs; i++) {
3717        virtio_net_del_queue(n, i);
3718    }
3719    /* also delete the control vq */
3720    virtio_del_queue(vdev, max_queue_pairs * 2);
3721    qemu_announce_timer_del(&n->announce_timer, false);
3722    g_free(n->vqs);
3723    qemu_del_nic(n->nic);
3724    virtio_net_rsc_cleanup(n);
3725    g_free(n->rss_data.indirections_table);
3726    net_rx_pkt_uninit(n->rx_pkt);
3727    virtio_cleanup(vdev);
3728}
3729
3730static void virtio_net_instance_init(Object *obj)
3731{
3732    VirtIONet *n = VIRTIO_NET(obj);
3733
3734    /*
3735     * The default config_size is sizeof(struct virtio_net_config).
3736     * It can be overridden with virtio_net_set_config_size().
3737     */
3738    n->config_size = sizeof(struct virtio_net_config);
3739    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3740                                  "bootindex", "/ethernet-phy@0",
3741                                  DEVICE(n));
3742
3743    ebpf_rss_init(&n->ebpf_rss);
3744}
3745
3746static int virtio_net_pre_save(void *opaque)
3747{
3748    VirtIONet *n = opaque;
3749
3750    /* At this point the backend must be stopped, otherwise
3751     * it might keep writing to guest memory. */
3752    assert(!n->vhost_started);
3753
3754    return 0;
3755}
3756
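/*
 * Used (via dev_unplug_pending below) by the migration code to delay
 * saving this device's state until the guest has actually released
 * the failover primary.
 */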
3757static bool primary_unplug_pending(void *opaque)
3758{
3759    DeviceState *dev = opaque;
3760    DeviceState *primary;
3761    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3762    VirtIONet *n = VIRTIO_NET(vdev);
3763
3764    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3765        return false;
3766    }
3767    primary = failover_find_primary_device(n);
3768    return primary ? primary->pending_deleted_event : false;
3769}
3770
3771static bool dev_unplug_pending(void *opaque)
3772{
3773    DeviceState *dev = opaque;
3774    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3775
3776    return vdc->primary_unplug_pending(dev);
3777}
3778
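/*
 * Note: this assumes the caller only asks for the vhost_dev when a
 * vhost backend is actually in use; get_vhost_net() returns NULL for
 * non-vhost peers and is not checked here.
 */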
3779static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3780{
3781    VirtIONet *n = VIRTIO_NET(vdev);
3782    NetClientState *nc = qemu_get_queue(n->nic);
3783    struct vhost_net *net = get_vhost_net(nc->peer);
3784    return &net->dev;
3785}
3786
3787static const VMStateDescription vmstate_virtio_net = {
3788    .name = "virtio-net",
3789    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3790    .version_id = VIRTIO_NET_VM_VERSION,
3791    .fields = (VMStateField[]) {
3792        VMSTATE_VIRTIO_DEVICE,
3793        VMSTATE_END_OF_LIST()
3794    },
3795    .pre_save = virtio_net_pre_save,
3796    .dev_unplug_pending = dev_unplug_pending,
3797};
3798
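/*
 * User-visible device properties; most offload bits default to on and
 * can be toggled per device. For example (illustrative values only):
 *
 *   -device virtio-net-pci,netdev=nd0,mq=on,rx_queue_size=1024,\
 *           tx_queue_size=1024,host_mtu=9000
 */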
3799static Property virtio_net_properties[] = {
3800    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3801                    VIRTIO_NET_F_CSUM, true),
3802    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3803                    VIRTIO_NET_F_GUEST_CSUM, true),
3804    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3805    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3806                    VIRTIO_NET_F_GUEST_TSO4, true),
3807    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3808                    VIRTIO_NET_F_GUEST_TSO6, true),
3809    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3810                    VIRTIO_NET_F_GUEST_ECN, true),
3811    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3812                    VIRTIO_NET_F_GUEST_UFO, true),
3813    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3814                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3815    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3816                    VIRTIO_NET_F_HOST_TSO4, true),
3817    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3818                    VIRTIO_NET_F_HOST_TSO6, true),
3819    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3820                    VIRTIO_NET_F_HOST_ECN, true),
3821    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3822                    VIRTIO_NET_F_HOST_UFO, true),
3823    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3824                    VIRTIO_NET_F_MRG_RXBUF, true),
3825    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3826                    VIRTIO_NET_F_STATUS, true),
3827    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3828                    VIRTIO_NET_F_CTRL_VQ, true),
3829    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3830                    VIRTIO_NET_F_CTRL_RX, true),
3831    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3832                    VIRTIO_NET_F_CTRL_VLAN, true),
3833    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3834                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3835    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3836                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3837    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3838                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3839    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3840    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3841                    VIRTIO_NET_F_RSS, false),
3842    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3843                    VIRTIO_NET_F_HASH_REPORT, false),
3844    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3845                    VIRTIO_NET_F_RSC_EXT, false),
3846    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3847                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3848    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3849    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3850                       TX_TIMER_INTERVAL),
3851    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3852    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3853    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3854                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3855    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3856                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3857    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3858    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3859                     true),
3860    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3861    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3862    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3863    DEFINE_PROP_END_OF_LIST(),
3864};
3865
3866static void virtio_net_class_init(ObjectClass *klass, void *data)
3867{
3868    DeviceClass *dc = DEVICE_CLASS(klass);
3869    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3870
3871    device_class_set_props(dc, virtio_net_properties);
3872    dc->vmsd = &vmstate_virtio_net;
3873    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3874    vdc->realize = virtio_net_device_realize;
3875    vdc->unrealize = virtio_net_device_unrealize;
3876    vdc->get_config = virtio_net_get_config;
3877    vdc->set_config = virtio_net_set_config;
3878    vdc->get_features = virtio_net_get_features;
3879    vdc->set_features = virtio_net_set_features;
3880    vdc->bad_features = virtio_net_bad_features;
3881    vdc->reset = virtio_net_reset;
3882    vdc->queue_reset = virtio_net_queue_reset;
3883    vdc->queue_enable = virtio_net_queue_enable;
3884    vdc->set_status = virtio_net_set_status;
3885    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3886    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3887    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3888    vdc->post_load = virtio_net_post_load_virtio;
3889    vdc->vmsd = &vmstate_virtio_net_device;
3890    vdc->primary_unplug_pending = primary_unplug_pending;
3891    vdc->get_vhost = virtio_net_get_vhost;
3892}
3893
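/*
 * QOM boilerplate: register TYPE_VIRTIO_NET as a subclass of
 * TYPE_VIRTIO_DEVICE; transports (e.g. virtio-net-pci) wrap this
 * device to expose it on a concrete bus.
 */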
3894static const TypeInfo virtio_net_info = {
3895    .name = TYPE_VIRTIO_NET,
3896    .parent = TYPE_VIRTIO_DEVICE,
3897    .instance_size = sizeof(VirtIONet),
3898    .instance_init = virtio_net_instance_init,
3899    .class_init = virtio_net_class_init,
3900};
3901
3902static void virtio_register_types(void)
3903{
3904    type_register_static(&virtio_net_info);
3905}
3906
3907type_init(virtio_register_types)
3908