qemu/hw/net/virtio-net.c
<<
>>
Prefs
   1/*
   2 * Virtio Network Device
   3 *
   4 * Copyright IBM, Corp. 2007
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/atomic.h"
  16#include "qemu/iov.h"
  17#include "qemu/log.h"
  18#include "qemu/main-loop.h"
  19#include "qemu/module.h"
  20#include "hw/virtio/virtio.h"
  21#include "net/net.h"
  22#include "net/checksum.h"
  23#include "net/tap.h"
  24#include "qemu/error-report.h"
  25#include "qemu/timer.h"
  26#include "qemu/option.h"
  27#include "qemu/option_int.h"
  28#include "qemu/config-file.h"
  29#include "qapi/qmp/qdict.h"
  30#include "hw/virtio/virtio-net.h"
  31#include "net/vhost_net.h"
  32#include "net/announce.h"
  33#include "hw/virtio/virtio-bus.h"
  34#include "qapi/error.h"
  35#include "qapi/qapi-events-net.h"
  36#include "hw/qdev-properties.h"
  37#include "qapi/qapi-types-migration.h"
  38#include "qapi/qapi-events-migration.h"
  39#include "hw/virtio/virtio-access.h"
  40#include "migration/misc.h"
  41#include "standard-headers/linux/ethtool.h"
  42#include "sysemu/sysemu.h"
  43#include "trace.h"
  44#include "monitor/qdev.h"
  45#include "hw/pci/pci.h"
  46#include "net_rx_pkt.h"
  47#include "hw/virtio/vhost.h"
  48#include "sysemu/qtest.h"
  49
  50#define VIRTIO_NET_VM_VERSION    11
  51
  52#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  53
  54/* previously fixed value */
  55#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  56#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  57
  58/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
  59#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  60#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  61
  62#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  63
  64#define VIRTIO_NET_TCP_FLAG         0x3F
  65#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  66
  67/* IPv4 max payload, 16 bits in the header */
  68#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  69#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  70
  71/* header length value in ip header without option */
  72#define VIRTIO_NET_IP4_HEADER_LENGTH 5
  73
  74#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  75#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  76
  77/* Purge coalesced packets timer interval, This value affects the performance
  78   a lot, and should be tuned carefully, '300000'(300us) is the recommended
  79   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
  80   tso/gso/gro 'off'. */
  81#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  82
  83#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  84                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  85                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  86                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  87                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  88                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  89                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  90                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  91                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  92
  93static const VirtIOFeature feature_sizes[] = {
  94    {.flags = 1ULL << VIRTIO_NET_F_MAC,
  95     .end = endof(struct virtio_net_config, mac)},
  96    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
  97     .end = endof(struct virtio_net_config, status)},
  98    {.flags = 1ULL << VIRTIO_NET_F_MQ,
  99     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 100    {.flags = 1ULL << VIRTIO_NET_F_MTU,
 101     .end = endof(struct virtio_net_config, mtu)},
 102    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
 103     .end = endof(struct virtio_net_config, duplex)},
 104    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
 105     .end = endof(struct virtio_net_config, supported_hash_types)},
 106    {}
 107};
 108
 109static const VirtIOConfigSizeParams cfg_size_params = {
 110    .min_size = endof(struct virtio_net_config, mac),
 111    .max_size = sizeof(struct virtio_net_config),
 112    .feature_sizes = feature_sizes
 113};
 114
 115static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 116{
 117    VirtIONet *n = qemu_get_nic_opaque(nc);
 118
 119    return &n->vqs[nc->queue_index];
 120}
 121
 122static int vq2q(int queue_index)
 123{
 124    return queue_index / 2;
 125}
 126
 127static void flush_or_purge_queued_packets(NetClientState *nc)
 128{
 129    if (!nc->peer) {
 130        return;
 131    }
 132
 133    qemu_flush_or_purge_queued_packets(nc->peer, true);
 134    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 135}
 136
 137/* TODO
 138 * - we could suppress RX interrupt if we were so inclined.
 139 */
 140
 141static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 142{
 143    VirtIONet *n = VIRTIO_NET(vdev);
 144    struct virtio_net_config netcfg;
 145    NetClientState *nc = qemu_get_queue(n->nic);
 146    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 147
 148    int ret = 0;
 149    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 150    virtio_stw_p(vdev, &netcfg.status, n->status);
 151    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
 152    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 153    memcpy(netcfg.mac, n->mac, ETH_ALEN);
 154    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 155    netcfg.duplex = n->net_conf.duplex;
 156    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 157    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 158                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 159                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 160    virtio_stl_p(vdev, &netcfg.supported_hash_types,
 161                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
 162    memcpy(config, &netcfg, n->config_size);
 163
 164    /*
 165     * Is this VDPA? No peer means not VDPA: there's no way to
 166     * disconnect/reconnect a VDPA peer.
 167     */
 168    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 169        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 170                                   n->config_size);
 171        if (ret != -1) {
 172            /*
 173             * Some NIC/kernel combinations present 0 as the mac address.  As
 174             * that is not a legal address, try to proceed with the
 175             * address from the QEMU command line in the hope that the
 176             * address has been configured correctly elsewhere - just not
 177             * reported by the device.
 178             */
 179            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 180                info_report("Zero hardware mac address detected. Ignoring.");
 181                memcpy(netcfg.mac, n->mac, ETH_ALEN);
 182            }
 183            memcpy(config, &netcfg, n->config_size);
 184        }
 185    }
 186}
 187
 188static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 189{
 190    VirtIONet *n = VIRTIO_NET(vdev);
 191    struct virtio_net_config netcfg = {};
 192    NetClientState *nc = qemu_get_queue(n->nic);
 193
 194    memcpy(&netcfg, config, n->config_size);
 195
 196    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 197        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 198        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 199        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 200        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 201    }
 202
 203    /*
 204     * Is this VDPA? No peer means not VDPA: there's no way to
 205     * disconnect/reconnect a VDPA peer.
 206     */
 207    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 208        vhost_net_set_config(get_vhost_net(nc->peer),
 209                             (uint8_t *)&netcfg, 0, n->config_size,
 210                             VHOST_SET_CONFIG_TYPE_MASTER);
 211      }
 212}
 213
 214static bool virtio_net_started(VirtIONet *n, uint8_t status)
 215{
 216    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 217    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 218        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 219}
 220
 221static void virtio_net_announce_notify(VirtIONet *net)
 222{
 223    VirtIODevice *vdev = VIRTIO_DEVICE(net);
 224    trace_virtio_net_announce_notify();
 225
 226    net->status |= VIRTIO_NET_S_ANNOUNCE;
 227    virtio_notify_config(vdev);
 228}
 229
 230static void virtio_net_announce_timer(void *opaque)
 231{
 232    VirtIONet *n = opaque;
 233    trace_virtio_net_announce_timer(n->announce_timer.round);
 234
 235    n->announce_timer.round--;
 236    virtio_net_announce_notify(n);
 237}
 238
 239static void virtio_net_announce(NetClientState *nc)
 240{
 241    VirtIONet *n = qemu_get_nic_opaque(nc);
 242    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 243
 244    /*
 245     * Make sure the virtio migration announcement timer isn't running
 246     * If it is, let it trigger announcement so that we do not cause
 247     * confusion.
 248     */
 249    if (n->announce_timer.round) {
 250        return;
 251    }
 252
 253    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 254        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 255            virtio_net_announce_notify(n);
 256    }
 257}
 258
 259static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 260{
 261    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 262    NetClientState *nc = qemu_get_queue(n->nic);
 263    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 264    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
 265              n->max_ncs - n->max_queue_pairs : 0;
 266
 267    if (!get_vhost_net(nc->peer)) {
 268        return;
 269    }
 270
 271    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 272        !!n->vhost_started) {
 273        return;
 274    }
 275    if (!n->vhost_started) {
 276        int r, i;
 277
 278        if (n->needs_vnet_hdr_swap) {
 279            error_report("backend does not support %s vnet headers; "
 280                         "falling back on userspace virtio",
 281                         virtio_is_big_endian(vdev) ? "BE" : "LE");
 282            return;
 283        }
 284
 285        /* Any packets outstanding? Purge them to avoid touching rings
 286         * when vhost is running.
 287         */
 288        for (i = 0;  i < queue_pairs; i++) {
 289            NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 290
 291            /* Purge both directions: TX and RX. */
 292            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 293            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 294        }
 295
 296        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 297            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 298            if (r < 0) {
 299                error_report("%uBytes MTU not supported by the backend",
 300                             n->net_conf.mtu);
 301
 302                return;
 303            }
 304        }
 305
 306        n->vhost_started = 1;
 307        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
 308        if (r < 0) {
 309            error_report("unable to start vhost net: %d: "
 310                         "falling back on userspace virtio", -r);
 311            n->vhost_started = 0;
 312        }
 313    } else {
 314        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
 315        n->vhost_started = 0;
 316    }
 317}
 318
 319static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 320                                          NetClientState *peer,
 321                                          bool enable)
 322{
 323    if (virtio_is_big_endian(vdev)) {
 324        return qemu_set_vnet_be(peer, enable);
 325    } else {
 326        return qemu_set_vnet_le(peer, enable);
 327    }
 328}
 329
 330static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 331                                       int queue_pairs, bool enable)
 332{
 333    int i;
 334
 335    for (i = 0; i < queue_pairs; i++) {
 336        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 337            enable) {
 338            while (--i >= 0) {
 339                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 340            }
 341
 342            return true;
 343        }
 344    }
 345
 346    return false;
 347}
 348
 349static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 350{
 351    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 352    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 353
 354    if (virtio_net_started(n, status)) {
 355        /* Before using the device, we tell the network backend about the
 356         * endianness to use when parsing vnet headers. If the backend
 357         * can't do it, we fallback onto fixing the headers in the core
 358         * virtio-net code.
 359         */
 360        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 361                                                            queue_pairs, true);
 362    } else if (virtio_net_started(n, vdev->status)) {
 363        /* After using the device, we need to reset the network backend to
 364         * the default (guest native endianness), otherwise the guest may
 365         * lose network connectivity if it is rebooted into a different
 366         * endianness.
 367         */
 368        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
 369    }
 370}
 371
 372static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 373{
 374    unsigned int dropped = virtqueue_drop_all(vq);
 375    if (dropped) {
 376        virtio_notify(vdev, vq);
 377    }
 378}
 379
 380static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 381{
 382    VirtIONet *n = VIRTIO_NET(vdev);
 383    VirtIONetQueue *q;
 384    int i;
 385    uint8_t queue_status;
 386
 387    virtio_net_vnet_endian_status(n, status);
 388    virtio_net_vhost_status(n, status);
 389
 390    for (i = 0; i < n->max_queue_pairs; i++) {
 391        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 392        bool queue_started;
 393        q = &n->vqs[i];
 394
 395        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
 396            queue_status = 0;
 397        } else {
 398            queue_status = status;
 399        }
 400        queue_started =
 401            virtio_net_started(n, queue_status) && !n->vhost_started;
 402
 403        if (queue_started) {
 404            qemu_flush_queued_packets(ncs);
 405        }
 406
 407        if (!q->tx_waiting) {
 408            continue;
 409        }
 410
 411        if (queue_started) {
 412            if (q->tx_timer) {
 413                timer_mod(q->tx_timer,
 414                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 415            } else {
 416                qemu_bh_schedule(q->tx_bh);
 417            }
 418        } else {
 419            if (q->tx_timer) {
 420                timer_del(q->tx_timer);
 421            } else {
 422                qemu_bh_cancel(q->tx_bh);
 423            }
 424            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 425                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 426                vdev->vm_running) {
 427                /* if tx is waiting we are likely have some packets in tx queue
 428                 * and disabled notification */
 429                q->tx_waiting = 0;
 430                virtio_queue_set_notification(q->tx_vq, 1);
 431                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 432            }
 433        }
 434    }
 435}
 436
 437static void virtio_net_set_link_status(NetClientState *nc)
 438{
 439    VirtIONet *n = qemu_get_nic_opaque(nc);
 440    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 441    uint16_t old_status = n->status;
 442
 443    if (nc->link_down)
 444        n->status &= ~VIRTIO_NET_S_LINK_UP;
 445    else
 446        n->status |= VIRTIO_NET_S_LINK_UP;
 447
 448    if (n->status != old_status)
 449        virtio_notify_config(vdev);
 450
 451    virtio_net_set_status(vdev, vdev->status);
 452}
 453
 454static void rxfilter_notify(NetClientState *nc)
 455{
 456    VirtIONet *n = qemu_get_nic_opaque(nc);
 457
 458    if (nc->rxfilter_notify_enabled) {
 459        char *path = object_get_canonical_path(OBJECT(n->qdev));
 460        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 461                                              n->netclient_name, path);
 462        g_free(path);
 463
 464        /* disable event notification to avoid events flooding */
 465        nc->rxfilter_notify_enabled = 0;
 466    }
 467}
 468
 469static intList *get_vlan_table(VirtIONet *n)
 470{
 471    intList *list;
 472    int i, j;
 473
 474    list = NULL;
 475    for (i = 0; i < MAX_VLAN >> 5; i++) {
 476        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 477            if (n->vlans[i] & (1U << j)) {
 478                QAPI_LIST_PREPEND(list, (i << 5) + j);
 479            }
 480        }
 481    }
 482
 483    return list;
 484}
 485
 486static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 487{
 488    VirtIONet *n = qemu_get_nic_opaque(nc);
 489    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 490    RxFilterInfo *info;
 491    strList *str_list;
 492    int i;
 493
 494    info = g_malloc0(sizeof(*info));
 495    info->name = g_strdup(nc->name);
 496    info->promiscuous = n->promisc;
 497
 498    if (n->nouni) {
 499        info->unicast = RX_STATE_NONE;
 500    } else if (n->alluni) {
 501        info->unicast = RX_STATE_ALL;
 502    } else {
 503        info->unicast = RX_STATE_NORMAL;
 504    }
 505
 506    if (n->nomulti) {
 507        info->multicast = RX_STATE_NONE;
 508    } else if (n->allmulti) {
 509        info->multicast = RX_STATE_ALL;
 510    } else {
 511        info->multicast = RX_STATE_NORMAL;
 512    }
 513
 514    info->broadcast_allowed = n->nobcast;
 515    info->multicast_overflow = n->mac_table.multi_overflow;
 516    info->unicast_overflow = n->mac_table.uni_overflow;
 517
 518    info->main_mac = qemu_mac_strdup_printf(n->mac);
 519
 520    str_list = NULL;
 521    for (i = 0; i < n->mac_table.first_multi; i++) {
 522        QAPI_LIST_PREPEND(str_list,
 523                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 524    }
 525    info->unicast_table = str_list;
 526
 527    str_list = NULL;
 528    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 529        QAPI_LIST_PREPEND(str_list,
 530                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 531    }
 532    info->multicast_table = str_list;
 533    info->vlan_table = get_vlan_table(n);
 534
 535    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 536        info->vlan = RX_STATE_ALL;
 537    } else if (!info->vlan_table) {
 538        info->vlan = RX_STATE_NONE;
 539    } else {
 540        info->vlan = RX_STATE_NORMAL;
 541    }
 542
 543    /* enable event notification after query */
 544    nc->rxfilter_notify_enabled = 1;
 545
 546    return info;
 547}
 548
 549static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
 550{
 551    VirtIONet *n = VIRTIO_NET(vdev);
 552    NetClientState *nc;
 553
 554    /* validate queue_index and skip for cvq */
 555    if (queue_index >= n->max_queue_pairs * 2) {
 556        return;
 557    }
 558
 559    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
 560
 561    if (!nc->peer) {
 562        return;
 563    }
 564
 565    if (get_vhost_net(nc->peer) &&
 566        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
 567        vhost_net_virtqueue_reset(vdev, nc, queue_index);
 568    }
 569
 570    flush_or_purge_queued_packets(nc);
 571}
 572
 573static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
 574{
 575    VirtIONet *n = VIRTIO_NET(vdev);
 576    NetClientState *nc;
 577    int r;
 578
 579    /* validate queue_index and skip for cvq */
 580    if (queue_index >= n->max_queue_pairs * 2) {
 581        return;
 582    }
 583
 584    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
 585
 586    if (!nc->peer || !vdev->vhost_started) {
 587        return;
 588    }
 589
 590    if (get_vhost_net(nc->peer) &&
 591        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
 592        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
 593        if (r < 0) {
 594            error_report("unable to restart vhost net virtqueue: %d, "
 595                            "when resetting the queue", queue_index);
 596        }
 597    }
 598}
 599
 600static void virtio_net_reset(VirtIODevice *vdev)
 601{
 602    VirtIONet *n = VIRTIO_NET(vdev);
 603    int i;
 604
 605    /* Reset back to compatibility mode */
 606    n->promisc = 1;
 607    n->allmulti = 0;
 608    n->alluni = 0;
 609    n->nomulti = 0;
 610    n->nouni = 0;
 611    n->nobcast = 0;
 612    /* multiqueue is disabled by default */
 613    n->curr_queue_pairs = 1;
 614    timer_del(n->announce_timer.tm);
 615    n->announce_timer.round = 0;
 616    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 617
 618    /* Flush any MAC and VLAN filter table state */
 619    n->mac_table.in_use = 0;
 620    n->mac_table.first_multi = 0;
 621    n->mac_table.multi_overflow = 0;
 622    n->mac_table.uni_overflow = 0;
 623    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 624    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 625    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 626    memset(n->vlans, 0, MAX_VLAN >> 3);
 627
 628    /* Flush any async TX */
 629    for (i = 0;  i < n->max_queue_pairs; i++) {
 630        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
 631    }
 632}
 633
 634static void peer_test_vnet_hdr(VirtIONet *n)
 635{
 636    NetClientState *nc = qemu_get_queue(n->nic);
 637    if (!nc->peer) {
 638        return;
 639    }
 640
 641    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 642}
 643
 644static int peer_has_vnet_hdr(VirtIONet *n)
 645{
 646    return n->has_vnet_hdr;
 647}
 648
 649static int peer_has_ufo(VirtIONet *n)
 650{
 651    if (!peer_has_vnet_hdr(n))
 652        return 0;
 653
 654    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 655
 656    return n->has_ufo;
 657}
 658
 659static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 660                                       int version_1, int hash_report)
 661{
 662    int i;
 663    NetClientState *nc;
 664
 665    n->mergeable_rx_bufs = mergeable_rx_bufs;
 666
 667    if (version_1) {
 668        n->guest_hdr_len = hash_report ?
 669            sizeof(struct virtio_net_hdr_v1_hash) :
 670            sizeof(struct virtio_net_hdr_mrg_rxbuf);
 671        n->rss_data.populate_hash = !!hash_report;
 672    } else {
 673        n->guest_hdr_len = n->mergeable_rx_bufs ?
 674            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 675            sizeof(struct virtio_net_hdr);
 676    }
 677
 678    for (i = 0; i < n->max_queue_pairs; i++) {
 679        nc = qemu_get_subqueue(n->nic, i);
 680
 681        if (peer_has_vnet_hdr(n) &&
 682            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 683            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 684            n->host_hdr_len = n->guest_hdr_len;
 685        }
 686    }
 687}
 688
 689static int virtio_net_max_tx_queue_size(VirtIONet *n)
 690{
 691    NetClientState *peer = n->nic_conf.peers.ncs[0];
 692
 693    /*
 694     * Backends other than vhost-user or vhost-vdpa don't support max queue
 695     * size.
 696     */
 697    if (!peer) {
 698        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 699    }
 700
 701    switch(peer->info->type) {
 702    case NET_CLIENT_DRIVER_VHOST_USER:
 703    case NET_CLIENT_DRIVER_VHOST_VDPA:
 704        return VIRTQUEUE_MAX_SIZE;
 705    default:
 706        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 707    };
 708}
 709
 710static int peer_attach(VirtIONet *n, int index)
 711{
 712    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 713
 714    if (!nc->peer) {
 715        return 0;
 716    }
 717
 718    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 719        vhost_set_vring_enable(nc->peer, 1);
 720    }
 721
 722    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 723        return 0;
 724    }
 725
 726    if (n->max_queue_pairs == 1) {
 727        return 0;
 728    }
 729
 730    return tap_enable(nc->peer);
 731}
 732
 733static int peer_detach(VirtIONet *n, int index)
 734{
 735    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 736
 737    if (!nc->peer) {
 738        return 0;
 739    }
 740
 741    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 742        vhost_set_vring_enable(nc->peer, 0);
 743    }
 744
 745    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 746        return 0;
 747    }
 748
 749    return tap_disable(nc->peer);
 750}
 751
 752static void virtio_net_set_queue_pairs(VirtIONet *n)
 753{
 754    int i;
 755    int r;
 756
 757    if (n->nic->peer_deleted) {
 758        return;
 759    }
 760
 761    for (i = 0; i < n->max_queue_pairs; i++) {
 762        if (i < n->curr_queue_pairs) {
 763            r = peer_attach(n, i);
 764            assert(!r);
 765        } else {
 766            r = peer_detach(n, i);
 767            assert(!r);
 768        }
 769    }
 770}
 771
 772static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 773
 774static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 775                                        Error **errp)
 776{
 777    VirtIONet *n = VIRTIO_NET(vdev);
 778    NetClientState *nc = qemu_get_queue(n->nic);
 779
 780    /* Firstly sync all virtio-net possible supported features */
 781    features |= n->host_features;
 782
 783    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 784
 785    if (!peer_has_vnet_hdr(n)) {
 786        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 787        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 788        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 789        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 790
 791        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 792        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 793        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 794        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 795
 796        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 797    }
 798
 799    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 800        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 801        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 802    }
 803
 804    if (!get_vhost_net(nc->peer)) {
 805        return features;
 806    }
 807
 808    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 809        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 810    }
 811    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 812    vdev->backend_features = features;
 813
 814    if (n->mtu_bypass_backend &&
 815            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 816        features |= (1ULL << VIRTIO_NET_F_MTU);
 817    }
 818
 819    return features;
 820}
 821
 822static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 823{
 824    uint64_t features = 0;
 825
 826    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 827     * but also these: */
 828    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 829    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 830    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 831    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 832    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 833
 834    return features;
 835}
 836
 837static void virtio_net_apply_guest_offloads(VirtIONet *n)
 838{
 839    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 840            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 841            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 842            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 843            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 844            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 845}
 846
 847static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 848{
 849    static const uint64_t guest_offloads_mask =
 850        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 851        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 852        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 853        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 854        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 855
 856    return guest_offloads_mask & features;
 857}
 858
 859static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 860{
 861    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 862    return virtio_net_guest_offloads_by_features(vdev->guest_features);
 863}
 864
 865typedef struct {
 866    VirtIONet *n;
 867    DeviceState *dev;
 868} FailoverDevice;
 869
 870/**
 871 * Set the failover primary device
 872 *
 873 * @opaque: FailoverId to setup
 874 * @opts: opts for device we are handling
 875 * @errp: returns an error if this function fails
 876 */
 877static int failover_set_primary(DeviceState *dev, void *opaque)
 878{
 879    FailoverDevice *fdev = opaque;
 880    PCIDevice *pci_dev = (PCIDevice *)
 881        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
 882
 883    if (!pci_dev) {
 884        return 0;
 885    }
 886
 887    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
 888        fdev->dev = dev;
 889        return 1;
 890    }
 891
 892    return 0;
 893}
 894
 895/**
 896 * Find the primary device for this failover virtio-net
 897 *
 898 * @n: VirtIONet device
 899 * @errp: returns an error if this function fails
 900 */
 901static DeviceState *failover_find_primary_device(VirtIONet *n)
 902{
 903    FailoverDevice fdev = {
 904        .n = n,
 905    };
 906
 907    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
 908                       NULL, NULL, &fdev);
 909    return fdev.dev;
 910}
 911
 912static void failover_add_primary(VirtIONet *n, Error **errp)
 913{
 914    Error *err = NULL;
 915    DeviceState *dev = failover_find_primary_device(n);
 916
 917    if (dev) {
 918        return;
 919    }
 920
 921    if (!n->primary_opts) {
 922        error_setg(errp, "Primary device not found");
 923        error_append_hint(errp, "Virtio-net failover will not work. Make "
 924                          "sure primary device has parameter"
 925                          " failover_pair_id=%s\n", n->netclient_name);
 926        return;
 927    }
 928
 929    dev = qdev_device_add_from_qdict(n->primary_opts,
 930                                     n->primary_opts_from_json,
 931                                     &err);
 932    if (err) {
 933        qobject_unref(n->primary_opts);
 934        n->primary_opts = NULL;
 935    } else {
 936        object_unref(OBJECT(dev));
 937    }
 938    error_propagate(errp, err);
 939}
 940
 941static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 942{
 943    VirtIONet *n = VIRTIO_NET(vdev);
 944    Error *err = NULL;
 945    int i;
 946
 947    if (n->mtu_bypass_backend &&
 948            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 949        features &= ~(1ULL << VIRTIO_NET_F_MTU);
 950    }
 951
 952    virtio_net_set_multiqueue(n,
 953                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 954                              virtio_has_feature(features, VIRTIO_NET_F_MQ));
 955
 956    virtio_net_set_mrg_rx_bufs(n,
 957                               virtio_has_feature(features,
 958                                                  VIRTIO_NET_F_MRG_RXBUF),
 959                               virtio_has_feature(features,
 960                                                  VIRTIO_F_VERSION_1),
 961                               virtio_has_feature(features,
 962                                                  VIRTIO_NET_F_HASH_REPORT));
 963
 964    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 965        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 966    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 967        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 968    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 969
 970    if (n->has_vnet_hdr) {
 971        n->curr_guest_offloads =
 972            virtio_net_guest_offloads_by_features(features);
 973        virtio_net_apply_guest_offloads(n);
 974    }
 975
 976    for (i = 0;  i < n->max_queue_pairs; i++) {
 977        NetClientState *nc = qemu_get_subqueue(n->nic, i);
 978
 979        if (!get_vhost_net(nc->peer)) {
 980            continue;
 981        }
 982        vhost_net_ack_features(get_vhost_net(nc->peer), features);
 983    }
 984
 985    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 986        memset(n->vlans, 0, MAX_VLAN >> 3);
 987    } else {
 988        memset(n->vlans, 0xff, MAX_VLAN >> 3);
 989    }
 990
 991    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
 992        qapi_event_send_failover_negotiated(n->netclient_name);
 993        qatomic_set(&n->failover_primary_hidden, false);
 994        failover_add_primary(n, &err);
 995        if (err) {
 996            if (!qtest_enabled()) {
 997                warn_report_err(err);
 998            } else {
 999                error_free(err);
1000            }
1001        }
1002    }
1003}
1004
1005static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1006                                     struct iovec *iov, unsigned int iov_cnt)
1007{
1008    uint8_t on;
1009    size_t s;
1010    NetClientState *nc = qemu_get_queue(n->nic);
1011
1012    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1013    if (s != sizeof(on)) {
1014        return VIRTIO_NET_ERR;
1015    }
1016
1017    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1018        n->promisc = on;
1019    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1020        n->allmulti = on;
1021    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1022        n->alluni = on;
1023    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1024        n->nomulti = on;
1025    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1026        n->nouni = on;
1027    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1028        n->nobcast = on;
1029    } else {
1030        return VIRTIO_NET_ERR;
1031    }
1032
1033    rxfilter_notify(nc);
1034
1035    return VIRTIO_NET_OK;
1036}
1037
1038static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1039                                     struct iovec *iov, unsigned int iov_cnt)
1040{
1041    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1042    uint64_t offloads;
1043    size_t s;
1044
1045    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1046        return VIRTIO_NET_ERR;
1047    }
1048
1049    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1050    if (s != sizeof(offloads)) {
1051        return VIRTIO_NET_ERR;
1052    }
1053
1054    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1055        uint64_t supported_offloads;
1056
1057        offloads = virtio_ldq_p(vdev, &offloads);
1058
1059        if (!n->has_vnet_hdr) {
1060            return VIRTIO_NET_ERR;
1061        }
1062
1063        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1064            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1065        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1066            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1067        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1068
1069        supported_offloads = virtio_net_supported_guest_offloads(n);
1070        if (offloads & ~supported_offloads) {
1071            return VIRTIO_NET_ERR;
1072        }
1073
1074        n->curr_guest_offloads = offloads;
1075        virtio_net_apply_guest_offloads(n);
1076
1077        return VIRTIO_NET_OK;
1078    } else {
1079        return VIRTIO_NET_ERR;
1080    }
1081}
1082
1083static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1084                                 struct iovec *iov, unsigned int iov_cnt)
1085{
1086    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1087    struct virtio_net_ctrl_mac mac_data;
1088    size_t s;
1089    NetClientState *nc = qemu_get_queue(n->nic);
1090
1091    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1092        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1093            return VIRTIO_NET_ERR;
1094        }
1095        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1096        assert(s == sizeof(n->mac));
1097        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1098        rxfilter_notify(nc);
1099
1100        return VIRTIO_NET_OK;
1101    }
1102
1103    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1104        return VIRTIO_NET_ERR;
1105    }
1106
1107    int in_use = 0;
1108    int first_multi = 0;
1109    uint8_t uni_overflow = 0;
1110    uint8_t multi_overflow = 0;
1111    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1112
1113    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1114                   sizeof(mac_data.entries));
1115    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1116    if (s != sizeof(mac_data.entries)) {
1117        goto error;
1118    }
1119    iov_discard_front(&iov, &iov_cnt, s);
1120
1121    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1122        goto error;
1123    }
1124
1125    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1126        s = iov_to_buf(iov, iov_cnt, 0, macs,
1127                       mac_data.entries * ETH_ALEN);
1128        if (s != mac_data.entries * ETH_ALEN) {
1129            goto error;
1130        }
1131        in_use += mac_data.entries;
1132    } else {
1133        uni_overflow = 1;
1134    }
1135
1136    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1137
1138    first_multi = in_use;
1139
1140    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1141                   sizeof(mac_data.entries));
1142    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1143    if (s != sizeof(mac_data.entries)) {
1144        goto error;
1145    }
1146
1147    iov_discard_front(&iov, &iov_cnt, s);
1148
1149    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1150        goto error;
1151    }
1152
1153    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1154        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1155                       mac_data.entries * ETH_ALEN);
1156        if (s != mac_data.entries * ETH_ALEN) {
1157            goto error;
1158        }
1159        in_use += mac_data.entries;
1160    } else {
1161        multi_overflow = 1;
1162    }
1163
1164    n->mac_table.in_use = in_use;
1165    n->mac_table.first_multi = first_multi;
1166    n->mac_table.uni_overflow = uni_overflow;
1167    n->mac_table.multi_overflow = multi_overflow;
1168    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1169    g_free(macs);
1170    rxfilter_notify(nc);
1171
1172    return VIRTIO_NET_OK;
1173
1174error:
1175    g_free(macs);
1176    return VIRTIO_NET_ERR;
1177}
1178
1179static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1180                                        struct iovec *iov, unsigned int iov_cnt)
1181{
1182    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1183    uint16_t vid;
1184    size_t s;
1185    NetClientState *nc = qemu_get_queue(n->nic);
1186
1187    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1188    vid = virtio_lduw_p(vdev, &vid);
1189    if (s != sizeof(vid)) {
1190        return VIRTIO_NET_ERR;
1191    }
1192
1193    if (vid >= MAX_VLAN)
1194        return VIRTIO_NET_ERR;
1195
1196    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1197        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1198    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1199        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1200    else
1201        return VIRTIO_NET_ERR;
1202
1203    rxfilter_notify(nc);
1204
1205    return VIRTIO_NET_OK;
1206}
1207
1208static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1209                                      struct iovec *iov, unsigned int iov_cnt)
1210{
1211    trace_virtio_net_handle_announce(n->announce_timer.round);
1212    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1213        n->status & VIRTIO_NET_S_ANNOUNCE) {
1214        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1215        if (n->announce_timer.round) {
1216            qemu_announce_timer_step(&n->announce_timer);
1217        }
1218        return VIRTIO_NET_OK;
1219    } else {
1220        return VIRTIO_NET_ERR;
1221    }
1222}
1223
1224static void virtio_net_detach_epbf_rss(VirtIONet *n);
1225
1226static void virtio_net_disable_rss(VirtIONet *n)
1227{
1228    if (n->rss_data.enabled) {
1229        trace_virtio_net_rss_disable();
1230    }
1231    n->rss_data.enabled = false;
1232
1233    virtio_net_detach_epbf_rss(n);
1234}
1235
1236static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1237{
1238    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1239    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1240        return false;
1241    }
1242
1243    return nc->info->set_steering_ebpf(nc, prog_fd);
1244}
1245
1246static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1247                                   struct EBPFRSSConfig *config)
1248{
1249    config->redirect = data->redirect;
1250    config->populate_hash = data->populate_hash;
1251    config->hash_types = data->hash_types;
1252    config->indirections_len = data->indirections_len;
1253    config->default_queue = data->default_queue;
1254}
1255
1256static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1257{
1258    struct EBPFRSSConfig config = {};
1259
1260    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1261        return false;
1262    }
1263
1264    rss_data_to_rss_config(&n->rss_data, &config);
1265
1266    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1267                          n->rss_data.indirections_table, n->rss_data.key)) {
1268        return false;
1269    }
1270
1271    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1272        return false;
1273    }
1274
1275    return true;
1276}
1277
1278static void virtio_net_detach_epbf_rss(VirtIONet *n)
1279{
1280    virtio_net_attach_ebpf_to_backend(n->nic, -1);
1281}
1282
1283static bool virtio_net_load_ebpf(VirtIONet *n)
1284{
1285    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1286        /* backend does't support steering ebpf */
1287        return false;
1288    }
1289
1290    return ebpf_rss_load(&n->ebpf_rss);
1291}
1292
1293static void virtio_net_unload_ebpf(VirtIONet *n)
1294{
1295    virtio_net_attach_ebpf_to_backend(n->nic, -1);
1296    ebpf_rss_unload(&n->ebpf_rss);
1297}
1298
1299static uint16_t virtio_net_handle_rss(VirtIONet *n,
1300                                      struct iovec *iov,
1301                                      unsigned int iov_cnt,
1302                                      bool do_rss)
1303{
1304    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1305    struct virtio_net_rss_config cfg;
1306    size_t s, offset = 0, size_get;
1307    uint16_t queue_pairs, i;
1308    struct {
1309        uint16_t us;
1310        uint8_t b;
1311    } QEMU_PACKED temp;
1312    const char *err_msg = "";
1313    uint32_t err_value = 0;
1314
1315    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1316        err_msg = "RSS is not negotiated";
1317        goto error;
1318    }
1319    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1320        err_msg = "Hash report is not negotiated";
1321        goto error;
1322    }
1323    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1324    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1325    if (s != size_get) {
1326        err_msg = "Short command buffer";
1327        err_value = (uint32_t)s;
1328        goto error;
1329    }
1330    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1331    n->rss_data.indirections_len =
1332        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1333    n->rss_data.indirections_len++;
1334    if (!do_rss) {
1335        n->rss_data.indirections_len = 1;
1336    }
1337    if (!is_power_of_2(n->rss_data.indirections_len)) {
1338        err_msg = "Invalid size of indirection table";
1339        err_value = n->rss_data.indirections_len;
1340        goto error;
1341    }
1342    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1343        err_msg = "Too large indirection table";
1344        err_value = n->rss_data.indirections_len;
1345        goto error;
1346    }
1347    n->rss_data.default_queue = do_rss ?
1348        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1349    if (n->rss_data.default_queue >= n->max_queue_pairs) {
1350        err_msg = "Invalid default queue";
1351        err_value = n->rss_data.default_queue;
1352        goto error;
1353    }
1354    offset += size_get;
1355    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1356    g_free(n->rss_data.indirections_table);
1357    n->rss_data.indirections_table = g_malloc(size_get);
1358    if (!n->rss_data.indirections_table) {
1359        err_msg = "Can't allocate indirections table";
1360        err_value = n->rss_data.indirections_len;
1361        goto error;
1362    }
1363    s = iov_to_buf(iov, iov_cnt, offset,
1364                   n->rss_data.indirections_table, size_get);
1365    if (s != size_get) {
1366        err_msg = "Short indirection table buffer";
1367        err_value = (uint32_t)s;
1368        goto error;
1369    }
1370    for (i = 0; i < n->rss_data.indirections_len; ++i) {
1371        uint16_t val = n->rss_data.indirections_table[i];
1372        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1373    }
1374    offset += size_get;
1375    size_get = sizeof(temp);
1376    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1377    if (s != size_get) {
1378        err_msg = "Can't get queue_pairs";
1379        err_value = (uint32_t)s;
1380        goto error;
1381    }
1382    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1383    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1384        err_msg = "Invalid number of queue_pairs";
1385        err_value = queue_pairs;
1386        goto error;
1387    }
1388    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1389        err_msg = "Invalid key size";
1390        err_value = temp.b;
1391        goto error;
1392    }
1393    if (!temp.b && n->rss_data.hash_types) {
1394        err_msg = "No key provided";
1395        err_value = 0;
1396        goto error;
1397    }
1398    if (!temp.b && !n->rss_data.hash_types) {
1399        virtio_net_disable_rss(n);
1400        return queue_pairs;
1401    }
1402    offset += size_get;
1403    size_get = temp.b;
1404    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1405    if (s != size_get) {
1406        err_msg = "Can get key buffer";
1407        err_value = (uint32_t)s;
1408        goto error;
1409    }
1410    n->rss_data.enabled = true;
1411
1412    if (!n->rss_data.populate_hash) {
1413        if (!virtio_net_attach_epbf_rss(n)) {
1414            /* EBPF must be loaded for vhost */
1415            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1416                warn_report("Can't load eBPF RSS for vhost");
1417                goto error;
1418            }
1419            /* fallback to software RSS */
1420            warn_report("Can't load eBPF RSS - fallback to software RSS");
1421            n->rss_data.enabled_software_rss = true;
1422        }
1423    } else {
1424        /* use software RSS for hash populating */
1425        /* and detach eBPF if was loaded before */
1426        virtio_net_detach_epbf_rss(n);
1427        n->rss_data.enabled_software_rss = true;
1428    }
1429
1430    trace_virtio_net_rss_enable(n->rss_data.hash_types,
1431                                n->rss_data.indirections_len,
1432                                temp.b);
1433    return queue_pairs;
1434error:
1435    trace_virtio_net_rss_error(err_msg, err_value);
1436    virtio_net_disable_rss(n);
1437    return 0;
1438}
1439
1440static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1441                                struct iovec *iov, unsigned int iov_cnt)
1442{
1443    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1444    uint16_t queue_pairs;
1445    NetClientState *nc = qemu_get_queue(n->nic);
1446
1447    virtio_net_disable_rss(n);
1448    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1449        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1450        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1451    }
1452    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1453        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1454    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1455        struct virtio_net_ctrl_mq mq;
1456        size_t s;
1457        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1458            return VIRTIO_NET_ERR;
1459        }
1460        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1461        if (s != sizeof(mq)) {
1462            return VIRTIO_NET_ERR;
1463        }
1464        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1465
1466    } else {
1467        return VIRTIO_NET_ERR;
1468    }
1469
1470    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1471        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1472        queue_pairs > n->max_queue_pairs ||
1473        !n->multiqueue) {
1474        return VIRTIO_NET_ERR;
1475    }
1476
1477    n->curr_queue_pairs = queue_pairs;
1478    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1479        /*
1480         * Avoid updating the backend for a vdpa device: We're only interested
1481         * in updating the device model queues.
1482         */
1483        return VIRTIO_NET_OK;
1484    }
1485    /* stop the backend before changing the number of queue_pairs to avoid handling a
1486     * disabled queue */
1487    virtio_net_set_status(vdev, vdev->status);
1488    virtio_net_set_queue_pairs(n);
1489
1490    return VIRTIO_NET_OK;
1491}
1492
1493size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1494                                  const struct iovec *in_sg, unsigned in_num,
1495                                  const struct iovec *out_sg,
1496                                  unsigned out_num)
1497{
1498    VirtIONet *n = VIRTIO_NET(vdev);
1499    struct virtio_net_ctrl_hdr ctrl;
1500    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1501    size_t s;
1502    struct iovec *iov, *iov2;
1503
1504    if (iov_size(in_sg, in_num) < sizeof(status) ||
1505        iov_size(out_sg, out_num) < sizeof(ctrl)) {
1506        virtio_error(vdev, "virtio-net ctrl missing headers");
1507        return 0;
1508    }
1509
1510    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1511    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1512    iov_discard_front(&iov, &out_num, sizeof(ctrl));
1513    if (s != sizeof(ctrl)) {
1514        status = VIRTIO_NET_ERR;
1515    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1516        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1517    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1518        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1519    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1520        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1521    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1522        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1523    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1524        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1525    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1526        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1527    }
1528
1529    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1530    assert(s == sizeof(status));
1531
1532    g_free(iov2);
1533    return sizeof(status);
1534}
1535
1536static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1537{
1538    VirtQueueElement *elem;
1539
1540    for (;;) {
1541        size_t written;
1542        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1543        if (!elem) {
1544            break;
1545        }
1546
1547        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1548                                             elem->out_sg, elem->out_num);
1549        if (written > 0) {
1550            virtqueue_push(vq, elem, written);
1551            virtio_notify(vdev, vq);
1552            g_free(elem);
1553        } else {
1554            virtqueue_detach_element(vq, elem, 0);
1555            g_free(elem);
1556            break;
1557        }
1558    }
1559}
1560
1561/* RX */
1562
1563static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1564{
1565    VirtIONet *n = VIRTIO_NET(vdev);
1566    int queue_index = vq2q(virtio_get_queue_index(vq));
1567
1568    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1569}
1570
1571static bool virtio_net_can_receive(NetClientState *nc)
1572{
1573    VirtIONet *n = qemu_get_nic_opaque(nc);
1574    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1575    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1576
1577    if (!vdev->vm_running) {
1578        return false;
1579    }
1580
1581    if (nc->queue_index >= n->curr_queue_pairs) {
1582        return false;
1583    }
1584
1585    if (!virtio_queue_ready(q->rx_vq) ||
1586        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1587        return false;
1588    }
1589
1590    return true;
1591}
1592
1593static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1594{
1595    VirtIONet *n = q->n;
1596    if (virtio_queue_empty(q->rx_vq) ||
1597        (n->mergeable_rx_bufs &&
1598         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1599        virtio_queue_set_notification(q->rx_vq, 1);
1600
1601        /* To avoid a race condition where the guest has made some buffers
1602         * available after the above check but before notification was
1603         * enabled, check for available buffers again.
1604         */
1605        if (virtio_queue_empty(q->rx_vq) ||
1606            (n->mergeable_rx_bufs &&
1607             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1608            return 0;
1609        }
1610    }
1611
1612    virtio_queue_set_notification(q->rx_vq, 0);
1613    return 1;
1614}
1615
1616static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1617{
1618    virtio_tswap16s(vdev, &hdr->hdr_len);
1619    virtio_tswap16s(vdev, &hdr->gso_size);
1620    virtio_tswap16s(vdev, &hdr->csum_start);
1621    virtio_tswap16s(vdev, &hdr->csum_offset);
1622}
1623
1624/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1625 * it never finds out that the packets don't have valid checksums.  This
1626 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1627 * fix this with Xen but it hasn't appeared in an upstream release of
1628 * dhclient yet.
1629 *
1630 * To avoid breaking existing guests, we catch udp packets and add
1631 * checksums.  This is terrible but it's better than hacking the guest
1632 * kernels.
1633 *
1634 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1635 * we should provide a mechanism to disable it to avoid polluting the host
1636 * cache.
1637 */
1638static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1639                                        uint8_t *buf, size_t size)
1640{
1641    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1642        (size > 27 && size < 1500) && /* normal sized MTU */
1643        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1644        (buf[23] == 17) && /* ip.protocol == UDP */
1645        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1646        net_checksum_calculate(buf, size, CSUM_UDP);
1647        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1648    }
1649}
1650
1651static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1652                           const void *buf, size_t size)
1653{
1654    if (n->has_vnet_hdr) {
1655        /* FIXME this cast is evil */
1656        void *wbuf = (void *)buf;
1657        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1658                                    size - n->host_hdr_len);
1659
1660        if (n->needs_vnet_hdr_swap) {
1661            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1662        }
1663        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1664    } else {
1665        struct virtio_net_hdr hdr = {
1666            .flags = 0,
1667            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1668        };
1669        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1670    }
1671}
1672
1673static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1674{
1675    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1676    static const uint8_t vlan[] = {0x81, 0x00};
1677    uint8_t *ptr = (uint8_t *)buf;
1678    int i;
1679
1680    if (n->promisc)
1681        return 1;
1682
1683    ptr += n->host_hdr_len;
1684
1685    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1686        int vid = lduw_be_p(ptr + 14) & 0xfff;
1687        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1688            return 0;
1689    }
1690
1691    if (ptr[0] & 1) { // multicast
1692        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1693            return !n->nobcast;
1694        } else if (n->nomulti) {
1695            return 0;
1696        } else if (n->allmulti || n->mac_table.multi_overflow) {
1697            return 1;
1698        }
1699
1700        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1701            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1702                return 1;
1703            }
1704        }
1705    } else { // unicast
1706        if (n->nouni) {
1707            return 0;
1708        } else if (n->alluni || n->mac_table.uni_overflow) {
1709            return 1;
1710        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1711            return 1;
1712        }
1713
1714        for (i = 0; i < n->mac_table.first_multi; i++) {
1715            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1716                return 1;
1717            }
1718        }
1719    }
1720
1721    return 0;
1722}
1723
1724static uint8_t virtio_net_get_hash_type(bool isip4,
1725                                        bool isip6,
1726                                        bool isudp,
1727                                        bool istcp,
1728                                        uint32_t types)
1729{
1730    if (isip4) {
1731        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1732            return NetPktRssIpV4Tcp;
1733        }
1734        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1735            return NetPktRssIpV4Udp;
1736        }
1737        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1738            return NetPktRssIpV4;
1739        }
1740    } else if (isip6) {
1741        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1742                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1743
1744        if (istcp && (types & mask)) {
1745            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1746                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1747        }
1748        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1749        if (isudp && (types & mask)) {
1750            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1751                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1752        }
1753        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1754        if (types & mask) {
1755            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1756                NetPktRssIpV6Ex : NetPktRssIpV6;
1757        }
1758    }
1759    return 0xff;
1760}
1761
1762static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1763                                   uint32_t hash)
1764{
1765    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1766    hdr->hash_value = hash;
1767    hdr->hash_report = report;
1768}
1769
1770static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1771                                  size_t size)
1772{
1773    VirtIONet *n = qemu_get_nic_opaque(nc);
1774    unsigned int index = nc->queue_index, new_index = index;
1775    struct NetRxPkt *pkt = n->rx_pkt;
1776    uint8_t net_hash_type;
1777    uint32_t hash;
1778    bool isip4, isip6, isudp, istcp;
1779    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1780        VIRTIO_NET_HASH_REPORT_IPv4,
1781        VIRTIO_NET_HASH_REPORT_TCPv4,
1782        VIRTIO_NET_HASH_REPORT_TCPv6,
1783        VIRTIO_NET_HASH_REPORT_IPv6,
1784        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1785        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1786        VIRTIO_NET_HASH_REPORT_UDPv4,
1787        VIRTIO_NET_HASH_REPORT_UDPv6,
1788        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1789    };
1790
1791    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1792                             size - n->host_hdr_len);
1793    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1794    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1795        istcp = isudp = false;
1796    }
1797    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1798        istcp = isudp = false;
1799    }
1800    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1801                                             n->rss_data.hash_types);
1802    if (net_hash_type > NetPktRssIpV6UdpEx) {
1803        if (n->rss_data.populate_hash) {
1804            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1805        }
1806        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1807    }
1808
1809    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1810
1811    if (n->rss_data.populate_hash) {
1812        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1813    }
1814
1815    if (n->rss_data.redirect) {
1816        new_index = hash & (n->rss_data.indirections_len - 1);
1817        new_index = n->rss_data.indirections_table[new_index];
1818    }
1819
1820    return (index == new_index) ? -1 : new_index;
1821}
1822
1823static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1824                                      size_t size, bool no_rss)
1825{
1826    VirtIONet *n = qemu_get_nic_opaque(nc);
1827    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1828    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1829    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1830    size_t lens[VIRTQUEUE_MAX_SIZE];
1831    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1832    struct virtio_net_hdr_mrg_rxbuf mhdr;
1833    unsigned mhdr_cnt = 0;
1834    size_t offset, i, guest_offset, j;
1835    ssize_t err;
1836
1837    if (!virtio_net_can_receive(nc)) {
1838        return -1;
1839    }
1840
1841    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1842        int index = virtio_net_process_rss(nc, buf, size);
1843        if (index >= 0) {
1844            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1845            return virtio_net_receive_rcu(nc2, buf, size, true);
1846        }
1847    }
1848
1849    /* hdr_len refers to the header we supply to the guest */
1850    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1851        return 0;
1852    }
1853
1854    if (!receive_filter(n, buf, size))
1855        return size;
1856
1857    offset = i = 0;
1858
1859    while (offset < size) {
1860        VirtQueueElement *elem;
1861        int len, total;
1862        const struct iovec *sg;
1863
1864        total = 0;
1865
1866        if (i == VIRTQUEUE_MAX_SIZE) {
1867            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1868            err = size;
1869            goto err;
1870        }
1871
1872        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1873        if (!elem) {
1874            if (i) {
1875                virtio_error(vdev, "virtio-net unexpected empty queue: "
1876                             "i %zd mergeable %d offset %zd, size %zd, "
1877                             "guest hdr len %zd, host hdr len %zd "
1878                             "guest features 0x%" PRIx64,
1879                             i, n->mergeable_rx_bufs, offset, size,
1880                             n->guest_hdr_len, n->host_hdr_len,
1881                             vdev->guest_features);
1882            }
1883            err = -1;
1884            goto err;
1885        }
1886
1887        if (elem->in_num < 1) {
1888            virtio_error(vdev,
1889                         "virtio-net receive queue contains no in buffers");
1890            virtqueue_detach_element(q->rx_vq, elem, 0);
1891            g_free(elem);
1892            err = -1;
1893            goto err;
1894        }
1895
1896        sg = elem->in_sg;
1897        if (i == 0) {
1898            assert(offset == 0);
1899            if (n->mergeable_rx_bufs) {
1900                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1901                                    sg, elem->in_num,
1902                                    offsetof(typeof(mhdr), num_buffers),
1903                                    sizeof(mhdr.num_buffers));
1904            }
1905
1906            receive_header(n, sg, elem->in_num, buf, size);
1907            if (n->rss_data.populate_hash) {
1908                offset = sizeof(mhdr);
1909                iov_from_buf(sg, elem->in_num, offset,
1910                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1911            }
1912            offset = n->host_hdr_len;
1913            total += n->guest_hdr_len;
1914            guest_offset = n->guest_hdr_len;
1915        } else {
1916            guest_offset = 0;
1917        }
1918
1919        /* copy in packet.  ugh */
1920        len = iov_from_buf(sg, elem->in_num, guest_offset,
1921                           buf + offset, size - offset);
1922        total += len;
1923        offset += len;
1924        /* If buffers can't be merged, at this point we
1925         * must have consumed the complete packet.
1926         * Otherwise, drop it. */
1927        if (!n->mergeable_rx_bufs && offset < size) {
1928            virtqueue_unpop(q->rx_vq, elem, total);
1929            g_free(elem);
1930            err = size;
1931            goto err;
1932        }
1933
1934        elems[i] = elem;
1935        lens[i] = total;
1936        i++;
1937    }
1938
1939    if (mhdr_cnt) {
1940        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1941        iov_from_buf(mhdr_sg, mhdr_cnt,
1942                     0,
1943                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1944    }
1945
1946    for (j = 0; j < i; j++) {
1947        /* signal other side */
1948        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1949        g_free(elems[j]);
1950    }
1951
1952    virtqueue_flush(q->rx_vq, i);
1953    virtio_notify(vdev, q->rx_vq);
1954
1955    return size;
1956
1957err:
1958    for (j = 0; j < i; j++) {
1959        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1960        g_free(elems[j]);
1961    }
1962
1963    return err;
1964}
1965
1966static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1967                                  size_t size)
1968{
1969    RCU_READ_LOCK_GUARD();
1970
1971    return virtio_net_receive_rcu(nc, buf, size, false);
1972}
1973
1974static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1975                                         const uint8_t *buf,
1976                                         VirtioNetRscUnit *unit)
1977{
1978    uint16_t ip_hdrlen;
1979    struct ip_header *ip;
1980
1981    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1982                              + sizeof(struct eth_header));
1983    unit->ip = (void *)ip;
1984    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1985    unit->ip_plen = &ip->ip_len;
1986    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1987    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1988    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1989}
1990
1991static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1992                                         const uint8_t *buf,
1993                                         VirtioNetRscUnit *unit)
1994{
1995    struct ip6_header *ip6;
1996
1997    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1998                                 + sizeof(struct eth_header));
1999    unit->ip = ip6;
2000    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2001    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2002                                        + sizeof(struct ip6_header));
2003    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2004
2005    /* There is a difference between payload lenght in ipv4 and v6,
2006       ip header is excluded in ipv6 */
2007    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2008}
2009
2010static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2011                                       VirtioNetRscSeg *seg)
2012{
2013    int ret;
2014    struct virtio_net_hdr_v1 *h;
2015
2016    h = (struct virtio_net_hdr_v1 *)seg->buf;
2017    h->flags = 0;
2018    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2019
2020    if (seg->is_coalesced) {
2021        h->rsc.segments = seg->packets;
2022        h->rsc.dup_acks = seg->dup_ack;
2023        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2024        if (chain->proto == ETH_P_IP) {
2025            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2026        } else {
2027            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2028        }
2029    }
2030
2031    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2032    QTAILQ_REMOVE(&chain->buffers, seg, next);
2033    g_free(seg->buf);
2034    g_free(seg);
2035
2036    return ret;
2037}
2038
2039static void virtio_net_rsc_purge(void *opq)
2040{
2041    VirtioNetRscSeg *seg, *rn;
2042    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2043
2044    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2045        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2046            chain->stat.purge_failed++;
2047            continue;
2048        }
2049    }
2050
2051    chain->stat.timer++;
2052    if (!QTAILQ_EMPTY(&chain->buffers)) {
2053        timer_mod(chain->drain_timer,
2054              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2055    }
2056}
2057
2058static void virtio_net_rsc_cleanup(VirtIONet *n)
2059{
2060    VirtioNetRscChain *chain, *rn_chain;
2061    VirtioNetRscSeg *seg, *rn_seg;
2062
2063    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2064        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2065            QTAILQ_REMOVE(&chain->buffers, seg, next);
2066            g_free(seg->buf);
2067            g_free(seg);
2068        }
2069
2070        timer_free(chain->drain_timer);
2071        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2072        g_free(chain);
2073    }
2074}
2075
2076static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2077                                     NetClientState *nc,
2078                                     const uint8_t *buf, size_t size)
2079{
2080    uint16_t hdr_len;
2081    VirtioNetRscSeg *seg;
2082
2083    hdr_len = chain->n->guest_hdr_len;
2084    seg = g_new(VirtioNetRscSeg, 1);
2085    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2086        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2087    memcpy(seg->buf, buf, size);
2088    seg->size = size;
2089    seg->packets = 1;
2090    seg->dup_ack = 0;
2091    seg->is_coalesced = 0;
2092    seg->nc = nc;
2093
2094    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2095    chain->stat.cache++;
2096
2097    switch (chain->proto) {
2098    case ETH_P_IP:
2099        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2100        break;
2101    case ETH_P_IPV6:
2102        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2103        break;
2104    default:
2105        g_assert_not_reached();
2106    }
2107}
2108
2109static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2110                                         VirtioNetRscSeg *seg,
2111                                         const uint8_t *buf,
2112                                         struct tcp_header *n_tcp,
2113                                         struct tcp_header *o_tcp)
2114{
2115    uint32_t nack, oack;
2116    uint16_t nwin, owin;
2117
2118    nack = htonl(n_tcp->th_ack);
2119    nwin = htons(n_tcp->th_win);
2120    oack = htonl(o_tcp->th_ack);
2121    owin = htons(o_tcp->th_win);
2122
2123    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2124        chain->stat.ack_out_of_win++;
2125        return RSC_FINAL;
2126    } else if (nack == oack) {
2127        /* duplicated ack or window probe */
2128        if (nwin == owin) {
2129            /* duplicated ack, add dup ack count due to whql test up to 1 */
2130            chain->stat.dup_ack++;
2131            return RSC_FINAL;
2132        } else {
2133            /* Coalesce window update */
2134            o_tcp->th_win = n_tcp->th_win;
2135            chain->stat.win_update++;
2136            return RSC_COALESCE;
2137        }
2138    } else {
2139        /* pure ack, go to 'C', finalize*/
2140        chain->stat.pure_ack++;
2141        return RSC_FINAL;
2142    }
2143}
2144
2145static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2146                                            VirtioNetRscSeg *seg,
2147                                            const uint8_t *buf,
2148                                            VirtioNetRscUnit *n_unit)
2149{
2150    void *data;
2151    uint16_t o_ip_len;
2152    uint32_t nseq, oseq;
2153    VirtioNetRscUnit *o_unit;
2154
2155    o_unit = &seg->unit;
2156    o_ip_len = htons(*o_unit->ip_plen);
2157    nseq = htonl(n_unit->tcp->th_seq);
2158    oseq = htonl(o_unit->tcp->th_seq);
2159
2160    /* out of order or retransmitted. */
2161    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2162        chain->stat.data_out_of_win++;
2163        return RSC_FINAL;
2164    }
2165
2166    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2167    if (nseq == oseq) {
2168        if ((o_unit->payload == 0) && n_unit->payload) {
2169            /* From no payload to payload, normal case, not a dup ack or etc */
2170            chain->stat.data_after_pure_ack++;
2171            goto coalesce;
2172        } else {
2173            return virtio_net_rsc_handle_ack(chain, seg, buf,
2174                                             n_unit->tcp, o_unit->tcp);
2175        }
2176    } else if ((nseq - oseq) != o_unit->payload) {
2177        /* Not a consistent packet, out of order */
2178        chain->stat.data_out_of_order++;
2179        return RSC_FINAL;
2180    } else {
2181coalesce:
2182        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2183            chain->stat.over_size++;
2184            return RSC_FINAL;
2185        }
2186
2187        /* Here comes the right data, the payload length in v4/v6 is different,
2188           so use the field value to update and record the new data len */
2189        o_unit->payload += n_unit->payload; /* update new data len */
2190
2191        /* update field in ip header */
2192        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2193
2194        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2195           for windows guest, while this may change the behavior for linux
2196           guest (only if it uses RSC feature). */
2197        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2198
2199        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2200        o_unit->tcp->th_win = n_unit->tcp->th_win;
2201
2202        memmove(seg->buf + seg->size, data, n_unit->payload);
2203        seg->size += n_unit->payload;
2204        seg->packets++;
2205        chain->stat.coalesced++;
2206        return RSC_COALESCE;
2207    }
2208}
2209
2210static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2211                                        VirtioNetRscSeg *seg,
2212                                        const uint8_t *buf, size_t size,
2213                                        VirtioNetRscUnit *unit)
2214{
2215    struct ip_header *ip1, *ip2;
2216
2217    ip1 = (struct ip_header *)(unit->ip);
2218    ip2 = (struct ip_header *)(seg->unit.ip);
2219    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2220        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2221        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2222        chain->stat.no_match++;
2223        return RSC_NO_MATCH;
2224    }
2225
2226    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2227}
2228
2229static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2230                                        VirtioNetRscSeg *seg,
2231                                        const uint8_t *buf, size_t size,
2232                                        VirtioNetRscUnit *unit)
2233{
2234    struct ip6_header *ip1, *ip2;
2235
2236    ip1 = (struct ip6_header *)(unit->ip);
2237    ip2 = (struct ip6_header *)(seg->unit.ip);
2238    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2239        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2240        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2241        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2242            chain->stat.no_match++;
2243            return RSC_NO_MATCH;
2244    }
2245
2246    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2247}
2248
2249/* Packets with 'SYN' should bypass, other flag should be sent after drain
2250 * to prevent out of order */
2251static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2252                                         struct tcp_header *tcp)
2253{
2254    uint16_t tcp_hdr;
2255    uint16_t tcp_flag;
2256
2257    tcp_flag = htons(tcp->th_offset_flags);
2258    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2259    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2260    if (tcp_flag & TH_SYN) {
2261        chain->stat.tcp_syn++;
2262        return RSC_BYPASS;
2263    }
2264
2265    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2266        chain->stat.tcp_ctrl_drain++;
2267        return RSC_FINAL;
2268    }
2269
2270    if (tcp_hdr > sizeof(struct tcp_header)) {
2271        chain->stat.tcp_all_opt++;
2272        return RSC_FINAL;
2273    }
2274
2275    return RSC_CANDIDATE;
2276}
2277
2278static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2279                                         NetClientState *nc,
2280                                         const uint8_t *buf, size_t size,
2281                                         VirtioNetRscUnit *unit)
2282{
2283    int ret;
2284    VirtioNetRscSeg *seg, *nseg;
2285
2286    if (QTAILQ_EMPTY(&chain->buffers)) {
2287        chain->stat.empty_cache++;
2288        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2289        timer_mod(chain->drain_timer,
2290              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2291        return size;
2292    }
2293
2294    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2295        if (chain->proto == ETH_P_IP) {
2296            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2297        } else {
2298            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2299        }
2300
2301        if (ret == RSC_FINAL) {
2302            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2303                /* Send failed */
2304                chain->stat.final_failed++;
2305                return 0;
2306            }
2307
2308            /* Send current packet */
2309            return virtio_net_do_receive(nc, buf, size);
2310        } else if (ret == RSC_NO_MATCH) {
2311            continue;
2312        } else {
2313            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2314            seg->is_coalesced = 1;
2315            return size;
2316        }
2317    }
2318
2319    chain->stat.no_match_cache++;
2320    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2321    return size;
2322}
2323
2324/* Drain a connection data, this is to avoid out of order segments */
2325static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2326                                        NetClientState *nc,
2327                                        const uint8_t *buf, size_t size,
2328                                        uint16_t ip_start, uint16_t ip_size,
2329                                        uint16_t tcp_port)
2330{
2331    VirtioNetRscSeg *seg, *nseg;
2332    uint32_t ppair1, ppair2;
2333
2334    ppair1 = *(uint32_t *)(buf + tcp_port);
2335    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2336        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2337        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2338            || (ppair1 != ppair2)) {
2339            continue;
2340        }
2341        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2342            chain->stat.drain_failed++;
2343        }
2344
2345        break;
2346    }
2347
2348    return virtio_net_do_receive(nc, buf, size);
2349}
2350
2351static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2352                                            struct ip_header *ip,
2353                                            const uint8_t *buf, size_t size)
2354{
2355    uint16_t ip_len;
2356
2357    /* Not an ipv4 packet */
2358    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2359        chain->stat.ip_option++;
2360        return RSC_BYPASS;
2361    }
2362
2363    /* Don't handle packets with ip option */
2364    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2365        chain->stat.ip_option++;
2366        return RSC_BYPASS;
2367    }
2368
2369    if (ip->ip_p != IPPROTO_TCP) {
2370        chain->stat.bypass_not_tcp++;
2371        return RSC_BYPASS;
2372    }
2373
2374    /* Don't handle packets with ip fragment */
2375    if (!(htons(ip->ip_off) & IP_DF)) {
2376        chain->stat.ip_frag++;
2377        return RSC_BYPASS;
2378    }
2379
2380    /* Don't handle packets with ecn flag */
2381    if (IPTOS_ECN(ip->ip_tos)) {
2382        chain->stat.ip_ecn++;
2383        return RSC_BYPASS;
2384    }
2385
2386    ip_len = htons(ip->ip_len);
2387    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2388        || ip_len > (size - chain->n->guest_hdr_len -
2389                     sizeof(struct eth_header))) {
2390        chain->stat.ip_hacked++;
2391        return RSC_BYPASS;
2392    }
2393
2394    return RSC_CANDIDATE;
2395}
2396
2397static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2398                                      NetClientState *nc,
2399                                      const uint8_t *buf, size_t size)
2400{
2401    int32_t ret;
2402    uint16_t hdr_len;
2403    VirtioNetRscUnit unit;
2404
2405    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2406
2407    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2408        + sizeof(struct tcp_header))) {
2409        chain->stat.bypass_not_tcp++;
2410        return virtio_net_do_receive(nc, buf, size);
2411    }
2412
2413    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2414    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2415        != RSC_CANDIDATE) {
2416        return virtio_net_do_receive(nc, buf, size);
2417    }
2418
2419    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2420    if (ret == RSC_BYPASS) {
2421        return virtio_net_do_receive(nc, buf, size);
2422    } else if (ret == RSC_FINAL) {
2423        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2424                ((hdr_len + sizeof(struct eth_header)) + 12),
2425                VIRTIO_NET_IP4_ADDR_SIZE,
2426                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2427    }
2428
2429    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2430}
2431
2432static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2433                                            struct ip6_header *ip6,
2434                                            const uint8_t *buf, size_t size)
2435{
2436    uint16_t ip_len;
2437
2438    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2439        != IP_HEADER_VERSION_6) {
2440        return RSC_BYPASS;
2441    }
2442
2443    /* Both option and protocol is checked in this */
2444    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2445        chain->stat.bypass_not_tcp++;
2446        return RSC_BYPASS;
2447    }
2448
2449    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2450    if (ip_len < sizeof(struct tcp_header) ||
2451        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2452                  - sizeof(struct ip6_header))) {
2453        chain->stat.ip_hacked++;
2454        return RSC_BYPASS;
2455    }
2456
2457    /* Don't handle packets with ecn flag */
2458    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2459        chain->stat.ip_ecn++;
2460        return RSC_BYPASS;
2461    }
2462
2463    return RSC_CANDIDATE;
2464}
2465
2466static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2467                                      const uint8_t *buf, size_t size)
2468{
2469    int32_t ret;
2470    uint16_t hdr_len;
2471    VirtioNetRscChain *chain;
2472    VirtioNetRscUnit unit;
2473
2474    chain = (VirtioNetRscChain *)opq;
2475    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2476
2477    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2478        + sizeof(tcp_header))) {
2479        return virtio_net_do_receive(nc, buf, size);
2480    }
2481
2482    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2483    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2484                                                 unit.ip, buf, size)) {
2485        return virtio_net_do_receive(nc, buf, size);
2486    }
2487
2488    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2489    if (ret == RSC_BYPASS) {
2490        return virtio_net_do_receive(nc, buf, size);
2491    } else if (ret == RSC_FINAL) {
2492        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2493                ((hdr_len + sizeof(struct eth_header)) + 8),
2494                VIRTIO_NET_IP6_ADDR_SIZE,
2495                hdr_len + sizeof(struct eth_header)
2496                + sizeof(struct ip6_header));
2497    }
2498
2499    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2500}
2501
2502static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2503                                                      NetClientState *nc,
2504                                                      uint16_t proto)
2505{
2506    VirtioNetRscChain *chain;
2507
2508    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2509        return NULL;
2510    }
2511
2512    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2513        if (chain->proto == proto) {
2514            return chain;
2515        }
2516    }
2517
2518    chain = g_malloc(sizeof(*chain));
2519    chain->n = n;
2520    chain->proto = proto;
2521    if (proto == (uint16_t)ETH_P_IP) {
2522        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2523        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2524    } else {
2525        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2526        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2527    }
2528    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2529                                      virtio_net_rsc_purge, chain);
2530    memset(&chain->stat, 0, sizeof(chain->stat));
2531
2532    QTAILQ_INIT(&chain->buffers);
2533    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2534
2535    return chain;
2536}
2537
2538static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2539                                      const uint8_t *buf,
2540                                      size_t size)
2541{
2542    uint16_t proto;
2543    VirtioNetRscChain *chain;
2544    struct eth_header *eth;
2545    VirtIONet *n;
2546
2547    n = qemu_get_nic_opaque(nc);
2548    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2549        return virtio_net_do_receive(nc, buf, size);
2550    }
2551
2552    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2553    proto = htons(eth->h_proto);
2554
2555    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2556    if (chain) {
2557        chain->stat.received++;
2558        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2559            return virtio_net_rsc_receive4(chain, nc, buf, size);
2560        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2561            return virtio_net_rsc_receive6(chain, nc, buf, size);
2562        }
2563    }
2564    return virtio_net_do_receive(nc, buf, size);
2565}
2566
2567static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2568                                  size_t size)
2569{
2570    VirtIONet *n = qemu_get_nic_opaque(nc);
2571    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2572        return virtio_net_rsc_receive(nc, buf, size);
2573    } else {
2574        return virtio_net_do_receive(nc, buf, size);
2575    }
2576}
2577
2578static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2579
2580static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2581{
2582    VirtIONet *n = qemu_get_nic_opaque(nc);
2583    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2584    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2585    int ret;
2586
2587    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2588    virtio_notify(vdev, q->tx_vq);
2589
2590    g_free(q->async_tx.elem);
2591    q->async_tx.elem = NULL;
2592
2593    virtio_queue_set_notification(q->tx_vq, 1);
2594    ret = virtio_net_flush_tx(q);
2595    if (ret >= n->tx_burst) {
2596        /*
2597         * the flush has been stopped by tx_burst
2598         * we will not receive notification for the
2599         * remainining part, so re-schedule
2600         */
2601        virtio_queue_set_notification(q->tx_vq, 0);
2602        if (q->tx_bh) {
2603            qemu_bh_schedule(q->tx_bh);
2604        } else {
2605            timer_mod(q->tx_timer,
2606                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2607        }
2608        q->tx_waiting = 1;
2609    }
2610}
2611
2612/* TX */
2613static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2614{
2615    VirtIONet *n = q->n;
2616    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2617    VirtQueueElement *elem;
2618    int32_t num_packets = 0;
2619    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2620    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2621        return num_packets;
2622    }
2623
2624    if (q->async_tx.elem) {
2625        virtio_queue_set_notification(q->tx_vq, 0);
2626        return num_packets;
2627    }
2628
2629    for (;;) {
2630        ssize_t ret;
2631        unsigned int out_num;
2632        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2633        struct virtio_net_hdr_mrg_rxbuf mhdr;
2634
2635        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2636        if (!elem) {
2637            break;
2638        }
2639
2640        out_num = elem->out_num;
2641        out_sg = elem->out_sg;
2642        if (out_num < 1) {
2643            virtio_error(vdev, "virtio-net header not in first element");
2644            virtqueue_detach_element(q->tx_vq, elem, 0);
2645            g_free(elem);
2646            return -EINVAL;
2647        }
2648
2649        if (n->has_vnet_hdr) {
2650            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2651                n->guest_hdr_len) {
2652                virtio_error(vdev, "virtio-net header incorrect");
2653                virtqueue_detach_element(q->tx_vq, elem, 0);
2654                g_free(elem);
2655                return -EINVAL;
2656            }
2657            if (n->needs_vnet_hdr_swap) {
2658                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2659                sg2[0].iov_base = &mhdr;
2660                sg2[0].iov_len = n->guest_hdr_len;
2661                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2662                                   out_sg, out_num,
2663                                   n->guest_hdr_len, -1);
2664                if (out_num == VIRTQUEUE_MAX_SIZE) {
2665                    goto drop;
2666                }
2667                out_num += 1;
2668                out_sg = sg2;
2669            }
2670        }
2671        /*
2672         * If host wants to see the guest header as is, we can
2673         * pass it on unchanged. Otherwise, copy just the parts
2674         * that host is interested in.
2675         */
2676        assert(n->host_hdr_len <= n->guest_hdr_len);
2677        if (n->host_hdr_len != n->guest_hdr_len) {
2678            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2679                                       out_sg, out_num,
2680                                       0, n->host_hdr_len);
2681            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2682                             out_sg, out_num,
2683                             n->guest_hdr_len, -1);
2684            out_num = sg_num;
2685            out_sg = sg;
2686        }
2687
2688        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2689                                      out_sg, out_num, virtio_net_tx_complete);
2690        if (ret == 0) {
2691            virtio_queue_set_notification(q->tx_vq, 0);
2692            q->async_tx.elem = elem;
2693            return -EBUSY;
2694        }
2695
2696drop:
2697        virtqueue_push(q->tx_vq, elem, 0);
2698        virtio_notify(vdev, q->tx_vq);
2699        g_free(elem);
2700
2701        if (++num_packets >= n->tx_burst) {
2702            break;
2703        }
2704    }
2705    return num_packets;
2706}
2707
2708static void virtio_net_tx_timer(void *opaque);
2709
2710static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2711{
2712    VirtIONet *n = VIRTIO_NET(vdev);
2713    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2714
2715    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2716        virtio_net_drop_tx_queue_data(vdev, vq);
2717        return;
2718    }
2719
2720    /* This happens when device was stopped but VCPU wasn't. */
2721    if (!vdev->vm_running) {
2722        q->tx_waiting = 1;
2723        return;
2724    }
2725
2726    if (q->tx_waiting) {
2727        /* We already have queued packets, immediately flush */
2728        timer_del(q->tx_timer);
2729        virtio_net_tx_timer(q);
2730    } else {
2731        /* re-arm timer to flush it (and more) on next tick */
2732        timer_mod(q->tx_timer,
2733                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2734        q->tx_waiting = 1;
2735        virtio_queue_set_notification(vq, 0);
2736    }
2737}
2738
2739static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2740{
2741    VirtIONet *n = VIRTIO_NET(vdev);
2742    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2743
2744    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2745        virtio_net_drop_tx_queue_data(vdev, vq);
2746        return;
2747    }
2748
2749    if (unlikely(q->tx_waiting)) {
2750        return;
2751    }
2752    q->tx_waiting = 1;
2753    /* This happens when device was stopped but VCPU wasn't. */
2754    if (!vdev->vm_running) {
2755        return;
2756    }
2757    virtio_queue_set_notification(vq, 0);
2758    qemu_bh_schedule(q->tx_bh);
2759}
2760
2761static void virtio_net_tx_timer(void *opaque)
2762{
2763    VirtIONetQueue *q = opaque;
2764    VirtIONet *n = q->n;
2765    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2766    int ret;
2767
2768    /* This happens when device was stopped but BH wasn't. */
2769    if (!vdev->vm_running) {
2770        /* Make sure tx waiting is set, so we'll run when restarted. */
2771        assert(q->tx_waiting);
2772        return;
2773    }
2774
2775    q->tx_waiting = 0;
2776
2777    /* Just in case the driver is not ready on more */
2778    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2779        return;
2780    }
2781
2782    ret = virtio_net_flush_tx(q);
2783    if (ret == -EBUSY || ret == -EINVAL) {
2784        return;
2785    }
2786    /*
2787     * If we flush a full burst of packets, assume there are
2788     * more coming and immediately rearm
2789     */
2790    if (ret >= n->tx_burst) {
2791        q->tx_waiting = 1;
2792        timer_mod(q->tx_timer,
2793                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2794        return;
2795    }
2796    /*
2797     * If less than a full burst, re-enable notification and flush
2798     * anything that may have come in while we weren't looking.  If
2799     * we find something, assume the guest is still active and rearm
2800     */
2801    virtio_queue_set_notification(q->tx_vq, 1);
2802    ret = virtio_net_flush_tx(q);
2803    if (ret > 0) {
2804        virtio_queue_set_notification(q->tx_vq, 0);
2805        q->tx_waiting = 1;
2806        timer_mod(q->tx_timer,
2807                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2808    }
2809}
2810
2811static void virtio_net_tx_bh(void *opaque)
2812{
2813    VirtIONetQueue *q = opaque;
2814    VirtIONet *n = q->n;
2815    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2816    int32_t ret;
2817
2818    /* This happens when device was stopped but BH wasn't. */
2819    if (!vdev->vm_running) {
2820        /* Make sure tx waiting is set, so we'll run when restarted. */
2821        assert(q->tx_waiting);
2822        return;
2823    }
2824
2825    q->tx_waiting = 0;
2826
2827    /* Just in case the driver is not ready on more */
2828    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2829        return;
2830    }
2831
2832    ret = virtio_net_flush_tx(q);
2833    if (ret == -EBUSY || ret == -EINVAL) {
2834        return; /* Notification re-enable handled by tx_complete or device
2835                 * broken */
2836    }
2837
2838    /* If we flush a full burst of packets, assume there are
2839     * more coming and immediately reschedule */
2840    if (ret >= n->tx_burst) {
2841        qemu_bh_schedule(q->tx_bh);
2842        q->tx_waiting = 1;
2843        return;
2844    }
2845
2846    /* If less than a full burst, re-enable notification and flush
2847     * anything that may have come in while we weren't looking.  If
2848     * we find something, assume the guest is still active and reschedule */
2849    virtio_queue_set_notification(q->tx_vq, 1);
2850    ret = virtio_net_flush_tx(q);
2851    if (ret == -EINVAL) {
2852        return;
2853    } else if (ret > 0) {
2854        virtio_queue_set_notification(q->tx_vq, 0);
2855        qemu_bh_schedule(q->tx_bh);
2856        q->tx_waiting = 1;
2857    }
2858}
2859
2860static void virtio_net_add_queue(VirtIONet *n, int index)
2861{
2862    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2863
2864    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2865                                           virtio_net_handle_rx);
2866
2867    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2868        n->vqs[index].tx_vq =
2869            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2870                             virtio_net_handle_tx_timer);
2871        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2872                                              virtio_net_tx_timer,
2873                                              &n->vqs[index]);
2874    } else {
2875        n->vqs[index].tx_vq =
2876            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2877                             virtio_net_handle_tx_bh);
2878        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2879    }
2880
2881    n->vqs[index].tx_waiting = 0;
2882    n->vqs[index].n = n;
2883}
2884
2885static void virtio_net_del_queue(VirtIONet *n, int index)
2886{
2887    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2888    VirtIONetQueue *q = &n->vqs[index];
2889    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2890
2891    qemu_purge_queued_packets(nc);
2892
2893    virtio_del_queue(vdev, index * 2);
2894    if (q->tx_timer) {
2895        timer_free(q->tx_timer);
2896        q->tx_timer = NULL;
2897    } else {
2898        qemu_bh_delete(q->tx_bh);
2899        q->tx_bh = NULL;
2900    }
2901    q->tx_waiting = 0;
2902    virtio_del_queue(vdev, index * 2 + 1);
2903}
2904
2905static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2906{
2907    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2908    int old_num_queues = virtio_get_num_queues(vdev);
2909    int new_num_queues = new_max_queue_pairs * 2 + 1;
2910    int i;
2911
2912    assert(old_num_queues >= 3);
2913    assert(old_num_queues % 2 == 1);
2914
2915    if (old_num_queues == new_num_queues) {
2916        return;
2917    }
2918
2919    /*
2920     * We always need to remove and add ctrl vq if
2921     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2922     * and then we only enter one of the following two loops.
2923     */
2924    virtio_del_queue(vdev, old_num_queues - 1);
2925
2926    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2927        /* new_num_queues < old_num_queues */
2928        virtio_net_del_queue(n, i / 2);
2929    }
2930
2931    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2932        /* new_num_queues > old_num_queues */
2933        virtio_net_add_queue(n, i / 2);
2934    }
2935
2936    /* add ctrl_vq last */
2937    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2938}
2939
2940static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2941{
2942    int max = multiqueue ? n->max_queue_pairs : 1;
2943
2944    n->multiqueue = multiqueue;
2945    virtio_net_change_num_queue_pairs(n, max);
2946
2947    virtio_net_set_queue_pairs(n);
2948}
2949
2950static int virtio_net_post_load_device(void *opaque, int version_id)
2951{
2952    VirtIONet *n = opaque;
2953    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2954    int i, link_down;
2955
2956    trace_virtio_net_post_load_device();
2957    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2958                               virtio_vdev_has_feature(vdev,
2959                                                       VIRTIO_F_VERSION_1),
2960                               virtio_vdev_has_feature(vdev,
2961                                                       VIRTIO_NET_F_HASH_REPORT));
2962
2963    /* MAC_TABLE_ENTRIES may be different from the saved image */
2964    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2965        n->mac_table.in_use = 0;
2966    }
2967
2968    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2969        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2970    }
2971
2972    /*
2973     * curr_guest_offloads will be later overwritten by the
2974     * virtio_set_features_nocheck call done from the virtio_load.
2975     * Here we make sure it is preserved and restored accordingly
2976     * in the virtio_net_post_load_virtio callback.
2977     */
2978    n->saved_guest_offloads = n->curr_guest_offloads;
2979
2980    virtio_net_set_queue_pairs(n);
2981
2982    /* Find the first multicast entry in the saved MAC filter */
2983    for (i = 0; i < n->mac_table.in_use; i++) {
2984        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2985            break;
2986        }
2987    }
2988    n->mac_table.first_multi = i;
2989
2990    /* nc.link_down can't be migrated, so infer link_down according
2991     * to link status bit in n->status */
2992    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2993    for (i = 0; i < n->max_queue_pairs; i++) {
2994        qemu_get_subqueue(n->nic, i)->link_down = link_down;
2995    }
2996
2997    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2998        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2999        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3000                                  QEMU_CLOCK_VIRTUAL,
3001                                  virtio_net_announce_timer, n);
3002        if (n->announce_timer.round) {
3003            timer_mod(n->announce_timer.tm,
3004                      qemu_clock_get_ms(n->announce_timer.type));
3005        } else {
3006            qemu_announce_timer_del(&n->announce_timer, false);
3007        }
3008    }
3009
3010    if (n->rss_data.enabled) {
3011        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3012        if (!n->rss_data.populate_hash) {
3013            if (!virtio_net_attach_epbf_rss(n)) {
3014                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3015                    warn_report("Can't post-load eBPF RSS for vhost");
3016                } else {
3017                    warn_report("Can't post-load eBPF RSS - "
3018                                "fallback to software RSS");
3019                    n->rss_data.enabled_software_rss = true;
3020                }
3021            }
3022        }
3023
3024        trace_virtio_net_rss_enable(n->rss_data.hash_types,
3025                                    n->rss_data.indirections_len,
3026                                    sizeof(n->rss_data.key));
3027    } else {
3028        trace_virtio_net_rss_disable();
3029    }
3030    return 0;
3031}
3032
3033static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3034{
3035    VirtIONet *n = VIRTIO_NET(vdev);
3036    /*
3037     * The actual needed state is now in saved_guest_offloads,
3038     * see virtio_net_post_load_device for detail.
3039     * Restore it back and apply the desired offloads.
3040     */
3041    n->curr_guest_offloads = n->saved_guest_offloads;
3042    if (peer_has_vnet_hdr(n)) {
3043        virtio_net_apply_guest_offloads(n);
3044    }
3045
3046    return 0;
3047}
3048
3049/* tx_waiting field of a VirtIONetQueue */
3050static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3051    .name = "virtio-net-queue-tx_waiting",
3052    .fields = (VMStateField[]) {
3053        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3054        VMSTATE_END_OF_LIST()
3055   },
3056};
3057
3058static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3059{
3060    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3061}
3062
3063static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3064{
3065    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3066                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3067}
3068
3069static bool mac_table_fits(void *opaque, int version_id)
3070{
3071    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3072}
3073
3074static bool mac_table_doesnt_fit(void *opaque, int version_id)
3075{
3076    return !mac_table_fits(opaque, version_id);
3077}
3078
3079/* This temporary type is shared by all the WITH_TMP methods
3080 * although only some fields are used by each.
3081 */
3082struct VirtIONetMigTmp {
3083    VirtIONet      *parent;
3084    VirtIONetQueue *vqs_1;
3085    uint16_t        curr_queue_pairs_1;
3086    uint8_t         has_ufo;
3087    uint32_t        has_vnet_hdr;
3088};
3089
3090/* The 2nd and subsequent tx_waiting flags are loaded later than
3091 * the 1st entry in the queue_pairs and only if there's more than one
3092 * entry.  We use the tmp mechanism to calculate a temporary
3093 * pointer and count and also validate the count.
3094 */
3095
3096static int virtio_net_tx_waiting_pre_save(void *opaque)
3097{
3098    struct VirtIONetMigTmp *tmp = opaque;
3099
3100    tmp->vqs_1 = tmp->parent->vqs + 1;
3101    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3102    if (tmp->parent->curr_queue_pairs == 0) {
3103        tmp->curr_queue_pairs_1 = 0;
3104    }
3105
3106    return 0;
3107}
3108
3109static int virtio_net_tx_waiting_pre_load(void *opaque)
3110{
3111    struct VirtIONetMigTmp *tmp = opaque;
3112
3113    /* Reuse the pointer setup from save */
3114    virtio_net_tx_waiting_pre_save(opaque);
3115
3116    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3117        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3118            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3119
3120        return -EINVAL;
3121    }
3122
3123    return 0; /* all good */
3124}
3125
3126static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3127    .name      = "virtio-net-tx_waiting",
3128    .pre_load  = virtio_net_tx_waiting_pre_load,
3129    .pre_save  = virtio_net_tx_waiting_pre_save,
3130    .fields    = (VMStateField[]) {
3131        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3132                                     curr_queue_pairs_1,
3133                                     vmstate_virtio_net_queue_tx_waiting,
3134                                     struct VirtIONetQueue),
3135        VMSTATE_END_OF_LIST()
3136    },
3137};
3138
3139/* the 'has_ufo' flag is just tested; if the incoming stream has the
3140 * flag set we need to check that we have it
3141 */
3142static int virtio_net_ufo_post_load(void *opaque, int version_id)
3143{
3144    struct VirtIONetMigTmp *tmp = opaque;
3145
3146    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3147        error_report("virtio-net: saved image requires TUN_F_UFO support");
3148        return -EINVAL;
3149    }
3150
3151    return 0;
3152}
3153
3154static int virtio_net_ufo_pre_save(void *opaque)
3155{
3156    struct VirtIONetMigTmp *tmp = opaque;
3157
3158    tmp->has_ufo = tmp->parent->has_ufo;
3159
3160    return 0;
3161}
3162
3163static const VMStateDescription vmstate_virtio_net_has_ufo = {
3164    .name      = "virtio-net-ufo",
3165    .post_load = virtio_net_ufo_post_load,
3166    .pre_save  = virtio_net_ufo_pre_save,
3167    .fields    = (VMStateField[]) {
3168        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3169        VMSTATE_END_OF_LIST()
3170    },
3171};
3172
3173/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3174 * flag set we need to check that we have it
3175 */
3176static int virtio_net_vnet_post_load(void *opaque, int version_id)
3177{
3178    struct VirtIONetMigTmp *tmp = opaque;
3179
3180    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3181        error_report("virtio-net: saved image requires vnet_hdr=on");
3182        return -EINVAL;
3183    }
3184
3185    return 0;
3186}
3187
3188static int virtio_net_vnet_pre_save(void *opaque)
3189{
3190    struct VirtIONetMigTmp *tmp = opaque;
3191
3192    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3193
3194    return 0;
3195}
3196
3197static const VMStateDescription vmstate_virtio_net_has_vnet = {
3198    .name      = "virtio-net-vnet",
3199    .post_load = virtio_net_vnet_post_load,
3200    .pre_save  = virtio_net_vnet_pre_save,
3201    .fields    = (VMStateField[]) {
3202        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3203        VMSTATE_END_OF_LIST()
3204    },
3205};
3206
3207static bool virtio_net_rss_needed(void *opaque)
3208{
3209    return VIRTIO_NET(opaque)->rss_data.enabled;
3210}
3211
3212static const VMStateDescription vmstate_virtio_net_rss = {
3213    .name      = "virtio-net-device/rss",
3214    .version_id = 1,
3215    .minimum_version_id = 1,
3216    .needed = virtio_net_rss_needed,
3217    .fields = (VMStateField[]) {
3218        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3219        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3220        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3221        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3222        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3223        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3224        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3225                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3226        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3227                                    rss_data.indirections_len, 0,
3228                                    vmstate_info_uint16, uint16_t),
3229        VMSTATE_END_OF_LIST()
3230    },
3231};
3232
3233static const VMStateDescription vmstate_virtio_net_device = {
3234    .name = "virtio-net-device",
3235    .version_id = VIRTIO_NET_VM_VERSION,
3236    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3237    .post_load = virtio_net_post_load_device,
3238    .fields = (VMStateField[]) {
3239        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3240        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3241                               vmstate_virtio_net_queue_tx_waiting,
3242                               VirtIONetQueue),
3243        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3244        VMSTATE_UINT16(status, VirtIONet),
3245        VMSTATE_UINT8(promisc, VirtIONet),
3246        VMSTATE_UINT8(allmulti, VirtIONet),
3247        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3248
3249        /* Guarded pair: If it fits we load it, else we throw it away
3250         * - can happen if source has a larger MAC table.; post-load
3251         *  sets flags in this case.
3252         */
3253        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3254                                0, mac_table_fits, mac_table.in_use,
3255                                 ETH_ALEN),
3256        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3257                                     mac_table.in_use, ETH_ALEN),
3258
3259        /* Note: This is an array of uint32's that's always been saved as a
3260         * buffer; hold onto your endiannesses; it's actually used as a bitmap
3261         * but based on the uint.
3262         */
3263        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3264        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3265                         vmstate_virtio_net_has_vnet),
3266        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3267        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3268        VMSTATE_UINT8(alluni, VirtIONet),
3269        VMSTATE_UINT8(nomulti, VirtIONet),
3270        VMSTATE_UINT8(nouni, VirtIONet),
3271        VMSTATE_UINT8(nobcast, VirtIONet),
3272        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3273                         vmstate_virtio_net_has_ufo),
3274        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3275                            vmstate_info_uint16_equal, uint16_t),
3276        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3277        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3278                         vmstate_virtio_net_tx_waiting),
3279        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3280                            has_ctrl_guest_offloads),
3281        VMSTATE_END_OF_LIST()
3282   },
3283    .subsections = (const VMStateDescription * []) {
3284        &vmstate_virtio_net_rss,
3285        NULL
3286    }
3287};
3288
3289static NetClientInfo net_virtio_info = {
3290    .type = NET_CLIENT_DRIVER_NIC,
3291    .size = sizeof(NICState),
3292    .can_receive = virtio_net_can_receive,
3293    .receive = virtio_net_receive,
3294    .link_status_changed = virtio_net_set_link_status,
3295    .query_rx_filter = virtio_net_query_rxfilter,
3296    .announce = virtio_net_announce,
3297};
3298
3299static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3300{
3301    VirtIONet *n = VIRTIO_NET(vdev);
3302    NetClientState *nc;
3303    assert(n->vhost_started);
3304    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3305        /* Must guard against invalid features and bogus queue index
3306         * from being set by malicious guest, or penetrated through
3307         * buggy migration stream.
3308         */
3309        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3310            qemu_log_mask(LOG_GUEST_ERROR,
3311                          "%s: bogus vq index ignored\n", __func__);
3312            return false;
3313        }
3314        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3315    } else {
3316        nc = qemu_get_subqueue(n->nic, vq2q(idx));
3317    }
3318    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3319}
3320
3321static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3322                                           bool mask)
3323{
3324    VirtIONet *n = VIRTIO_NET(vdev);
3325    NetClientState *nc;
3326    assert(n->vhost_started);
3327    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3328        /* Must guard against invalid features and bogus queue index
3329         * from being set by malicious guest, or penetrated through
3330         * buggy migration stream.
3331         */
3332        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3333            qemu_log_mask(LOG_GUEST_ERROR,
3334                          "%s: bogus vq index ignored\n", __func__);
3335            return;
3336        }
3337        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3338    } else {
3339        nc = qemu_get_subqueue(n->nic, vq2q(idx));
3340    }
3341    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3342                             vdev, idx, mask);
3343}
3344
3345static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3346{
3347    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3348
3349    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3350}
3351
3352void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3353                                   const char *type)
3354{
3355    /*
3356     * The name can be NULL, the netclient name will be type.x.
3357     */
3358    assert(type != NULL);
3359
3360    g_free(n->netclient_name);
3361    g_free(n->netclient_type);
3362    n->netclient_name = g_strdup(name);
3363    n->netclient_type = g_strdup(type);
3364}
3365
3366static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3367{
3368    HotplugHandler *hotplug_ctrl;
3369    PCIDevice *pci_dev;
3370    Error *err = NULL;
3371
3372    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3373    if (hotplug_ctrl) {
3374        pci_dev = PCI_DEVICE(dev);
3375        pci_dev->partially_hotplugged = true;
3376        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3377        if (err) {
3378            error_report_err(err);
3379            return false;
3380        }
3381    } else {
3382        return false;
3383    }
3384    return true;
3385}
3386
3387static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3388                                    Error **errp)
3389{
3390    Error *err = NULL;
3391    HotplugHandler *hotplug_ctrl;
3392    PCIDevice *pdev = PCI_DEVICE(dev);
3393    BusState *primary_bus;
3394
3395    if (!pdev->partially_hotplugged) {
3396        return true;
3397    }
3398    primary_bus = dev->parent_bus;
3399    if (!primary_bus) {
3400        error_setg(errp, "virtio_net: couldn't find primary bus");
3401        return false;
3402    }
3403    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3404    qatomic_set(&n->failover_primary_hidden, false);
3405    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3406    if (hotplug_ctrl) {
3407        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3408        if (err) {
3409            goto out;
3410        }
3411        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3412    }
3413    pdev->partially_hotplugged = false;
3414
3415out:
3416    error_propagate(errp, err);
3417    return !err;
3418}
3419
3420static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3421{
3422    bool should_be_hidden;
3423    Error *err = NULL;
3424    DeviceState *dev = failover_find_primary_device(n);
3425
3426    if (!dev) {
3427        return;
3428    }
3429
3430    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3431
3432    if (migration_in_setup(s) && !should_be_hidden) {
3433        if (failover_unplug_primary(n, dev)) {
3434            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3435            qapi_event_send_unplug_primary(dev->id);
3436            qatomic_set(&n->failover_primary_hidden, true);
3437        } else {
3438            warn_report("couldn't unplug primary device");
3439        }
3440    } else if (migration_has_failed(s)) {
3441        /* We already unplugged the device let's plug it back */
3442        if (!failover_replug_primary(n, dev, &err)) {
3443            if (err) {
3444                error_report_err(err);
3445            }
3446        }
3447    }
3448}
3449
3450static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3451{
3452    MigrationState *s = data;
3453    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3454    virtio_net_handle_migration_primary(n, s);
3455}
3456
3457static bool failover_hide_primary_device(DeviceListener *listener,
3458                                         const QDict *device_opts,
3459                                         bool from_json,
3460                                         Error **errp)
3461{
3462    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3463    const char *standby_id;
3464
3465    if (!device_opts) {
3466        return false;
3467    }
3468
3469    if (!qdict_haskey(device_opts, "failover_pair_id")) {
3470        return false;
3471    }
3472
3473    if (!qdict_haskey(device_opts, "id")) {
3474        error_setg(errp, "Device with failover_pair_id needs to have id");
3475        return false;
3476    }
3477
3478    standby_id = qdict_get_str(device_opts, "failover_pair_id");
3479    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3480        return false;
3481    }
3482
3483    /*
3484     * The hide helper can be called several times for a given device.
3485     * Check there is only one primary for a virtio-net device but
3486     * don't duplicate the qdict several times if it's called for the same
3487     * device.
3488     */
3489    if (n->primary_opts) {
3490        const char *old, *new;
3491        /* devices with failover_pair_id always have an id */
3492        old = qdict_get_str(n->primary_opts, "id");
3493        new = qdict_get_str(device_opts, "id");
3494        if (strcmp(old, new) != 0) {
3495            error_setg(errp, "Cannot attach more than one primary device to "
3496                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
3497            return false;
3498        }
3499    } else {
3500        n->primary_opts = qdict_clone_shallow(device_opts);
3501        n->primary_opts_from_json = from_json;
3502    }
3503
3504    /* failover_primary_hidden is set during feature negotiation */
3505    return qatomic_read(&n->failover_primary_hidden);
3506}
3507
3508static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3509{
3510    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3511    VirtIONet *n = VIRTIO_NET(dev);
3512    NetClientState *nc;
3513    int i;
3514
3515    if (n->net_conf.mtu) {
3516        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3517    }
3518
3519    if (n->net_conf.duplex_str) {
3520        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3521            n->net_conf.duplex = DUPLEX_HALF;
3522        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3523            n->net_conf.duplex = DUPLEX_FULL;
3524        } else {
3525            error_setg(errp, "'duplex' must be 'half' or 'full'");
3526            return;
3527        }
3528        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3529    } else {
3530        n->net_conf.duplex = DUPLEX_UNKNOWN;
3531    }
3532
3533    if (n->net_conf.speed < SPEED_UNKNOWN) {
3534        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3535        return;
3536    }
3537    if (n->net_conf.speed >= 0) {
3538        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3539    }
3540
3541    if (n->failover) {
3542        n->primary_listener.hide_device = failover_hide_primary_device;
3543        qatomic_set(&n->failover_primary_hidden, true);
3544        device_listener_register(&n->primary_listener);
3545        n->migration_state.notify = virtio_net_migration_state_notifier;
3546        add_migration_state_change_notifier(&n->migration_state);
3547        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3548    }
3549
3550    virtio_net_set_config_size(n, n->host_features);
3551    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3552
3553    /*
3554     * We set a lower limit on RX queue size to what it always was.
3555     * Guests that want a smaller ring can always resize it without
3556     * help from us (using virtio 1 and up).
3557     */
3558    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3559        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3560        !is_power_of_2(n->net_conf.rx_queue_size)) {
3561        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3562                   "must be a power of 2 between %d and %d.",
3563                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3564                   VIRTQUEUE_MAX_SIZE);
3565        virtio_cleanup(vdev);
3566        return;
3567    }
3568
3569    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3570        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3571        !is_power_of_2(n->net_conf.tx_queue_size)) {
3572        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3573                   "must be a power of 2 between %d and %d",
3574                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3575                   VIRTQUEUE_MAX_SIZE);
3576        virtio_cleanup(vdev);
3577        return;
3578    }
3579
3580    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3581
3582    /*
3583     * Figure out the datapath queue pairs since the backend could
3584     * provide control queue via peers as well.
3585     */
3586    if (n->nic_conf.peers.queues) {
3587        for (i = 0; i < n->max_ncs; i++) {
3588            if (n->nic_conf.peers.ncs[i]->is_datapath) {
3589                ++n->max_queue_pairs;
3590            }
3591        }
3592    }
3593    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3594
3595    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3596        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3597                   "must be a positive integer less than %d.",
3598                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3599        virtio_cleanup(vdev);
3600        return;
3601    }
3602    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3603    n->curr_queue_pairs = 1;
3604    n->tx_timeout = n->net_conf.txtimer;
3605
3606    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3607                       && strcmp(n->net_conf.tx, "bh")) {
3608        warn_report("virtio-net: "
3609                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3610                    n->net_conf.tx);
3611        error_printf("Defaulting to \"bh\"");
3612    }
3613
3614    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3615                                    n->net_conf.tx_queue_size);
3616
3617    for (i = 0; i < n->max_queue_pairs; i++) {
3618        virtio_net_add_queue(n, i);
3619    }
3620
3621    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3622    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3623    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3624    n->status = VIRTIO_NET_S_LINK_UP;
3625    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3626                              QEMU_CLOCK_VIRTUAL,
3627                              virtio_net_announce_timer, n);
3628    n->announce_timer.round = 0;
3629
3630    if (n->netclient_type) {
3631        /*
3632         * Happen when virtio_net_set_netclient_name has been called.
3633         */
3634        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3635                              n->netclient_type, n->netclient_name, n);
3636    } else {
3637        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3638                              object_get_typename(OBJECT(dev)), dev->id, n);
3639    }
3640
3641    for (i = 0; i < n->max_queue_pairs; i++) {
3642        n->nic->ncs[i].do_not_pad = true;
3643    }
3644
3645    peer_test_vnet_hdr(n);
3646    if (peer_has_vnet_hdr(n)) {
3647        for (i = 0; i < n->max_queue_pairs; i++) {
3648            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3649        }
3650        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3651    } else {
3652        n->host_hdr_len = 0;
3653    }
3654
3655    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3656
3657    n->vqs[0].tx_waiting = 0;
3658    n->tx_burst = n->net_conf.txburst;
3659    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3660    n->promisc = 1; /* for compatibility */
3661
3662    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3663
3664    n->vlans = g_malloc0(MAX_VLAN >> 3);
3665
3666    nc = qemu_get_queue(n->nic);
3667    nc->rxfilter_notify_enabled = 1;
3668
3669   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3670        struct virtio_net_config netcfg = {};
3671        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3672        vhost_net_set_config(get_vhost_net(nc->peer),
3673            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3674    }
3675    QTAILQ_INIT(&n->rsc_chains);
3676    n->qdev = dev;
3677
3678    net_rx_pkt_init(&n->rx_pkt, false);
3679
3680    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3681        virtio_net_load_ebpf(n);
3682    }
3683}
3684
3685static void virtio_net_device_unrealize(DeviceState *dev)
3686{
3687    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3688    VirtIONet *n = VIRTIO_NET(dev);
3689    int i, max_queue_pairs;
3690
3691    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3692        virtio_net_unload_ebpf(n);
3693    }
3694
3695    /* This will stop vhost backend if appropriate. */
3696    virtio_net_set_status(vdev, 0);
3697
3698    g_free(n->netclient_name);
3699    n->netclient_name = NULL;
3700    g_free(n->netclient_type);
3701    n->netclient_type = NULL;
3702
3703    g_free(n->mac_table.macs);
3704    g_free(n->vlans);
3705
3706    if (n->failover) {
3707        qobject_unref(n->primary_opts);
3708        device_listener_unregister(&n->primary_listener);
3709        remove_migration_state_change_notifier(&n->migration_state);
3710    } else {
3711        assert(n->primary_opts == NULL);
3712    }
3713
3714    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3715    for (i = 0; i < max_queue_pairs; i++) {
3716        virtio_net_del_queue(n, i);
3717    }
3718    /* delete also control vq */
3719    virtio_del_queue(vdev, max_queue_pairs * 2);
3720    qemu_announce_timer_del(&n->announce_timer, false);
3721    g_free(n->vqs);
3722    qemu_del_nic(n->nic);
3723    virtio_net_rsc_cleanup(n);
3724    g_free(n->rss_data.indirections_table);
3725    net_rx_pkt_uninit(n->rx_pkt);
3726    virtio_cleanup(vdev);
3727}
3728
3729static void virtio_net_instance_init(Object *obj)
3730{
3731    VirtIONet *n = VIRTIO_NET(obj);
3732
3733    /*
3734     * The default config_size is sizeof(struct virtio_net_config).
3735     * Can be overriden with virtio_net_set_config_size.
3736     */
3737    n->config_size = sizeof(struct virtio_net_config);
3738    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3739                                  "bootindex", "/ethernet-phy@0",
3740                                  DEVICE(n));
3741
3742    ebpf_rss_init(&n->ebpf_rss);
3743}
3744
3745static int virtio_net_pre_save(void *opaque)
3746{
3747    VirtIONet *n = opaque;
3748
3749    /* At this point, backend must be stopped, otherwise
3750     * it might keep writing to memory. */
3751    assert(!n->vhost_started);
3752
3753    return 0;
3754}
3755
3756static bool primary_unplug_pending(void *opaque)
3757{
3758    DeviceState *dev = opaque;
3759    DeviceState *primary;
3760    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3761    VirtIONet *n = VIRTIO_NET(vdev);
3762
3763    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3764        return false;
3765    }
3766    primary = failover_find_primary_device(n);
3767    return primary ? primary->pending_deleted_event : false;
3768}
3769
3770static bool dev_unplug_pending(void *opaque)
3771{
3772    DeviceState *dev = opaque;
3773    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3774
3775    return vdc->primary_unplug_pending(dev);
3776}
3777
3778static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3779{
3780    VirtIONet *n = VIRTIO_NET(vdev);
3781    NetClientState *nc = qemu_get_queue(n->nic);
3782    struct vhost_net *net = get_vhost_net(nc->peer);
3783    return &net->dev;
3784}
3785
3786static const VMStateDescription vmstate_virtio_net = {
3787    .name = "virtio-net",
3788    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3789    .version_id = VIRTIO_NET_VM_VERSION,
3790    .fields = (VMStateField[]) {
3791        VMSTATE_VIRTIO_DEVICE,
3792        VMSTATE_END_OF_LIST()
3793    },
3794    .pre_save = virtio_net_pre_save,
3795    .dev_unplug_pending = dev_unplug_pending,
3796};
3797
3798static Property virtio_net_properties[] = {
3799    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3800                    VIRTIO_NET_F_CSUM, true),
3801    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3802                    VIRTIO_NET_F_GUEST_CSUM, true),
3803    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3804    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3805                    VIRTIO_NET_F_GUEST_TSO4, true),
3806    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3807                    VIRTIO_NET_F_GUEST_TSO6, true),
3808    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3809                    VIRTIO_NET_F_GUEST_ECN, true),
3810    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3811                    VIRTIO_NET_F_GUEST_UFO, true),
3812    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3813                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3814    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3815                    VIRTIO_NET_F_HOST_TSO4, true),
3816    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3817                    VIRTIO_NET_F_HOST_TSO6, true),
3818    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3819                    VIRTIO_NET_F_HOST_ECN, true),
3820    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3821                    VIRTIO_NET_F_HOST_UFO, true),
3822    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3823                    VIRTIO_NET_F_MRG_RXBUF, true),
3824    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3825                    VIRTIO_NET_F_STATUS, true),
3826    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3827                    VIRTIO_NET_F_CTRL_VQ, true),
3828    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3829                    VIRTIO_NET_F_CTRL_RX, true),
3830    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3831                    VIRTIO_NET_F_CTRL_VLAN, true),
3832    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3833                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3834    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3835                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3836    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3837                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3838    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3839    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3840                    VIRTIO_NET_F_RSS, false),
3841    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3842                    VIRTIO_NET_F_HASH_REPORT, false),
3843    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3844                    VIRTIO_NET_F_RSC_EXT, false),
3845    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3846                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3847    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3848    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3849                       TX_TIMER_INTERVAL),
3850    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3851    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3852    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3853                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3854    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3855                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3856    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3857    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3858                     true),
3859    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3860    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3861    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3862    DEFINE_PROP_END_OF_LIST(),
3863};
3864
3865static void virtio_net_class_init(ObjectClass *klass, void *data)
3866{
3867    DeviceClass *dc = DEVICE_CLASS(klass);
3868    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3869
3870    device_class_set_props(dc, virtio_net_properties);
3871    dc->vmsd = &vmstate_virtio_net;
3872    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3873    vdc->realize = virtio_net_device_realize;
3874    vdc->unrealize = virtio_net_device_unrealize;
3875    vdc->get_config = virtio_net_get_config;
3876    vdc->set_config = virtio_net_set_config;
3877    vdc->get_features = virtio_net_get_features;
3878    vdc->set_features = virtio_net_set_features;
3879    vdc->bad_features = virtio_net_bad_features;
3880    vdc->reset = virtio_net_reset;
3881    vdc->queue_reset = virtio_net_queue_reset;
3882    vdc->queue_enable = virtio_net_queue_enable;
3883    vdc->set_status = virtio_net_set_status;
3884    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3885    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3886    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3887    vdc->post_load = virtio_net_post_load_virtio;
3888    vdc->vmsd = &vmstate_virtio_net_device;
3889    vdc->primary_unplug_pending = primary_unplug_pending;
3890    vdc->get_vhost = virtio_net_get_vhost;
3891}
3892
3893static const TypeInfo virtio_net_info = {
3894    .name = TYPE_VIRTIO_NET,
3895    .parent = TYPE_VIRTIO_DEVICE,
3896    .instance_size = sizeof(VirtIONet),
3897    .instance_init = virtio_net_instance_init,
3898    .class_init = virtio_net_class_init,
3899};
3900
3901static void virtio_register_types(void)
3902{
3903    type_register_static(&virtio_net_info);
3904}
3905
3906type_init(virtio_register_types)
3907