/* qemu/hw/net/virtio-net.c */
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/atomic.h"
  16#include "qemu/iov.h"
  17#include "qemu/log.h"
  18#include "qemu/main-loop.h"
  19#include "qemu/module.h"
  20#include "hw/virtio/virtio.h"
  21#include "net/net.h"
  22#include "net/checksum.h"
  23#include "net/tap.h"
  24#include "qemu/error-report.h"
  25#include "qemu/timer.h"
  26#include "qemu/option.h"
  27#include "qemu/option_int.h"
  28#include "qemu/config-file.h"
  29#include "qapi/qmp/qdict.h"
  30#include "hw/virtio/virtio-net.h"
  31#include "net/vhost_net.h"
  32#include "net/announce.h"
  33#include "hw/virtio/virtio-bus.h"
  34#include "qapi/error.h"
  35#include "qapi/qapi-events-net.h"
  36#include "hw/qdev-properties.h"
  37#include "qapi/qapi-types-migration.h"
  38#include "qapi/qapi-events-migration.h"
  39#include "hw/virtio/virtio-access.h"
  40#include "migration/misc.h"
  41#include "standard-headers/linux/ethtool.h"
  42#include "sysemu/sysemu.h"
  43#include "trace.h"
  44#include "monitor/qdev.h"
  45#include "hw/pci/pci.h"
  46#include "net_rx_pkt.h"
  47#include "hw/virtio/vhost.h"
  48#include "sysemu/qtest.h"
  49
  50#define VIRTIO_NET_VM_VERSION    11
  51
  52#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  53
  54/* previously fixed value */
  55#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  56#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  57
  58/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
  59#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  60#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  61
  62#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  63
  64#define VIRTIO_NET_TCP_FLAG         0x3F
  65#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  66
  67/* IPv4 max payload, 16 bits in the header */
  68#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  69#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  70
  71/* header length value in ip header without option */
  72#define VIRTIO_NET_IP4_HEADER_LENGTH 5
  73
  74#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  75#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  76
  77/* Purge coalesced packets timer interval, This value affects the performance
  78   a lot, and should be tuned carefully, '300000'(300us) is the recommended
  79   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
  80   tso/gso/gro 'off'. */
  81#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  82
  83#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  84                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  85                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  86                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  87                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  88                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  89                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  90                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  91                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  92
/*
 * Maps offered feature bits to the config-space size the guest may
 * access: the visible config ends after the last field whose feature
 * is present.  Terminated by the zero-initialized sentinel entry.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    /* RSS and hash-report share the trailing hash-related fields. */
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
 108
 109static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 110{
 111    VirtIONet *n = qemu_get_nic_opaque(nc);
 112
 113    return &n->vqs[nc->queue_index];
 114}
 115
 116static int vq2q(int queue_index)
 117{
 118    return queue_index / 2;
 119}
 120
 121/* TODO
 122 * - we could suppress RX interrupt if we were so inclined.
 123 */
 124
/*
 * VirtIODevice get_config hook: fill @config with the current
 * virtio-net config space.  The struct is built from QEMU's view of
 * the device first; for a vhost-vdpa peer it is then re-read from the
 * backend so the guest sees the backend's state (with a fallback for
 * backends that report an all-zero MAC).
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    /* Multi-byte fields use the endianness negotiated with the guest. */
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without RSS only a trivial one-entry indirection table is advertised. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    /* Only config_size bytes are exposed (see feature_sizes[]). */
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
 171
 172static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 173{
 174    VirtIONet *n = VIRTIO_NET(vdev);
 175    struct virtio_net_config netcfg = {};
 176    NetClientState *nc = qemu_get_queue(n->nic);
 177
 178    memcpy(&netcfg, config, n->config_size);
 179
 180    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 181        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 182        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 183        memcpy(n->mac, netcfg.mac, ETH_ALEN);
 184        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 185    }
 186
 187    /*
 188     * Is this VDPA? No peer means not VDPA: there's no way to
 189     * disconnect/reconnect a VDPA peer.
 190     */
 191    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 192        vhost_net_set_config(get_vhost_net(nc->peer),
 193                             (uint8_t *)&netcfg, 0, n->config_size,
 194                             VHOST_SET_CONFIG_TYPE_MASTER);
 195      }
 196}
 197
 198static bool virtio_net_started(VirtIONet *n, uint8_t status)
 199{
 200    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 201    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 202        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 203}
 204
 205static void virtio_net_announce_notify(VirtIONet *net)
 206{
 207    VirtIODevice *vdev = VIRTIO_DEVICE(net);
 208    trace_virtio_net_announce_notify();
 209
 210    net->status |= VIRTIO_NET_S_ANNOUNCE;
 211    virtio_notify_config(vdev);
 212}
 213
/*
 * Announce-timer callback: consume one announce round and request a
 * guest self-announcement via a config-change notification.
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    /* One round is consumed per fire; re-arming happens elsewhere
     * (not visible in this chunk). */
    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
 222
/*
 * NetClientState announce callback: request a guest self-announcement
 * if the negotiated features allow the guest to perform it.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    /* Guest-driven announce needs both GUEST_ANNOUNCE and a control VQ. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            virtio_net_announce_notify(n);
    }
}
 242
/*
 * Start or stop the vhost backend so that it matches the desired
 * device state derived from @status, the peer link state and
 * vm_running.  No-op when there is no vhost backend or when the
 * backend is already in the wanted state.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    /* Number of control virtqueues (0 or 1) handed to vhost. */
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the desired state? */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* Vhost cannot byte-swap vnet headers; stay in userspace then. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Push the negotiated MTU down before starting the backend. */
        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set the flag before starting so callbacks see a consistent state;
         * roll it back if the start fails. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
 302
 303static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 304                                          NetClientState *peer,
 305                                          bool enable)
 306{
 307    if (virtio_is_big_endian(vdev)) {
 308        return qemu_set_vnet_be(peer, enable);
 309    } else {
 310        return qemu_set_vnet_le(peer, enable);
 311    }
 312}
 313
/*
 * Apply the vnet-header endianness setting to all @queue_pairs peers.
 *
 * Returns true when the core must swap headers itself, i.e. at least
 * one backend rejected the setting while enabling; in that case the
 * peers already configured are rolled back.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* Undo the peers configured so far, in reverse order. */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
 332
/*
 * Keep the backend's vnet-header endianness in sync with device
 * start/stop transitions (called from the set_status path with the
 * new @status; vdev->status still holds the previous one).
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}
 355
 356static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 357{
 358    unsigned int dropped = virtqueue_drop_all(vq);
 359    if (dropped) {
 360        virtio_notify(vdev, vq);
 361    }
 362}
 363
/*
 * VirtIODevice set_status hook: propagate the new status to the vnet
 * endianness handling and vhost, then start/stop per-queue userspace
 * TX processing (timer or bottom half) accordingly.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond the active set behave as if the device is down. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Userspace processing only; vhost owns the rings otherwise. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Resume deferred TX via the configured mechanism. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
 420
 421static void virtio_net_set_link_status(NetClientState *nc)
 422{
 423    VirtIONet *n = qemu_get_nic_opaque(nc);
 424    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 425    uint16_t old_status = n->status;
 426
 427    if (nc->link_down)
 428        n->status &= ~VIRTIO_NET_S_LINK_UP;
 429    else
 430        n->status |= VIRTIO_NET_S_LINK_UP;
 431
 432    if (n->status != old_status)
 433        virtio_notify_config(vdev);
 434
 435    virtio_net_set_status(vdev, vdev->status);
 436}
 437
 438static void rxfilter_notify(NetClientState *nc)
 439{
 440    VirtIONet *n = qemu_get_nic_opaque(nc);
 441
 442    if (nc->rxfilter_notify_enabled) {
 443        char *path = object_get_canonical_path(OBJECT(n->qdev));
 444        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 445                                              n->netclient_name, path);
 446        g_free(path);
 447
 448        /* disable event notification to avoid events flooding */
 449        nc->rxfilter_notify_enabled = 0;
 450    }
 451}
 452
 453static intList *get_vlan_table(VirtIONet *n)
 454{
 455    intList *list;
 456    int i, j;
 457
 458    list = NULL;
 459    for (i = 0; i < MAX_VLAN >> 5; i++) {
 460        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 461            if (n->vlans[i] & (1U << j)) {
 462                QAPI_LIST_PREPEND(list, (i << 5) + j);
 463            }
 464        }
 465    }
 466
 467    return list;
 468}
 469
/*
 * NetClientState query_rx_filter callback: snapshot the device's RX
 * filter state (promisc/uni/multi modes, MAC tables, VLAN table) into
 * a freshly allocated RxFilterInfo, and re-enable change events.
 * Caller owns the returned structure.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* NOTE(review): this stores n->nobcast ("no broadcast") directly into
     * broadcast_allowed, which looks inverted -- confirm against the QAPI
     * RxFilterInfo contract before changing. */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast entries occupy the table up to first_multi. */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    /* Multicast entries follow, up to in_use. */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
 532
/*
 * VirtIODevice reset hook: return the RX filter, announce machinery,
 * MAC/VLAN tables and queue configuration to their power-on defaults,
 * and purge any in-flight TX on the peers.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* Restore the command-line MAC, which a guest may have changed. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
 571
 572static void peer_test_vnet_hdr(VirtIONet *n)
 573{
 574    NetClientState *nc = qemu_get_queue(n->nic);
 575    if (!nc->peer) {
 576        return;
 577    }
 578
 579    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 580}
 581
/* Accessor for the result cached by peer_test_vnet_hdr(). */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
 586
 587static int peer_has_ufo(VirtIONet *n)
 588{
 589    if (!peer_has_vnet_hdr(n))
 590        return 0;
 591
 592    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 593
 594    return n->has_ufo;
 595}
 596
/*
 * Recompute the guest-visible vnet header length after feature
 * negotiation and, when possible, make the backends use the same
 * header length so no conversion is needed.
 *
 * @mergeable_rx_bufs: VIRTIO_NET_F_MRG_RXBUF negotiated
 * @version_1: VIRTIO_F_VERSION_1 negotiated
 * @hash_report: VIRTIO_NET_F_HASH_REPORT negotiated
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* Modern devices always use the mrg_rxbuf-sized header; the
         * larger v1_hash layout is needed to carry hash reports. */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        /* If the backend can produce the guest's header length directly,
         * host and guest headers match and no copy/fixup is required. */
        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
 626
 627static int virtio_net_max_tx_queue_size(VirtIONet *n)
 628{
 629    NetClientState *peer = n->nic_conf.peers.ncs[0];
 630
 631    /*
 632     * Backends other than vhost-user or vhost-vdpa don't support max queue
 633     * size.
 634     */
 635    if (!peer) {
 636        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 637    }
 638
 639    switch(peer->info->type) {
 640    case NET_CLIENT_DRIVER_VHOST_USER:
 641    case NET_CLIENT_DRIVER_VHOST_VDPA:
 642        return VIRTQUEUE_MAX_SIZE;
 643    default:
 644        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 645    };
 646}
 647
 648static int peer_attach(VirtIONet *n, int index)
 649{
 650    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 651
 652    if (!nc->peer) {
 653        return 0;
 654    }
 655
 656    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 657        vhost_set_vring_enable(nc->peer, 1);
 658    }
 659
 660    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 661        return 0;
 662    }
 663
 664    if (n->max_queue_pairs == 1) {
 665        return 0;
 666    }
 667
 668    return tap_enable(nc->peer);
 669}
 670
 671static int peer_detach(VirtIONet *n, int index)
 672{
 673    NetClientState *nc = qemu_get_subqueue(n->nic, index);
 674
 675    if (!nc->peer) {
 676        return 0;
 677    }
 678
 679    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 680        vhost_set_vring_enable(nc->peer, 0);
 681    }
 682
 683    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 684        return 0;
 685    }
 686
 687    return tap_disable(nc->peer);
 688}
 689
 690static void virtio_net_set_queue_pairs(VirtIONet *n)
 691{
 692    int i;
 693    int r;
 694
 695    if (n->nic->peer_deleted) {
 696        return;
 697    }
 698
 699    for (i = 0; i < n->max_queue_pairs; i++) {
 700        if (i < n->curr_queue_pairs) {
 701            r = peer_attach(n, i);
 702            assert(!r);
 703        } else {
 704            r = peer_detach(n, i);
 705            assert(!r);
 706        }
 707    }
 708}
 709
 710static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 711
/*
 * VirtIODevice get_features hook: start from the host feature set,
 * drop everything the peer backend cannot support (vnet headers, UFO,
 * RSS without the eBPF program), and let a vhost backend further
 * filter the result.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* Without vnet headers no checksum/TSO offloads can be passed on. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Non-vhost backends are done here. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS steering needs the eBPF program when vhost is in use. */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally offer MTU even when the vhost backend filtered it out. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
 759
 760static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 761{
 762    uint64_t features = 0;
 763
 764    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 765     * but also these: */
 766    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 767    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 768    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 769    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 770    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 771
 772    return features;
 773}
 774
 775static void virtio_net_apply_guest_offloads(VirtIONet *n)
 776{
 777    qemu_set_offload(qemu_get_queue(n->nic)->peer,
 778            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 779            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 780            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 781            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 782            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 783}
 784
 785static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 786{
 787    static const uint64_t guest_offloads_mask =
 788        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 789        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 790        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 791        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 792        (1ULL << VIRTIO_NET_F_GUEST_UFO);
 793
 794    return guest_offloads_mask & features;
 795}
 796
/* Guest offload bits actually negotiated on this device instance. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
 802
/* Result holder for the qbus walk that locates the failover primary. */
typedef struct {
    VirtIONet *n;       /* virtio-net device owning the failover pair */
    DeviceState *dev;   /* matching primary device, or NULL if not found */
} FailoverDevice;
 807
/**
 * Set the failover primary device
 *
 * qbus-walk callback: matches a PCI device whose failover_pair_id
 * equals this virtio-net's netclient name and records it.
 *
 * @dev: device currently visited by the walk
 * @opaque: FailoverDevice to fill in
 *
 * Returns 1 to stop the walk once the primary is found, 0 to continue.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    /* Only PCI devices can be failover primaries. */
    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}
 832
/**
 * Find the primary device for this failover virtio-net
 *
 * Walks the whole sysbus hierarchy looking for a PCI device whose
 * failover_pair_id matches this NIC.
 *
 * @n: VirtIONet device
 *
 * Returns the primary DeviceState, or NULL if none is present.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
 849
/*
 * Hotplug the failover primary device if it is not already present.
 *
 * When no primary exists and no device options were stashed, report
 * the configuration problem through @errp; otherwise create the
 * device from the stashed QDict.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    /* Already plugged in -- nothing to do. */
    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        /* Creation failed: drop the stashed opts. */
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        /* The add returned a new reference we do not keep. */
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
 878
/*
 * VirtIODevice set_features hook: apply the guest-negotiated feature
 * set to queue layout, vnet header sizes, RSC/RSS state, backend
 * offloads, VLAN filtering and failover.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Mirror the get_features-side mtu_bypass_backend handling. */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    /* RSS implies multiqueue even without VIRTIO_NET_F_MQ. */
    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC coalescing is only active per-protocol with the matching TSO bit. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Let every vhost backend acknowledge the negotiated set. */
    for (i = 0;  i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN, filtering is off: pass all VLANs (all bits set). */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* Under qtest the missing primary is expected; stay quiet. */
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
 942
 943static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 944                                     struct iovec *iov, unsigned int iov_cnt)
 945{
 946    uint8_t on;
 947    size_t s;
 948    NetClientState *nc = qemu_get_queue(n->nic);
 949
 950    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 951    if (s != sizeof(on)) {
 952        return VIRTIO_NET_ERR;
 953    }
 954
 955    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 956        n->promisc = on;
 957    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 958        n->allmulti = on;
 959    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 960        n->alluni = on;
 961    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 962        n->nomulti = on;
 963    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 964        n->nouni = on;
 965    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 966        n->nobcast = on;
 967    } else {
 968        return VIRTIO_NET_ERR;
 969    }
 970
 971    rxfilter_notify(nc);
 972
 973    return VIRTIO_NET_OK;
 974}
 975
 976static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 977                                     struct iovec *iov, unsigned int iov_cnt)
 978{
 979    VirtIODevice *vdev = VIRTIO_DEVICE(n);
 980    uint64_t offloads;
 981    size_t s;
 982
 983    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 984        return VIRTIO_NET_ERR;
 985    }
 986
 987    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 988    if (s != sizeof(offloads)) {
 989        return VIRTIO_NET_ERR;
 990    }
 991
 992    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 993        uint64_t supported_offloads;
 994
 995        offloads = virtio_ldq_p(vdev, &offloads);
 996
 997        if (!n->has_vnet_hdr) {
 998            return VIRTIO_NET_ERR;
 999        }
1000
1001        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1002            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1003        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1004            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1005        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1006
1007        supported_offloads = virtio_net_supported_guest_offloads(n);
1008        if (offloads & ~supported_offloads) {
1009            return VIRTIO_NET_ERR;
1010        }
1011
1012        n->curr_guest_offloads = offloads;
1013        virtio_net_apply_guest_offloads(n);
1014
1015        return VIRTIO_NET_OK;
1016    } else {
1017        return VIRTIO_NET_ERR;
1018    }
1019}
1020
/*
 * VIRTIO_NET_CTRL_MAC: either set the primary MAC address
 * (MAC_ADDR_SET, payload is a raw 6-byte address) or replace the whole
 * MAC filter table (MAC_TABLE_SET).  The table payload is two
 * back-to-back virtio_net_ctrl_mac structures: first the unicast list,
 * then the multicast list, each a 32-bit entry count followed by that
 * many 6-byte addresses.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /*
     * Build the new table in a scratch buffer first so the live table is
     * only replaced once the entire command has been validated.
     */
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast list --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    /* Too many unicast entries: remember the overflow, keep none. */
    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    /* Multicast entries start right after the unicast ones. */
    first_multi = in_use;

    /* --- multicast list --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume the remainder of the payload exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Validation complete: commit the new table atomically w.r.t. errors. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1116
1117static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1118                                        struct iovec *iov, unsigned int iov_cnt)
1119{
1120    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1121    uint16_t vid;
1122    size_t s;
1123    NetClientState *nc = qemu_get_queue(n->nic);
1124
1125    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1126    vid = virtio_lduw_p(vdev, &vid);
1127    if (s != sizeof(vid)) {
1128        return VIRTIO_NET_ERR;
1129    }
1130
1131    if (vid >= MAX_VLAN)
1132        return VIRTIO_NET_ERR;
1133
1134    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1135        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1136    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1137        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1138    else
1139        return VIRTIO_NET_ERR;
1140
1141    rxfilter_notify(nc);
1142
1143    return VIRTIO_NET_OK;
1144}
1145
1146static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1147                                      struct iovec *iov, unsigned int iov_cnt)
1148{
1149    trace_virtio_net_handle_announce(n->announce_timer.round);
1150    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1151        n->status & VIRTIO_NET_S_ANNOUNCE) {
1152        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1153        if (n->announce_timer.round) {
1154            qemu_announce_timer_step(&n->announce_timer);
1155        }
1156        return VIRTIO_NET_OK;
1157    } else {
1158        return VIRTIO_NET_ERR;
1159    }
1160}
1161
1162static void virtio_net_detach_epbf_rss(VirtIONet *n);
1163
1164static void virtio_net_disable_rss(VirtIONet *n)
1165{
1166    if (n->rss_data.enabled) {
1167        trace_virtio_net_rss_disable();
1168    }
1169    n->rss_data.enabled = false;
1170
1171    virtio_net_detach_epbf_rss(n);
1172}
1173
1174static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1175{
1176    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1177    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1178        return false;
1179    }
1180
1181    return nc->info->set_steering_ebpf(nc, prog_fd);
1182}
1183
1184static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1185                                   struct EBPFRSSConfig *config)
1186{
1187    config->redirect = data->redirect;
1188    config->populate_hash = data->populate_hash;
1189    config->hash_types = data->hash_types;
1190    config->indirections_len = data->indirections_len;
1191    config->default_queue = data->default_queue;
1192}
1193
1194static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1195{
1196    struct EBPFRSSConfig config = {};
1197
1198    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1199        return false;
1200    }
1201
1202    rss_data_to_rss_config(&n->rss_data, &config);
1203
1204    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1205                          n->rss_data.indirections_table, n->rss_data.key)) {
1206        return false;
1207    }
1208
1209    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1210        return false;
1211    }
1212
1213    return true;
1214}
1215
/* Detach any steering eBPF program by passing an invalid fd (-1). */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
1220
/*
 * Load the RSS steering eBPF program, after probing (with fd -1) that
 * the backend supports attaching one at all.
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}
1230
/* Detach the steering program from the backend and unload it. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
1236
1237static uint16_t virtio_net_handle_rss(VirtIONet *n,
1238                                      struct iovec *iov,
1239                                      unsigned int iov_cnt,
1240                                      bool do_rss)
1241{
1242    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1243    struct virtio_net_rss_config cfg;
1244    size_t s, offset = 0, size_get;
1245    uint16_t queue_pairs, i;
1246    struct {
1247        uint16_t us;
1248        uint8_t b;
1249    } QEMU_PACKED temp;
1250    const char *err_msg = "";
1251    uint32_t err_value = 0;
1252
1253    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1254        err_msg = "RSS is not negotiated";
1255        goto error;
1256    }
1257    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1258        err_msg = "Hash report is not negotiated";
1259        goto error;
1260    }
1261    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1262    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1263    if (s != size_get) {
1264        err_msg = "Short command buffer";
1265        err_value = (uint32_t)s;
1266        goto error;
1267    }
1268    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1269    n->rss_data.indirections_len =
1270        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1271    n->rss_data.indirections_len++;
1272    if (!do_rss) {
1273        n->rss_data.indirections_len = 1;
1274    }
1275    if (!is_power_of_2(n->rss_data.indirections_len)) {
1276        err_msg = "Invalid size of indirection table";
1277        err_value = n->rss_data.indirections_len;
1278        goto error;
1279    }
1280    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1281        err_msg = "Too large indirection table";
1282        err_value = n->rss_data.indirections_len;
1283        goto error;
1284    }
1285    n->rss_data.default_queue = do_rss ?
1286        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1287    if (n->rss_data.default_queue >= n->max_queue_pairs) {
1288        err_msg = "Invalid default queue";
1289        err_value = n->rss_data.default_queue;
1290        goto error;
1291    }
1292    offset += size_get;
1293    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1294    g_free(n->rss_data.indirections_table);
1295    n->rss_data.indirections_table = g_malloc(size_get);
1296    if (!n->rss_data.indirections_table) {
1297        err_msg = "Can't allocate indirections table";
1298        err_value = n->rss_data.indirections_len;
1299        goto error;
1300    }
1301    s = iov_to_buf(iov, iov_cnt, offset,
1302                   n->rss_data.indirections_table, size_get);
1303    if (s != size_get) {
1304        err_msg = "Short indirection table buffer";
1305        err_value = (uint32_t)s;
1306        goto error;
1307    }
1308    for (i = 0; i < n->rss_data.indirections_len; ++i) {
1309        uint16_t val = n->rss_data.indirections_table[i];
1310        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1311    }
1312    offset += size_get;
1313    size_get = sizeof(temp);
1314    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1315    if (s != size_get) {
1316        err_msg = "Can't get queue_pairs";
1317        err_value = (uint32_t)s;
1318        goto error;
1319    }
1320    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1321    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1322        err_msg = "Invalid number of queue_pairs";
1323        err_value = queue_pairs;
1324        goto error;
1325    }
1326    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1327        err_msg = "Invalid key size";
1328        err_value = temp.b;
1329        goto error;
1330    }
1331    if (!temp.b && n->rss_data.hash_types) {
1332        err_msg = "No key provided";
1333        err_value = 0;
1334        goto error;
1335    }
1336    if (!temp.b && !n->rss_data.hash_types) {
1337        virtio_net_disable_rss(n);
1338        return queue_pairs;
1339    }
1340    offset += size_get;
1341    size_get = temp.b;
1342    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1343    if (s != size_get) {
1344        err_msg = "Can get key buffer";
1345        err_value = (uint32_t)s;
1346        goto error;
1347    }
1348    n->rss_data.enabled = true;
1349
1350    if (!n->rss_data.populate_hash) {
1351        if (!virtio_net_attach_epbf_rss(n)) {
1352            /* EBPF must be loaded for vhost */
1353            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1354                warn_report("Can't load eBPF RSS for vhost");
1355                goto error;
1356            }
1357            /* fallback to software RSS */
1358            warn_report("Can't load eBPF RSS - fallback to software RSS");
1359            n->rss_data.enabled_software_rss = true;
1360        }
1361    } else {
1362        /* use software RSS for hash populating */
1363        /* and detach eBPF if was loaded before */
1364        virtio_net_detach_epbf_rss(n);
1365        n->rss_data.enabled_software_rss = true;
1366    }
1367
1368    trace_virtio_net_rss_enable(n->rss_data.hash_types,
1369                                n->rss_data.indirections_len,
1370                                temp.b);
1371    return queue_pairs;
1372error:
1373    trace_virtio_net_rss_error(err_msg, err_value);
1374    virtio_net_disable_rss(n);
1375    return 0;
1376}
1377
/*
 * VIRTIO_NET_CTRL_MQ: hash config, RSS config, or an explicit queue
 * pair count (VQ_PAIRS_SET).  All three paths end up validating and
 * applying a new curr_queue_pairs value.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Any MQ command resets RSS state; the RSS paths re-enable it. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Spec-mandated range check plus the device's own maximum. */
    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    /* Avoid changing the number of queue_pairs for vdpa device in
     * userspace handler. A future fix is needed to handle the mq
     * change in userspace handler with vhost-vdpa. Let's disable
     * the mq handling from userspace for now and only allow get
     * done through the kernel. Ripples may be seen when falling
     * back to userspace, but without doing it qemu process would
     * crash on a recursive entry to virtio_net_set_status().
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    /* stop the backend before changing the number of queue_pairs to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
1435
1436size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1437                                  const struct iovec *in_sg, unsigned in_num,
1438                                  const struct iovec *out_sg,
1439                                  unsigned out_num)
1440{
1441    VirtIONet *n = VIRTIO_NET(vdev);
1442    struct virtio_net_ctrl_hdr ctrl;
1443    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1444    size_t s;
1445    struct iovec *iov, *iov2;
1446
1447    if (iov_size(in_sg, in_num) < sizeof(status) ||
1448        iov_size(out_sg, out_num) < sizeof(ctrl)) {
1449        virtio_error(vdev, "virtio-net ctrl missing headers");
1450        return 0;
1451    }
1452
1453    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1454    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1455    iov_discard_front(&iov, &out_num, sizeof(ctrl));
1456    if (s != sizeof(ctrl)) {
1457        status = VIRTIO_NET_ERR;
1458    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1459        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1460    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1461        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1462    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1463        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1464    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1465        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1466    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1467        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1468    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1469        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1470    }
1471
1472    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1473    assert(s == sizeof(status));
1474
1475    g_free(iov2);
1476    return sizeof(status);
1477}
1478
1479static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1480{
1481    VirtQueueElement *elem;
1482
1483    for (;;) {
1484        size_t written;
1485        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1486        if (!elem) {
1487            break;
1488        }
1489
1490        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1491                                             elem->out_sg, elem->out_num);
1492        if (written > 0) {
1493            virtqueue_push(vq, elem, written);
1494            virtio_notify(vdev, vq);
1495            g_free(elem);
1496        } else {
1497            virtqueue_detach_element(vq, elem, 0);
1498            g_free(elem);
1499            break;
1500        }
1501    }
1502}
1503
1504/* RX */
1505
/*
 * RX virtqueue handler: the guest posted new receive buffers, so retry
 * any packets the net layer queued while we had none.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}
1513
1514static bool virtio_net_can_receive(NetClientState *nc)
1515{
1516    VirtIONet *n = qemu_get_nic_opaque(nc);
1517    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1518    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1519
1520    if (!vdev->vm_running) {
1521        return false;
1522    }
1523
1524    if (nc->queue_index >= n->curr_queue_pairs) {
1525        return false;
1526    }
1527
1528    if (!virtio_queue_ready(q->rx_vq) ||
1529        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1530        return false;
1531    }
1532
1533    return true;
1534}
1535
1536static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1537{
1538    VirtIONet *n = q->n;
1539    if (virtio_queue_empty(q->rx_vq) ||
1540        (n->mergeable_rx_bufs &&
1541         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1542        virtio_queue_set_notification(q->rx_vq, 1);
1543
1544        /* To avoid a race condition where the guest has made some buffers
1545         * available after the above check but before notification was
1546         * enabled, check for available buffers again.
1547         */
1548        if (virtio_queue_empty(q->rx_vq) ||
1549            (n->mergeable_rx_bufs &&
1550             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1551            return 0;
1552        }
1553    }
1554
1555    virtio_queue_set_notification(q->rx_vq, 0);
1556    return 1;
1557}
1558
/*
 * Byte-swap the multi-byte fields of a virtio_net_hdr in place, per the
 * device's target/virtio endianness rules.
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1566
1567/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1568 * it never finds out that the packets don't have valid checksums.  This
1569 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1570 * fix this with Xen but it hasn't appeared in an upstream release of
1571 * dhclient yet.
1572 *
1573 * To avoid breaking existing guests, we catch udp packets and add
1574 * checksums.  This is terrible but it's better than hacking the guest
1575 * kernels.
1576 *
1577 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1578 * we should provide a mechanism to disable it to avoid polluting the host
1579 * cache.
1580 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    /*
     * The byte offsets below assume an untagged Ethernet frame with a
     * 20-byte IPv4 header: 12/13 = ethertype, 23 = IP protocol,
     * 34/35 = UDP source port.  NOTE(review): a VLAN-tagged frame or an
     * IPv4 header with options would shift these offsets — presumably
     * such packets simply fail the match and are left untouched.
     */
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        /* Fill in the checksum and clear the offload flag. */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1593
/*
 * Copy the virtio-net header for an incoming packet into the guest's
 * buffers.  With a vnet header present in 'buf', it is fixed up
 * (dhclient workaround, optional byte-swap) and forwarded; otherwise a
 * synthetic all-zero header is written.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        /* Swap only when host and guest disagree on header endianness. */
        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        /* No host-supplied header: synthesize a no-offload one. */
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1615
1616static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1617{
1618    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1619    static const uint8_t vlan[] = {0x81, 0x00};
1620    uint8_t *ptr = (uint8_t *)buf;
1621    int i;
1622
1623    if (n->promisc)
1624        return 1;
1625
1626    ptr += n->host_hdr_len;
1627
1628    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1629        int vid = lduw_be_p(ptr + 14) & 0xfff;
1630        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1631            return 0;
1632    }
1633
1634    if (ptr[0] & 1) { // multicast
1635        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1636            return !n->nobcast;
1637        } else if (n->nomulti) {
1638            return 0;
1639        } else if (n->allmulti || n->mac_table.multi_overflow) {
1640            return 1;
1641        }
1642
1643        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1644            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1645                return 1;
1646            }
1647        }
1648    } else { // unicast
1649        if (n->nouni) {
1650            return 0;
1651        } else if (n->alluni || n->mac_table.uni_overflow) {
1652            return 1;
1653        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1654            return 1;
1655        }
1656
1657        for (i = 0; i < n->mac_table.first_multi; i++) {
1658            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1659                return 1;
1660            }
1661        }
1662    }
1663
1664    return 0;
1665}
1666
1667static uint8_t virtio_net_get_hash_type(bool isip4,
1668                                        bool isip6,
1669                                        bool isudp,
1670                                        bool istcp,
1671                                        uint32_t types)
1672{
1673    if (isip4) {
1674        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1675            return NetPktRssIpV4Tcp;
1676        }
1677        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1678            return NetPktRssIpV4Udp;
1679        }
1680        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1681            return NetPktRssIpV4;
1682        }
1683    } else if (isip6) {
1684        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1685                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1686
1687        if (istcp && (types & mask)) {
1688            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1689                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1690        }
1691        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1692        if (isudp && (types & mask)) {
1693            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1694                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1695        }
1696        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1697        if (types & mask) {
1698            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1699                NetPktRssIpV6Ex : NetPktRssIpV6;
1700        }
1701    }
1702    return 0xff;
1703}
1704
/*
 * Write the computed hash value and report type into the
 * virtio_net_hdr_v1_hash header at the start of the packet buffer.
 * The const is cast away because 'buf' is the caller's receive buffer.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1712
/*
 * Software RSS: hash the packet and look up the target queue in the
 * indirection table.  Optionally stores the hash into the packet header
 * (hash report).  Returns the new queue index, or -1 when the packet
 * should stay on the current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Indexed by NetPktRss* value; maps to the virtio hash_report code. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* Fragments have no usable L4 header; hash on L3 only. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    /* No applicable hash type: report none, fall back to default queue. */
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so this masks the hash. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1765
/*
 * Deliver one packet from the backend into the guest's RX virtqueue.
 *
 * @nc:     net client of the (sub)queue the packet arrived on
 * @buf:    packet bytes, prefixed by the backend's vnet header
 *          (n->host_hdr_len bytes) when the backend supplies one
 * @size:   length of @buf in bytes
 * @no_rss: when true, skip software RSS steering; set on the recursive
 *          call after the packet has already been redirected once, so a
 *          packet is never steered twice
 *
 * Returns @size on success (or deliberate drop), 0 when the guest has no
 * buffers available (backend should retry later), -1 on error.
 *
 * Caller must hold the RCU read lock (see virtio_net_do_receive()).
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* One entry per descriptor chain used; filled before any guest-visible
     * state is touched so the error path can detach them all. */
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* Software RSS: steer the packet to the queue chosen by the hash */
    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* MAC/VLAN/mode filtering; a filtered packet counts as delivered */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Spread the packet over as many descriptor chains as needed
     * (more than one only with VIRTIO_NET_F_MRG_RXBUF). */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running out of buffers mid-packet is a guest bug: the
             * has_buffers() check above only guarantees the first chain. */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives in guest memory; it is
                 * patched after the loop once the chain count is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            /* Write the guest-format vnet header into the first chain */
            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the hash fields that follow the basic header */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Patch num_buffers now that the number of chains used is known */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    /* Give every popped-but-unused element back to the queue */
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
1908
/*
 * Deliver a packet to the guest under the RCU read lock.  The guard
 * macro releases the lock at end of scope, so the receive call must stay
 * within this function's body.
 */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
1916
1917static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1918                                         const uint8_t *buf,
1919                                         VirtioNetRscUnit *unit)
1920{
1921    uint16_t ip_hdrlen;
1922    struct ip_header *ip;
1923
1924    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1925                              + sizeof(struct eth_header));
1926    unit->ip = (void *)ip;
1927    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1928    unit->ip_plen = &ip->ip_len;
1929    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1930    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1931    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1932}
1933
1934static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1935                                         const uint8_t *buf,
1936                                         VirtioNetRscUnit *unit)
1937{
1938    struct ip6_header *ip6;
1939
1940    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1941                                 + sizeof(struct eth_header));
1942    unit->ip = ip6;
1943    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1944    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1945                                        + sizeof(struct ip6_header));
1946    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1947
1948    /* There is a difference between payload lenght in ipv4 and v6,
1949       ip header is excluded in ipv6 */
1950    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1951}
1952
1953static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1954                                       VirtioNetRscSeg *seg)
1955{
1956    int ret;
1957    struct virtio_net_hdr_v1 *h;
1958
1959    h = (struct virtio_net_hdr_v1 *)seg->buf;
1960    h->flags = 0;
1961    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1962
1963    if (seg->is_coalesced) {
1964        h->rsc.segments = seg->packets;
1965        h->rsc.dup_acks = seg->dup_ack;
1966        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1967        if (chain->proto == ETH_P_IP) {
1968            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1969        } else {
1970            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1971        }
1972    }
1973
1974    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1975    QTAILQ_REMOVE(&chain->buffers, seg, next);
1976    g_free(seg->buf);
1977    g_free(seg);
1978
1979    return ret;
1980}
1981
1982static void virtio_net_rsc_purge(void *opq)
1983{
1984    VirtioNetRscSeg *seg, *rn;
1985    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1986
1987    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1988        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1989            chain->stat.purge_failed++;
1990            continue;
1991        }
1992    }
1993
1994    chain->stat.timer++;
1995    if (!QTAILQ_EMPTY(&chain->buffers)) {
1996        timer_mod(chain->drain_timer,
1997              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1998    }
1999}
2000
2001static void virtio_net_rsc_cleanup(VirtIONet *n)
2002{
2003    VirtioNetRscChain *chain, *rn_chain;
2004    VirtioNetRscSeg *seg, *rn_seg;
2005
2006    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2007        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2008            QTAILQ_REMOVE(&chain->buffers, seg, next);
2009            g_free(seg->buf);
2010            g_free(seg);
2011        }
2012
2013        timer_free(chain->drain_timer);
2014        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2015        g_free(chain);
2016    }
2017}
2018
2019static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2020                                     NetClientState *nc,
2021                                     const uint8_t *buf, size_t size)
2022{
2023    uint16_t hdr_len;
2024    VirtioNetRscSeg *seg;
2025
2026    hdr_len = chain->n->guest_hdr_len;
2027    seg = g_new(VirtioNetRscSeg, 1);
2028    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2029        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2030    memcpy(seg->buf, buf, size);
2031    seg->size = size;
2032    seg->packets = 1;
2033    seg->dup_ack = 0;
2034    seg->is_coalesced = 0;
2035    seg->nc = nc;
2036
2037    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2038    chain->stat.cache++;
2039
2040    switch (chain->proto) {
2041    case ETH_P_IP:
2042        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2043        break;
2044    case ETH_P_IPV6:
2045        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2046        break;
2047    default:
2048        g_assert_not_reached();
2049    }
2050}
2051
2052static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2053                                         VirtioNetRscSeg *seg,
2054                                         const uint8_t *buf,
2055                                         struct tcp_header *n_tcp,
2056                                         struct tcp_header *o_tcp)
2057{
2058    uint32_t nack, oack;
2059    uint16_t nwin, owin;
2060
2061    nack = htonl(n_tcp->th_ack);
2062    nwin = htons(n_tcp->th_win);
2063    oack = htonl(o_tcp->th_ack);
2064    owin = htons(o_tcp->th_win);
2065
2066    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2067        chain->stat.ack_out_of_win++;
2068        return RSC_FINAL;
2069    } else if (nack == oack) {
2070        /* duplicated ack or window probe */
2071        if (nwin == owin) {
2072            /* duplicated ack, add dup ack count due to whql test up to 1 */
2073            chain->stat.dup_ack++;
2074            return RSC_FINAL;
2075        } else {
2076            /* Coalesce window update */
2077            o_tcp->th_win = n_tcp->th_win;
2078            chain->stat.win_update++;
2079            return RSC_COALESCE;
2080        }
2081    } else {
2082        /* pure ack, go to 'C', finalize*/
2083        chain->stat.pure_ack++;
2084        return RSC_FINAL;
2085    }
2086}
2087
/*
 * Try to merge the new TCP segment described by @n_unit into the cached
 * segment @seg (same flow; the caller has already matched the 4-tuple).
 *
 * Returns RSC_COALESCE when the payload or a window update was folded
 * into @seg, RSC_FINAL when the cached segment must be drained first
 * (out-of-window, out-of-order, duplicate/pure ack, or the merge would
 * exceed the chain's maximum payload).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* Start of the new segment's TCP payload */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* Carry the newest ack and window over to the cached segment */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the payload; seg->buf was pre-sized for the maximum
           coalesced packet in virtio_net_rsc_cache_buf() */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2152
2153static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2154                                        VirtioNetRscSeg *seg,
2155                                        const uint8_t *buf, size_t size,
2156                                        VirtioNetRscUnit *unit)
2157{
2158    struct ip_header *ip1, *ip2;
2159
2160    ip1 = (struct ip_header *)(unit->ip);
2161    ip2 = (struct ip_header *)(seg->unit.ip);
2162    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2163        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2164        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2165        chain->stat.no_match++;
2166        return RSC_NO_MATCH;
2167    }
2168
2169    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2170}
2171
2172static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2173                                        VirtioNetRscSeg *seg,
2174                                        const uint8_t *buf, size_t size,
2175                                        VirtioNetRscUnit *unit)
2176{
2177    struct ip6_header *ip1, *ip2;
2178
2179    ip1 = (struct ip6_header *)(unit->ip);
2180    ip2 = (struct ip6_header *)(seg->unit.ip);
2181    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2182        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2183        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2184        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2185            chain->stat.no_match++;
2186            return RSC_NO_MATCH;
2187    }
2188
2189    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2190}
2191
2192/* Packets with 'SYN' should bypass, other flag should be sent after drain
2193 * to prevent out of order */
2194static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2195                                         struct tcp_header *tcp)
2196{
2197    uint16_t tcp_hdr;
2198    uint16_t tcp_flag;
2199
2200    tcp_flag = htons(tcp->th_offset_flags);
2201    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2202    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2203    if (tcp_flag & TH_SYN) {
2204        chain->stat.tcp_syn++;
2205        return RSC_BYPASS;
2206    }
2207
2208    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2209        chain->stat.tcp_ctrl_drain++;
2210        return RSC_FINAL;
2211    }
2212
2213    if (tcp_hdr > sizeof(struct tcp_header)) {
2214        chain->stat.tcp_all_opt++;
2215        return RSC_FINAL;
2216    }
2217
2218    return RSC_CANDIDATE;
2219}
2220
/*
 * Core coalescing step: try to merge the already sanity-checked packet
 * @buf into one of @chain's cached segments.
 *
 *  - empty cache: cache the packet and arm the drain timer,
 *  - RSC_FINAL on a match: flush the cached segment, then deliver the
 *    new packet on its own (returns 0 when the flush itself failed),
 *  - RSC_NO_MATCH: keep scanning; cache the packet if nothing matched,
 *  - otherwise the packet was coalesced into the matching segment.
 *
 * Returns the number of bytes consumed (@size unless delivery failed).
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        /* First cached segment: start the drain timeout */
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            /* Flush the cached segment before the new packet to keep
             * in-order delivery */
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* Different flow from everything cached: start a new segment */
    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2266
/*
 * Flush any cached segment belonging to the same TCP flow as @buf, then
 * deliver @buf itself.  Used for control segments (FIN/RST/window
 * probes, ...) so they are not delivered ahead of previously coalesced
 * data of the same connection.
 *
 * @ip_start/@ip_size: offset and length of the address pair to compare
 * @tcp_port: offset of the 4-byte source+destination port pair
 *
 * NOTE(review): ppair1/ppair2 load 32 bits through a pointer cast and
 * the offsets the callers pass are not obviously 4-byte aligned —
 * presumably fine on the supported hosts, but worth confirming for
 * strict-alignment targets.
 */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        /* At most one cached segment per flow, so stop after the match */
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
2293
2294static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2295                                            struct ip_header *ip,
2296                                            const uint8_t *buf, size_t size)
2297{
2298    uint16_t ip_len;
2299
2300    /* Not an ipv4 packet */
2301    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2302        chain->stat.ip_option++;
2303        return RSC_BYPASS;
2304    }
2305
2306    /* Don't handle packets with ip option */
2307    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2308        chain->stat.ip_option++;
2309        return RSC_BYPASS;
2310    }
2311
2312    if (ip->ip_p != IPPROTO_TCP) {
2313        chain->stat.bypass_not_tcp++;
2314        return RSC_BYPASS;
2315    }
2316
2317    /* Don't handle packets with ip fragment */
2318    if (!(htons(ip->ip_off) & IP_DF)) {
2319        chain->stat.ip_frag++;
2320        return RSC_BYPASS;
2321    }
2322
2323    /* Don't handle packets with ecn flag */
2324    if (IPTOS_ECN(ip->ip_tos)) {
2325        chain->stat.ip_ecn++;
2326        return RSC_BYPASS;
2327    }
2328
2329    ip_len = htons(ip->ip_len);
2330    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2331        || ip_len > (size - chain->n->guest_hdr_len -
2332                     sizeof(struct eth_header))) {
2333        chain->stat.ip_hacked++;
2334        return RSC_BYPASS;
2335    }
2336
2337    return RSC_CANDIDATE;
2338}
2339
2340static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2341                                      NetClientState *nc,
2342                                      const uint8_t *buf, size_t size)
2343{
2344    int32_t ret;
2345    uint16_t hdr_len;
2346    VirtioNetRscUnit unit;
2347
2348    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2349
2350    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2351        + sizeof(struct tcp_header))) {
2352        chain->stat.bypass_not_tcp++;
2353        return virtio_net_do_receive(nc, buf, size);
2354    }
2355
2356    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2357    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2358        != RSC_CANDIDATE) {
2359        return virtio_net_do_receive(nc, buf, size);
2360    }
2361
2362    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2363    if (ret == RSC_BYPASS) {
2364        return virtio_net_do_receive(nc, buf, size);
2365    } else if (ret == RSC_FINAL) {
2366        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2367                ((hdr_len + sizeof(struct eth_header)) + 12),
2368                VIRTIO_NET_IP4_ADDR_SIZE,
2369                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2370    }
2371
2372    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2373}
2374
2375static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2376                                            struct ip6_header *ip6,
2377                                            const uint8_t *buf, size_t size)
2378{
2379    uint16_t ip_len;
2380
2381    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2382        != IP_HEADER_VERSION_6) {
2383        return RSC_BYPASS;
2384    }
2385
2386    /* Both option and protocol is checked in this */
2387    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2388        chain->stat.bypass_not_tcp++;
2389        return RSC_BYPASS;
2390    }
2391
2392    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2393    if (ip_len < sizeof(struct tcp_header) ||
2394        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2395                  - sizeof(struct ip6_header))) {
2396        chain->stat.ip_hacked++;
2397        return RSC_BYPASS;
2398    }
2399
2400    /* Don't handle packets with ecn flag */
2401    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2402        chain->stat.ip_ecn++;
2403        return RSC_BYPASS;
2404    }
2405
2406    return RSC_CANDIDATE;
2407}
2408
2409static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2410                                      const uint8_t *buf, size_t size)
2411{
2412    int32_t ret;
2413    uint16_t hdr_len;
2414    VirtioNetRscChain *chain;
2415    VirtioNetRscUnit unit;
2416
2417    chain = (VirtioNetRscChain *)opq;
2418    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2419
2420    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2421        + sizeof(tcp_header))) {
2422        return virtio_net_do_receive(nc, buf, size);
2423    }
2424
2425    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2426    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2427                                                 unit.ip, buf, size)) {
2428        return virtio_net_do_receive(nc, buf, size);
2429    }
2430
2431    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2432    if (ret == RSC_BYPASS) {
2433        return virtio_net_do_receive(nc, buf, size);
2434    } else if (ret == RSC_FINAL) {
2435        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2436                ((hdr_len + sizeof(struct eth_header)) + 8),
2437                VIRTIO_NET_IP6_ADDR_SIZE,
2438                hdr_len + sizeof(struct eth_header)
2439                + sizeof(struct ip6_header));
2440    }
2441
2442    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2443}
2444
2445static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2446                                                      NetClientState *nc,
2447                                                      uint16_t proto)
2448{
2449    VirtioNetRscChain *chain;
2450
2451    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2452        return NULL;
2453    }
2454
2455    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2456        if (chain->proto == proto) {
2457            return chain;
2458        }
2459    }
2460
2461    chain = g_malloc(sizeof(*chain));
2462    chain->n = n;
2463    chain->proto = proto;
2464    if (proto == (uint16_t)ETH_P_IP) {
2465        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2466        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2467    } else {
2468        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2469        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2470    }
2471    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2472                                      virtio_net_rsc_purge, chain);
2473    memset(&chain->stat, 0, sizeof(chain->stat));
2474
2475    QTAILQ_INIT(&chain->buffers);
2476    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2477
2478    return chain;
2479}
2480
2481static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2482                                      const uint8_t *buf,
2483                                      size_t size)
2484{
2485    uint16_t proto;
2486    VirtioNetRscChain *chain;
2487    struct eth_header *eth;
2488    VirtIONet *n;
2489
2490    n = qemu_get_nic_opaque(nc);
2491    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2492        return virtio_net_do_receive(nc, buf, size);
2493    }
2494
2495    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2496    proto = htons(eth->h_proto);
2497
2498    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2499    if (chain) {
2500        chain->stat.received++;
2501        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2502            return virtio_net_rsc_receive4(chain, nc, buf, size);
2503        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2504            return virtio_net_rsc_receive6(chain, nc, buf, size);
2505        }
2506    }
2507    return virtio_net_do_receive(nc, buf, size);
2508}
2509
2510static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2511                                  size_t size)
2512{
2513    VirtIONet *n = qemu_get_nic_opaque(nc);
2514    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2515        return virtio_net_rsc_receive(nc, buf, size);
2516    } else {
2517        return virtio_net_do_receive(nc, buf, size);
2518    }
2519}
2520
2521static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2522
/*
 * Called by the net layer when an asynchronous transmit started in
 * virtio_net_flush_tx() has finished.  Completes the parked element,
 * re-enables TX notifications and flushes packets that queued up while
 * the async send was in flight.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* Return the element kept aside when the backend reported busy */
    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    /* Notifications were disabled while the async send was pending */
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2538
2539/* TX */
/*
 * Pop elements from the TX virtqueue and hand them to the backend, up to
 * n->tx_burst packets per call.
 *
 * Returns the number of packets sent, -EINVAL when the device was marked
 * broken, or -EBUSY when the backend could not accept a packet and the
 * send completes asynchronously (virtio_net_tx_complete() resumes the
 * flush).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* An async send is already in flight; wait for its completion */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* Substitute a byte-swapped copy of the header: sg2[0]
                 * points at the local mhdr, the rest of the guest chain
                 * is copied in starting after the original header. */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Backend busy: park the element and stop flushing until the
             * completion callback restarts us. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2634
2635static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2636{
2637    VirtIONet *n = VIRTIO_NET(vdev);
2638    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2639
2640    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2641        virtio_net_drop_tx_queue_data(vdev, vq);
2642        return;
2643    }
2644
2645    /* This happens when device was stopped but VCPU wasn't. */
2646    if (!vdev->vm_running) {
2647        q->tx_waiting = 1;
2648        return;
2649    }
2650
2651    if (q->tx_waiting) {
2652        virtio_queue_set_notification(vq, 1);
2653        timer_del(q->tx_timer);
2654        q->tx_waiting = 0;
2655        if (virtio_net_flush_tx(q) == -EINVAL) {
2656            return;
2657        }
2658    } else {
2659        timer_mod(q->tx_timer,
2660                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2661        q->tx_waiting = 1;
2662        virtio_queue_set_notification(vq, 0);
2663    }
2664}
2665
/*
 * TX virtqueue kick handler for the bottom-half transmit mode: defer the
 * actual flush to q->tx_bh so the vCPU thread returns quickly.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A bh is already scheduled (or pending VM restart); nothing to do */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    /* Set before the vm_running check so a stopped VM still flushes on
     * restart. */
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2687
/* Timer callback for tx=timer mode: perform the deferred TX flush. */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    /* Accept guest kicks again, then flush what has accumulated. */
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2710
/* Bottom half that performs the actual TX flush for tx=bh mode. */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2759
/*
 * Allocate the RX/TX virtqueue pair for queue pair 'index'.  The TX kick
 * strategy — timer-based coalescing or bottom half — is selected by the
 * tx= device option.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* RX queue first: it occupies the even virtqueue index. */
    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        /* Default: flush from a bottom half. */
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}
2784
/*
 * Tear down the RX/TX virtqueue pair of queue pair 'index' along with
 * the TX timer or bottom half that drives it.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* Drop any packets still queued for this subqueue. */
    qemu_purge_queued_packets(nc);

    /* RX virtqueue (even index). */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    /* TX virtqueue (odd index). */
    virtio_del_queue(vdev, index * 2 + 1);
}
2804
/*
 * Resize the virtqueue set to 'new_max_queue_pairs' data queue pairs.
 * The layout is RX0, TX0, RX1, TX1, ..., ctrl: the control queue is
 * always last, so it is removed first and re-added after the resize.
 */
static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queue_pairs * 2 + 1;
    int i;

    /* At least one RX/TX pair plus the control queue, always odd. */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2839
2840static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2841{
2842    int max = multiqueue ? n->max_queue_pairs : 1;
2843
2844    n->multiqueue = multiqueue;
2845    virtio_net_change_num_queue_pairs(n, max);
2846
2847    virtio_net_set_queue_pairs(n);
2848}
2849
/*
 * Migration post_load hook for the device section: re-derive runtime
 * state (header sizes, queue pair wiring, link state, announce timer,
 * RSS attachment) that is not carried explicitly in the stream.
 * Returns 0 on success.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queue_pairs(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queue_pairs; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume a self-announce sequence that was in flight on the source. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    /* Re-attach eBPF RSS; fall back to software RSS where possible. */
    if (n->rss_data.enabled) {
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2932
/*
 * VirtioDeviceClass post_load hook, called after virtio_load has set
 * features.  Returns 0 on success.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
2948
/* Migrates only the tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2957
2958static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2959{
2960    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2961}
2962
2963static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2964{
2965    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2966                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2967}
2968
2969static bool mac_table_fits(void *opaque, int version_id)
2970{
2971    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2972}
2973
2974static bool mac_table_doesnt_fit(void *opaque, int version_id)
2975{
2976    return !mac_table_fits(opaque, version_id);
2977}
2978
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;             /* the device being migrated */
    VirtIONetQueue *vqs_1;              /* queue pairs after the first one */
    uint16_t        curr_queue_pairs_1; /* count of queue pairs after the 1st */
    uint8_t         has_ufo;            /* snapshot of parent->has_ufo */
    uint32_t        has_vnet_hdr;       /* snapshot of parent->has_vnet_hdr */
};
2989
2990/* The 2nd and subsequent tx_waiting flags are loaded later than
2991 * the 1st entry in the queue_pairs and only if there's more than one
2992 * entry.  We use the tmp mechanism to calculate a temporary
2993 * pointer and count and also validate the count.
2994 */
2995
2996static int virtio_net_tx_waiting_pre_save(void *opaque)
2997{
2998    struct VirtIONetMigTmp *tmp = opaque;
2999
3000    tmp->vqs_1 = tmp->parent->vqs + 1;
3001    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3002    if (tmp->parent->curr_queue_pairs == 0) {
3003        tmp->curr_queue_pairs_1 = 0;
3004    }
3005
3006    return 0;
3007}
3008
/*
 * pre_load: set up the temporary's pointer/count like pre_save does, and
 * validate the incoming queue pair count.  Returns -EINVAL on a bogus
 * stream.
 */
static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);

        return -EINVAL;
    }

    return 0; /* all good */
}
3025
/* Migrates the tx_waiting flags of the 2nd and subsequent queue pairs. */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3038
3039/* the 'has_ufo' flag is just tested; if the incoming stream has the
3040 * flag set we need to check that we have it
3041 */
/* post_load: reject the stream if it needs UFO but our peer lacks it. */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}
3053
3054static int virtio_net_ufo_pre_save(void *opaque)
3055{
3056    struct VirtIONetMigTmp *tmp = opaque;
3057
3058    tmp->has_ufo = tmp->parent->has_ufo;
3059
3060    return 0;
3061}
3062
/* Migrates the has_ufo flag; load-time check is in the post_load hook. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3072
3073/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3074 * flag set we need to check that we have it
3075 */
/* post_load: reject the stream if it needs vnet_hdr but our peer lacks it. */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}
3087
3088static int virtio_net_vnet_pre_save(void *opaque)
3089{
3090    struct VirtIONetMigTmp *tmp = opaque;
3091
3092    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3093
3094    return 0;
3095}
3096
/* Migrates the has_vnet_hdr flag; load-time check is in the post_load hook. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3106
3107static bool virtio_net_rss_needed(void *opaque)
3108{
3109    return VIRTIO_NET(opaque)->rss_data.enabled;
3110}
3111
/* Optional subsection carrying the RSS configuration (see rss_data). */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Indirection table is sized by indirections_len at load time. */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3132
/*
 * Main migration description of the virtio-net device state.  The field
 * order defines the wire format — do not reorder existing entries.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue pair only; the rest follow below. */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* Multiqueue fields are only present when more than one pair exists. */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3188
/* NetClientInfo callbacks wiring this device into the net subsystem. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3198
/*
 * Ask the vhost backend whether a guest notifier is pending on
 * virtqueue 'idx'.  Only valid while vhost is running.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    /* Without VIRTIO_NET_F_MQ, idx 2 can only be the control queue. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3220
/*
 * Mask or unmask the guest notifier of virtqueue 'idx' in the vhost
 * backend.  Only valid while vhost is running.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    /* Without VIRTIO_NET_F_MQ, idx 2 can only be the control queue. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against invalid features and bogus queue index
         * from being set by malicious guest, or penetrated through
         * buggy migration stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
3244
3245static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3246{
3247    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3248
3249    n->config_size = virtio_feature_get_config_size(feature_sizes,
3250                                                    host_features);
3251}
3252
3253void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3254                                   const char *type)
3255{
3256    /*
3257     * The name can be NULL, the netclient name will be type.x.
3258     */
3259    assert(type != NULL);
3260
3261    g_free(n->netclient_name);
3262    g_free(n->netclient_type);
3263    n->netclient_name = g_strdup(name);
3264    n->netclient_type = g_strdup(type);
3265}
3266
3267static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3268{
3269    HotplugHandler *hotplug_ctrl;
3270    PCIDevice *pci_dev;
3271    Error *err = NULL;
3272
3273    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3274    if (hotplug_ctrl) {
3275        pci_dev = PCI_DEVICE(dev);
3276        pci_dev->partially_hotplugged = true;
3277        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3278        if (err) {
3279            error_report_err(err);
3280            return false;
3281        }
3282    } else {
3283        return false;
3284    }
3285    return true;
3286}
3287
/*
 * Plug the failover primary device back in after it was partially
 * unplugged.  Returns true on success (or if nothing needs replugging),
 * false with *errp set on failure.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    /* Nothing to do unless the device was unplugged by failover. */
    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    /* Re-attach to the bus and make the device visible again. */
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
3320
/*
 * React to migration state changes for the failover primary device:
 * unplug it when migration enters setup, replug it if migration failed.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            /* Don't migrate the unplugged primary's state. */
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3350
3351static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3352{
3353    MigrationState *s = data;
3354    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3355    virtio_net_handle_migration_primary(n, s);
3356}
3357
/*
 * DeviceListener hide_device callback: decide whether a device being
 * created should be hidden because it is the failover primary of this
 * virtio-net device.  The primary's options are recorded so it can be
 * plugged later.
 */
static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    /* Only devices declaring a failover pair are candidates. */
    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    /* Is this device paired with *this* virtio-net instance? */
    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check there is only one primary for a virtio-net device but
     * don't duplicate the qdict several times if it's called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
3408
/*
 * qdev realize: validate configuration, size the virtio config space,
 * create the virtqueues and the NIC, and set up announce/failover/RSS
 * machinery.  On error, sets *errp and undoes virtio_init.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /* Parse the 'duplex' option; any value implies SPEED_DUPLEX. */
    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    /* Failover support: watch device creation and migration state. */
    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the datapath queue pairs since the backend could
     * provide control queue via peers as well.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

    /* Each pair needs two virtqueues, plus one for the control queue. */
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* An unknown tx= value is only warned about; "bh" is the fallback. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    /* Control queue goes last, after all RX/TX pairs. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* Determine the host header length from the peer's vnet_hdr support. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    /* Push the configured MAC down to a vhost-vdpa backend, if any. */
   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
3585
/*
 * Tear down a virtio-net device, undoing virtio_net_device_realize().
 * The order below matters: the backend is stopped first so nothing keeps
 * writing to guest memory or device state while we free it.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    /* Release the eBPF RSS program iff realize loaded one (RSS offered). */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    /* Set freed name/type pointers to NULL; realize may test them. */
    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /*
     * Failover support registers a device listener and a migration-state
     * notifier at realize/feature-negotiation time; unregister them only
     * if the property was enabled, otherwise no primary opts may exist.
     */
    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    /* Only one queue pair was created unless multiqueue was negotiated. */
    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3629
/*
 * QOM instance init: runs before properties are set and before realize,
 * so only defaults and property registration belong here.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    /* Prepare the eBPF RSS context; actual load happens at realize. */
    ebpf_rss_init(&n->ebpf_rss);
}
3645
3646static int virtio_net_pre_save(void *opaque)
3647{
3648    VirtIONet *n = opaque;
3649
3650    /* At this point, backend must be stopped, otherwise
3651     * it might keep writing to memory. */
3652    assert(!n->vhost_started);
3653
3654    return 0;
3655}
3656
3657static bool primary_unplug_pending(void *opaque)
3658{
3659    DeviceState *dev = opaque;
3660    DeviceState *primary;
3661    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3662    VirtIONet *n = VIRTIO_NET(vdev);
3663
3664    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3665        return false;
3666    }
3667    primary = failover_find_primary_device(n);
3668    return primary ? primary->pending_deleted_event : false;
3669}
3670
3671static bool dev_unplug_pending(void *opaque)
3672{
3673    DeviceState *dev = opaque;
3674    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3675
3676    return vdc->primary_unplug_pending(dev);
3677}
3678
3679static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3680{
3681    VirtIONet *n = VIRTIO_NET(vdev);
3682    NetClientState *nc = qemu_get_queue(n->nic);
3683    struct vhost_net *net = get_vhost_net(nc->peer);
3684    return &net->dev;
3685}
3686
3687static const VMStateDescription vmstate_virtio_net = {
3688    .name = "virtio-net",
3689    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3690    .version_id = VIRTIO_NET_VM_VERSION,
3691    .fields = (VMStateField[]) {
3692        VMSTATE_VIRTIO_DEVICE,
3693        VMSTATE_END_OF_LIST()
3694    },
3695    .pre_save = virtio_net_pre_save,
3696    .dev_unplug_pending = dev_unplug_pending,
3697};
3698
3699static Property virtio_net_properties[] = {
3700    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3701                    VIRTIO_NET_F_CSUM, true),
3702    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3703                    VIRTIO_NET_F_GUEST_CSUM, true),
3704    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3705    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3706                    VIRTIO_NET_F_GUEST_TSO4, true),
3707    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3708                    VIRTIO_NET_F_GUEST_TSO6, true),
3709    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3710                    VIRTIO_NET_F_GUEST_ECN, true),
3711    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3712                    VIRTIO_NET_F_GUEST_UFO, true),
3713    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3714                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3715    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3716                    VIRTIO_NET_F_HOST_TSO4, true),
3717    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3718                    VIRTIO_NET_F_HOST_TSO6, true),
3719    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3720                    VIRTIO_NET_F_HOST_ECN, true),
3721    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3722                    VIRTIO_NET_F_HOST_UFO, true),
3723    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3724                    VIRTIO_NET_F_MRG_RXBUF, true),
3725    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3726                    VIRTIO_NET_F_STATUS, true),
3727    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3728                    VIRTIO_NET_F_CTRL_VQ, true),
3729    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3730                    VIRTIO_NET_F_CTRL_RX, true),
3731    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3732                    VIRTIO_NET_F_CTRL_VLAN, true),
3733    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3734                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3735    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3736                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3737    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3738                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3739    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3740    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3741                    VIRTIO_NET_F_RSS, false),
3742    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3743                    VIRTIO_NET_F_HASH_REPORT, false),
3744    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3745                    VIRTIO_NET_F_RSC_EXT, false),
3746    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3747                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3748    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3749    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3750                       TX_TIMER_INTERVAL),
3751    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3752    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3753    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3754                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3755    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3756                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3757    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3758    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3759                     true),
3760    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3761    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3762    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3763    DEFINE_PROP_END_OF_LIST(),
3764};
3765
/*
 * QOM class init: wire up the generic device class (properties,
 * migration, category) and the virtio-device callbacks.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    /* Outer vmstate; per-device fields live in vdc->vmsd below. */
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    /* GSO is legacy-only: it must not be offered to virtio-1 guests. */
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    /* Failover hook consulted before migration while unplug is pending. */
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
}
3791
3792static const TypeInfo virtio_net_info = {
3793    .name = TYPE_VIRTIO_NET,
3794    .parent = TYPE_VIRTIO_DEVICE,
3795    .instance_size = sizeof(VirtIONet),
3796    .instance_init = virtio_net_instance_init,
3797    .class_init = virtio_net_class_init,
3798};
3799
3800static void virtio_register_types(void)
3801{
3802    type_register_static(&virtio_net_info);
3803}
3804
3805type_init(virtio_register_types)
3806