qemu/hw/net/virtio-net.c
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

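/*
 * Virtqueues are laid out so that queue pair i uses vq 2*i for RX and
 * vq 2*i + 1 for TX; dividing a vq index by two therefore yields the
 * owning queue-pair index.
 */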
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

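/*
 * Bring the vhost backend in sync with the device status: start it when
 * the guest driver is ready and the link is up, stop it otherwise.
 */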
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

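/*
 * React to a device status change: sync vnet header endianness and the
 * vhost backend, then flush or quiesce each queue pair's pending TX work.
 */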
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and disabled notification. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue %d "
                         "while resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

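/*
 * Pick the guest-visible vnet header layout from the negotiated features
 * and propagate the resulting header length to the backend when possible.
 */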
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

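/* Enable the peer (vhost-user vring or tap queue) backing subqueue @index. */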
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

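/*
 * Build the feature set offered to the guest: start from the host
 * features and drop whatever the peer backend cannot support.
 */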
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all the features that virtio-net could possibly support */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
     * the backend actually enabling it. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * otherwise refuse to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest announce its new location even with vDPA devices
     * that do not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device currently being walked
 * @opaque: FailoverDevice to fill in when the primary is found
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * Keep acked_features in NetVhostUserState up to date so that it
         * doesn't miss any features configured by the guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

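/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks,
 * unicast entries first and multicast entries second; entries beyond
 * MAC_TABLE_ENTRIES only set the corresponding overflow flag.
 */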
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

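/*
 * Push the current RSS configuration into the eBPF steering program and
 * attach it to the backend; returns false if either step fails.
 */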
static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

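/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG (!do_rss) command: a fixed header,
 * a variable-length indirection table, then max_tx_vq and the hash key.
 * Returns the number of queue pairs to use, or 0 on error.
 */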
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* Use software RSS for hash populating, and detach eBPF
         * if it was loaded before. */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

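/*
 * Handle the VIRTIO_NET_CTRL_MQ class: either an explicit queue-pair
 * count (VQ_PAIRS_SET) or an RSS/hash configuration that implies one.
 */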
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /*
     * Stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue.
     */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

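/*
 * Dispatch a single control command described by the out/in scatter
 * lists and write the one-byte status back; returns the number of
 * bytes written to the in buffer (0 on malformed input).
 */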
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

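/*
 * Returns 1 when the RX virtqueue can hold a packet of @bufsize bytes;
 * otherwise re-enables guest notifications and returns 0.
 */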
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

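/*
 * Convert the multi-byte vnet header fields between guest and host
 * endianness.
 */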
1640static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1641{
1642    virtio_tswap16s(vdev, &hdr->hdr_len);
1643    virtio_tswap16s(vdev, &hdr->gso_size);
1644    virtio_tswap16s(vdev, &hdr->csum_start);
1645    virtio_tswap16s(vdev, &hdr->csum_offset);
1646}
1647
1648/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1649 * it never finds out that the packets don't have valid checksums.  This
1650 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1651 * fix this with Xen but it hasn't appeared in an upstream release of
1652 * dhclient yet.
1653 *
1654 * To avoid breaking existing guests, we catch udp packets and add
1655 * checksums.  This is terrible but it's better than hacking the guest
1656 * kernels.
1657 *
1658 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1659 * we should provide a mechanism to disable it to avoid polluting the host
1660 * cache.
1661 */
1662static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1663                                        uint8_t *buf, size_t size)
1664{
1665    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1666        (size > 27 && size < 1500) && /* normal sized MTU */
1667        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1668        (buf[23] == 17) && /* ip.protocol == UDP */
1669        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1670        net_checksum_calculate(buf, size, CSUM_UDP);
1671        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1672    }
1673}
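/*
 * For reference, the magic offsets tested above assume an untagged
 * Ethernet II frame carrying option-less IPv4 over UDP:
 *   buf[12..13] ethertype (0x0800 = IPv4), buf[23] ip.protocol (17 = UDP),
 *   buf[34..35] udp.srcport (67 = bootps).
 * A standalone form of the same heuristic (illustrative only, with a
 * stricter length check):
 */
#if 0
static bool looks_like_bootps_reply(const uint8_t *buf, size_t size)
{
    return size >= 42 &&                          /* eth + ip + udp headers */
           buf[12] == 0x08 && buf[13] == 0x00 &&  /* ethertype == IPv4 */
           buf[23] == 17 &&                       /* ip.protocol == UDP */
           buf[34] == 0 && buf[35] == 67;         /* udp.srcport == bootps */
}
#endif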
1674
1675static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1676                           const void *buf, size_t size)
1677{
1678    if (n->has_vnet_hdr) {
1679        /* FIXME this cast is evil */
1680        void *wbuf = (void *)buf;
1681        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1682                                    size - n->host_hdr_len);
1683
1684        if (n->needs_vnet_hdr_swap) {
1685            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1686        }
1687        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1688    } else {
1689        struct virtio_net_hdr hdr = {
1690            .flags = 0,
1691            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1692        };
1693        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1694    }
1695}
1696
1697static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1698{
1699    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1700    static const uint8_t vlan[] = {0x81, 0x00};
1701    uint8_t *ptr = (uint8_t *)buf;
1702    int i;
1703
1704    if (n->promisc)
1705        return 1;
1706
1707    ptr += n->host_hdr_len;
1708
1709    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1710        int vid = lduw_be_p(ptr + 14) & 0xfff;
1711        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1712            return 0;
1713    }
1714
1715    if (ptr[0] & 1) { /* multicast */
1716        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1717            return !n->nobcast;
1718        } else if (n->nomulti) {
1719            return 0;
1720        } else if (n->allmulti || n->mac_table.multi_overflow) {
1721            return 1;
1722        }
1723
1724        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1725            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1726                return 1;
1727            }
1728        }
1729    } else { /* unicast */
1730        if (n->nouni) {
1731            return 0;
1732        } else if (n->alluni || n->mac_table.uni_overflow) {
1733            return 1;
1734        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1735            return 1;
1736        }
1737
1738        for (i = 0; i < n->mac_table.first_multi; i++) {
1739            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1740                return 1;
1741            }
1742        }
1743    }
1744
1745    return 0;
1746}
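/*
 * The VLAN filter above is a 4096-bit bitmap stored as uint32 words:
 * "vid >> 5" selects the word and "vid & 0x1f" the bit within it.
 * Worked example (illustrative only): VID 100 tests bit 4 of vlans[3].
 */
#if 0
static bool vlan_allowed_sketch(const uint32_t *vlans, uint16_t vid)
{
    return vlans[vid >> 5] & (1U << (vid & 0x1f));
}
#endif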
1747
1748static uint8_t virtio_net_get_hash_type(bool hasip4,
1749                                        bool hasip6,
1750                                        EthL4HdrProto l4hdr_proto,
1751                                        uint32_t types)
1752{
1753    if (hasip4) {
1754        switch (l4hdr_proto) {
1755        case ETH_L4_HDR_PROTO_TCP:
1756            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1757                return NetPktRssIpV4Tcp;
1758            }
1759            break;
1760
1761        case ETH_L4_HDR_PROTO_UDP:
1762            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1763                return NetPktRssIpV4Udp;
1764            }
1765            break;
1766
1767        default:
1768            break;
1769        }
1770
1771        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1772            return NetPktRssIpV4;
1773        }
1774    } else if (hasip6) {
1775        switch (l4hdr_proto) {
1776        case ETH_L4_HDR_PROTO_TCP:
1777            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1778                return NetPktRssIpV6TcpEx;
1779            }
1780            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1781                return NetPktRssIpV6Tcp;
1782            }
1783            break;
1784
1785        case ETH_L4_HDR_PROTO_UDP:
1786            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1787                return NetPktRssIpV6UdpEx;
1788            }
1789            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1790                return NetPktRssIpV6Udp;
1791            }
1792            break;
1793
1794        default:
1795            break;
1796        }
1797
1798        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1799            return NetPktRssIpV6Ex;
1800        }
1801        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1802            return NetPktRssIpV6;
1803        }
1804    }
1805    return 0xff;
1806}
1807
1808static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1809                                   uint32_t hash)
1810{
1811    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1812    hdr->hash_value = hash;
1813    hdr->hash_report = report;
1814}
1815
1816static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1817                                  size_t size)
1818{
1819    VirtIONet *n = qemu_get_nic_opaque(nc);
1820    unsigned int index = nc->queue_index, new_index = index;
1821    struct NetRxPkt *pkt = n->rx_pkt;
1822    uint8_t net_hash_type;
1823    uint32_t hash;
1824    bool hasip4, hasip6;
1825    EthL4HdrProto l4hdr_proto;
1826    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1827        VIRTIO_NET_HASH_REPORT_IPv4,
1828        VIRTIO_NET_HASH_REPORT_TCPv4,
1829        VIRTIO_NET_HASH_REPORT_TCPv6,
1830        VIRTIO_NET_HASH_REPORT_IPv6,
1831        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1832        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1833        VIRTIO_NET_HASH_REPORT_UDPv4,
1834        VIRTIO_NET_HASH_REPORT_UDPv6,
1835        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1836    };
1837    struct iovec iov = {
1838        .iov_base = (void *)buf,
1839        .iov_len = size
1840    };
1841
1842    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1843    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1844    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1845                                             n->rss_data.hash_types);
1846    if (net_hash_type > NetPktRssIpV6UdpEx) {
1847        if (n->rss_data.populate_hash) {
1848            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1849        }
1850        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1851    }
1852
1853    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1854
1855    if (n->rss_data.populate_hash) {
1856        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1857    }
1858
1859    if (n->rss_data.redirect) {
1860        new_index = hash & (n->rss_data.indirections_len - 1);
1861        new_index = n->rss_data.indirections_table[new_index];
1862    }
1863
1864    return (index == new_index) ? -1 : new_index;
1865}
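/*
 * Sketch of the redirection step above (illustrative only).  The RSS
 * control command conveys the indirection table length as a mask
 * (length - 1), so selecting an entry reduces to masking the hash:
 */
#if 0
static unsigned rss_pick_queue_sketch(uint32_t hash, const uint16_t *table,
                                      uint16_t len /* power of two */)
{
    return table[hash & (len - 1)];
}
#endif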
1866
1867static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1868                                      size_t size, bool no_rss)
1869{
1870    VirtIONet *n = qemu_get_nic_opaque(nc);
1871    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1872    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1873    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1874    size_t lens[VIRTQUEUE_MAX_SIZE];
1875    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1876    struct virtio_net_hdr_mrg_rxbuf mhdr;
1877    unsigned mhdr_cnt = 0;
1878    size_t offset, i, guest_offset, j;
1879    ssize_t err;
1880
1881    if (!virtio_net_can_receive(nc)) {
1882        return -1;
1883    }
1884
1885    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1886        int index = virtio_net_process_rss(nc, buf, size);
1887        if (index >= 0) {
1888            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1889            return virtio_net_receive_rcu(nc2, buf, size, true);
1890        }
1891    }
1892
1893    /* hdr_len refers to the header we supply to the guest */
1894    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1895        return 0;
1896    }
1897
1898    if (!receive_filter(n, buf, size))
1899        return size;
1900
1901    offset = i = 0;
1902
1903    while (offset < size) {
1904        VirtQueueElement *elem;
1905        int len, total;
1906        const struct iovec *sg;
1907
1908        total = 0;
1909
1910        if (i == VIRTQUEUE_MAX_SIZE) {
1911            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1912            err = size;
1913            goto err;
1914        }
1915
1916        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1917        if (!elem) {
1918            if (i) {
1919                virtio_error(vdev, "virtio-net unexpected empty queue: "
1920                             "i %zd mergeable %d offset %zd, size %zd, "
1921                             "guest hdr len %zd, host hdr len %zd "
1922                             "guest features 0x%" PRIx64,
1923                             i, n->mergeable_rx_bufs, offset, size,
1924                             n->guest_hdr_len, n->host_hdr_len,
1925                             vdev->guest_features);
1926            }
1927            err = -1;
1928            goto err;
1929        }
1930
1931        if (elem->in_num < 1) {
1932            virtio_error(vdev,
1933                         "virtio-net receive queue contains no in buffers");
1934            virtqueue_detach_element(q->rx_vq, elem, 0);
1935            g_free(elem);
1936            err = -1;
1937            goto err;
1938        }
1939
1940        sg = elem->in_sg;
1941        if (i == 0) {
1942            assert(offset == 0);
1943            if (n->mergeable_rx_bufs) {
1944                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1945                                    sg, elem->in_num,
1946                                    offsetof(typeof(mhdr), num_buffers),
1947                                    sizeof(mhdr.num_buffers));
1948            }
1949
1950            receive_header(n, sg, elem->in_num, buf, size);
1951            if (n->rss_data.populate_hash) {
1952                offset = sizeof(mhdr);
1953                iov_from_buf(sg, elem->in_num, offset,
1954                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1955            }
1956            offset = n->host_hdr_len;
1957            total += n->guest_hdr_len;
1958            guest_offset = n->guest_hdr_len;
1959        } else {
1960            guest_offset = 0;
1961        }
1962
1963        /* copy in packet.  ugh */
1964        len = iov_from_buf(sg, elem->in_num, guest_offset,
1965                           buf + offset, size - offset);
1966        total += len;
1967        offset += len;
1968        /* If buffers can't be merged, at this point we
1969         * must have consumed the complete packet.
1970         * Otherwise, drop it. */
1971        if (!n->mergeable_rx_bufs && offset < size) {
1972            virtqueue_unpop(q->rx_vq, elem, total);
1973            g_free(elem);
1974            err = size;
1975            goto err;
1976        }
1977
1978        elems[i] = elem;
1979        lens[i] = total;
1980        i++;
1981    }
1982
1983    if (mhdr_cnt) {
1984        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1985        iov_from_buf(mhdr_sg, mhdr_cnt,
1986                     0,
1987                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1988    }
1989
1990    for (j = 0; j < i; j++) {
1991        /* signal other side */
1992        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1993        g_free(elems[j]);
1994    }
1995
1996    virtqueue_flush(q->rx_vq, i);
1997    virtio_notify(vdev, q->rx_vq);
1998
1999    return size;
2000
2001err:
2002    for (j = 0; j < i; j++) {
2003        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2004        g_free(elems[j]);
2005    }
2006
2007    return err;
2008}
2009
2010static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2011                                  size_t size)
2012{
2013    RCU_READ_LOCK_GUARD();
2014
2015    return virtio_net_receive_rcu(nc, buf, size, false);
2016}
2017
2018static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2019                                         const uint8_t *buf,
2020                                         VirtioNetRscUnit *unit)
2021{
2022    uint16_t ip_hdrlen;
2023    struct ip_header *ip;
2024
2025    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2026                              + sizeof(struct eth_header));
2027    unit->ip = (void *)ip;
2028    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2029    unit->ip_plen = &ip->ip_len;
2030    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2031    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2032    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2033}
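/*
 * The "& 0xF000 ... >> 10" above extracts the TCP data-offset field
 * (header length in 32-bit words, the top 4 bits of th_offset_flags)
 * and scales it to bytes in one step: >> 12 to isolate the field, then
 * << 2 for words-to-bytes, i.e. >> 10 overall.  Worked example
 * (illustrative): th_offset_flags = 0x5018 (offset 5, ACK|PSH) gives
 * (0x5018 & 0xF000) >> 10 = 20 bytes.
 */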
2034
2035static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2036                                         const uint8_t *buf,
2037                                         VirtioNetRscUnit *unit)
2038{
2039    struct ip6_header *ip6;
2040
2041    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2042                                 + sizeof(struct eth_header));
2043    unit->ip = ip6;
2044    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2045    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2046                                        + sizeof(struct ip6_header));
2047    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2048
2049    /* There is a difference between the payload length in IPv4 and v6:
2050       the IP header is excluded in IPv6 */
2051    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2052}
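/*
 * Worked example of the two length conventions (illustrative only):
 * for 1000 data bytes behind 20-byte IP and TCP headers,
 *   IPv4: ip_len   = 20 + 20 + 1000 = 1040, payload = 1040 - 20 - 20 = 1000
 *   IPv6: ip6_plen =      20 + 1000 = 1020, payload = 1020 - 20      = 1000
 */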
2053
2054static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2055                                       VirtioNetRscSeg *seg)
2056{
2057    int ret;
2058    struct virtio_net_hdr_v1 *h;
2059
2060    h = (struct virtio_net_hdr_v1 *)seg->buf;
2061    h->flags = 0;
2062    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2063
2064    if (seg->is_coalesced) {
2065        h->rsc.segments = seg->packets;
2066        h->rsc.dup_acks = seg->dup_ack;
2067        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2068        if (chain->proto == ETH_P_IP) {
2069            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2070        } else {
2071            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2072        }
2073    }
2074
2075    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2076    QTAILQ_REMOVE(&chain->buffers, seg, next);
2077    g_free(seg->buf);
2078    g_free(seg);
2079
2080    return ret;
2081}
2082
2083static void virtio_net_rsc_purge(void *opq)
2084{
2085    VirtioNetRscSeg *seg, *rn;
2086    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2087
2088    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2089        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2090            chain->stat.purge_failed++;
2091            continue;
2092        }
2093    }
2094
2095    chain->stat.timer++;
2096    if (!QTAILQ_EMPTY(&chain->buffers)) {
2097        timer_mod(chain->drain_timer,
2098              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2099    }
2100}
2101
2102static void virtio_net_rsc_cleanup(VirtIONet *n)
2103{
2104    VirtioNetRscChain *chain, *rn_chain;
2105    VirtioNetRscSeg *seg, *rn_seg;
2106
2107    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2108        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2109            QTAILQ_REMOVE(&chain->buffers, seg, next);
2110            g_free(seg->buf);
2111            g_free(seg);
2112        }
2113
2114        timer_free(chain->drain_timer);
2115        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2116        g_free(chain);
2117    }
2118}
2119
2120static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2121                                     NetClientState *nc,
2122                                     const uint8_t *buf, size_t size)
2123{
2124    uint16_t hdr_len;
2125    VirtioNetRscSeg *seg;
2126
2127    hdr_len = chain->n->guest_hdr_len;
2128    seg = g_new(VirtioNetRscSeg, 1);
2129    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2130        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2131    memcpy(seg->buf, buf, size);
2132    seg->size = size;
2133    seg->packets = 1;
2134    seg->dup_ack = 0;
2135    seg->is_coalesced = 0;
2136    seg->nc = nc;
2137
2138    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2139    chain->stat.cache++;
2140
2141    switch (chain->proto) {
2142    case ETH_P_IP:
2143        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2144        break;
2145    case ETH_P_IPV6:
2146        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2147        break;
2148    default:
2149        g_assert_not_reached();
2150    }
2151}
2152
2153static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2154                                         VirtioNetRscSeg *seg,
2155                                         const uint8_t *buf,
2156                                         struct tcp_header *n_tcp,
2157                                         struct tcp_header *o_tcp)
2158{
2159    uint32_t nack, oack;
2160    uint16_t nwin, owin;
2161
2162    nack = htonl(n_tcp->th_ack);
2163    nwin = htons(n_tcp->th_win);
2164    oack = htonl(o_tcp->th_ack);
2165    owin = htons(o_tcp->th_win);
2166
2167    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2168        chain->stat.ack_out_of_win++;
2169        return RSC_FINAL;
2170    } else if (nack == oack) {
2171        /* duplicated ack or window probe */
2172        if (nwin == owin) {
2173            /* duplicated ack; bump the dup ack count (WHQL expects up to 1) */
2174            chain->stat.dup_ack++;
2175            return RSC_FINAL;
2176        } else {
2177            /* Coalesce window update */
2178            o_tcp->th_win = n_tcp->th_win;
2179            chain->stat.win_update++;
2180            return RSC_COALESCE;
2181        }
2182    } else {
2183        /* pure ack, go to 'C', finalize */
2184        chain->stat.pure_ack++;
2185        return RSC_FINAL;
2186    }
2187}
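/*
 * Decision summary for the ack path above (illustrative): an ack far
 * outside the window finalizes the chain; the same ack with the same
 * window is a duplicate (finalize and count); the same ack with a
 * changed window is a window update and is folded into the cached
 * segment; any other ack is treated as a pure ack and finalizes.
 */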
2188
2189static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2190                                            VirtioNetRscSeg *seg,
2191                                            const uint8_t *buf,
2192                                            VirtioNetRscUnit *n_unit)
2193{
2194    void *data;
2195    uint16_t o_ip_len;
2196    uint32_t nseq, oseq;
2197    VirtioNetRscUnit *o_unit;
2198
2199    o_unit = &seg->unit;
2200    o_ip_len = htons(*o_unit->ip_plen);
2201    nseq = htonl(n_unit->tcp->th_seq);
2202    oseq = htonl(o_unit->tcp->th_seq);
2203
2204    /* out of order or retransmitted. */
2205    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2206        chain->stat.data_out_of_win++;
2207        return RSC_FINAL;
2208    }
2209
2210    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2211    if (nseq == oseq) {
2212        if ((o_unit->payload == 0) && n_unit->payload) {
2213            /* From no payload to payload: the normal case, not a dup ack etc. */
2214            chain->stat.data_after_pure_ack++;
2215            goto coalesce;
2216        } else {
2217            return virtio_net_rsc_handle_ack(chain, seg, buf,
2218                                             n_unit->tcp, o_unit->tcp);
2219        }
2220    } else if ((nseq - oseq) != o_unit->payload) {
2221        /* Not a consistent packet, out of order */
2222        chain->stat.data_out_of_order++;
2223        return RSC_FINAL;
2224    } else {
2225coalesce:
2226        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2227            chain->stat.over_size++;
2228            return RSC_FINAL;
2229        }
2230
2231        /* The data is in order; the payload length field differs between
2232           v4 and v6, so use the stored field pointer to record the new length */
2233        o_unit->payload += n_unit->payload; /* update new data len */
2234
2235        /* update field in ip header */
2236        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2237
2238        /* Carry over the latest 'PSH' flag: the WHQL test guide says 'PSH'
2239           can be coalesced for Windows guests, while this may change the
2240           behavior for Linux guests (only if they use the RSC feature). */
2241        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2242
2243        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2244        o_unit->tcp->th_win = n_unit->tcp->th_win;
2245
2246        memmove(seg->buf + seg->size, data, n_unit->payload);
2247        seg->size += n_unit->payload;
2248        seg->packets++;
2249        chain->stat.coalesced++;
2250        return RSC_COALESCE;
2251    }
2252}
2253
2254static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2255                                        VirtioNetRscSeg *seg,
2256                                        const uint8_t *buf, size_t size,
2257                                        VirtioNetRscUnit *unit)
2258{
2259    struct ip_header *ip1, *ip2;
2260
2261    ip1 = (struct ip_header *)(unit->ip);
2262    ip2 = (struct ip_header *)(seg->unit.ip);
2263    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2264        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2265        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2266        chain->stat.no_match++;
2267        return RSC_NO_MATCH;
2268    }
2269
2270    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2271}
2272
2273static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2274                                        VirtioNetRscSeg *seg,
2275                                        const uint8_t *buf, size_t size,
2276                                        VirtioNetRscUnit *unit)
2277{
2278    struct ip6_header *ip1, *ip2;
2279
2280    ip1 = (struct ip6_header *)(unit->ip);
2281    ip2 = (struct ip6_header *)(seg->unit.ip);
2282    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2283        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2284        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2285        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2286        chain->stat.no_match++;
2287        return RSC_NO_MATCH;
2288    }
2289
2290    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2291}
2292
2293/* Packets with 'SYN' should bypass; packets with other control flags should
2294 * be sent only after the chain is drained, to prevent reordering */
2295static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2296                                         struct tcp_header *tcp)
2297{
2298    uint16_t tcp_hdr;
2299    uint16_t tcp_flag;
2300
2301    tcp_flag = htons(tcp->th_offset_flags);
2302    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2303    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2304    if (tcp_flag & TH_SYN) {
2305        chain->stat.tcp_syn++;
2306        return RSC_BYPASS;
2307    }
2308
2309    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2310        chain->stat.tcp_ctrl_drain++;
2311        return RSC_FINAL;
2312    }
2313
2314    if (tcp_hdr > sizeof(struct tcp_header)) {
2315        chain->stat.tcp_all_opt++;
2316        return RSC_FINAL;
2317    }
2318
2319    return RSC_CANDIDATE;
2320}
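/*
 * Summary of the classification above (illustrative): SYN bypasses
 * coalescing entirely; FIN/URG/RST/ECE/CWR finalize the chain so
 * control segments are never reordered; a header longer than the bare
 * 20 bytes (i.e. any TCP options) also finalizes, since segments with
 * differing options cannot be merged safely.
 */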
2321
2322static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2323                                         NetClientState *nc,
2324                                         const uint8_t *buf, size_t size,
2325                                         VirtioNetRscUnit *unit)
2326{
2327    int ret;
2328    VirtioNetRscSeg *seg, *nseg;
2329
2330    if (QTAILQ_EMPTY(&chain->buffers)) {
2331        chain->stat.empty_cache++;
2332        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2333        timer_mod(chain->drain_timer,
2334              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2335        return size;
2336    }
2337
2338    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2339        if (chain->proto == ETH_P_IP) {
2340            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2341        } else {
2342            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2343        }
2344
2345        if (ret == RSC_FINAL) {
2346            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2347                /* Send failed */
2348                chain->stat.final_failed++;
2349                return 0;
2350            }
2351
2352            /* Send current packet */
2353            return virtio_net_do_receive(nc, buf, size);
2354        } else if (ret == RSC_NO_MATCH) {
2355            continue;
2356        } else {
2357            /* Coalesced; set the flag so the checksum is recalculated for IPv4 */
2358            seg->is_coalesced = 1;
2359            return size;
2360        }
2361    }
2362
2363    chain->stat.no_match_cache++;
2364    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2365    return size;
2366}
2367
2368/* Drain a connection's data; this avoids out-of-order segments */
2369static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2370                                        NetClientState *nc,
2371                                        const uint8_t *buf, size_t size,
2372                                        uint16_t ip_start, uint16_t ip_size,
2373                                        uint16_t tcp_port)
2374{
2375    VirtioNetRscSeg *seg, *nseg;
2376    uint32_t ppair1, ppair2;
2377
2378    ppair1 = *(uint32_t *)(buf + tcp_port);
2379    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2380        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2381        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2382            || (ppair1 != ppair2)) {
2383            continue;
2384        }
2385        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2386            chain->stat.drain_failed++;
2387        }
2388
2389        break;
2390    }
2391
2392    return virtio_net_do_receive(nc, buf, size);
2393}
2394
2395static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2396                                            struct ip_header *ip,
2397                                            const uint8_t *buf, size_t size)
2398{
2399    uint16_t ip_len;
2400
2401    /* Not an ipv4 packet */
2402    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2403        chain->stat.ip_option++;
2404        return RSC_BYPASS;
2405    }
2406
2407    /* Don't handle packets with ip option */
2408    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2409        chain->stat.ip_option++;
2410        return RSC_BYPASS;
2411    }
2412
2413    if (ip->ip_p != IPPROTO_TCP) {
2414        chain->stat.bypass_not_tcp++;
2415        return RSC_BYPASS;
2416    }
2417
2418    /* Don't handle packets with ip fragment */
2419    if (!(htons(ip->ip_off) & IP_DF)) {
2420        chain->stat.ip_frag++;
2421        return RSC_BYPASS;
2422    }
2423
2424    /* Don't handle packets with ecn flag */
2425    if (IPTOS_ECN(ip->ip_tos)) {
2426        chain->stat.ip_ecn++;
2427        return RSC_BYPASS;
2428    }
2429
2430    ip_len = htons(ip->ip_len);
2431    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2432        || ip_len > (size - chain->n->guest_hdr_len -
2433                     sizeof(struct eth_header))) {
2434        chain->stat.ip_hacked++;
2435        return RSC_BYPASS;
2436    }
2437
2438    return RSC_CANDIDATE;
2439}
2440
2441static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2442                                      NetClientState *nc,
2443                                      const uint8_t *buf, size_t size)
2444{
2445    int32_t ret;
2446    uint16_t hdr_len;
2447    VirtioNetRscUnit unit;
2448
2449    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2450
2451    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2452        + sizeof(struct tcp_header))) {
2453        chain->stat.bypass_not_tcp++;
2454        return virtio_net_do_receive(nc, buf, size);
2455    }
2456
2457    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2458    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2459        != RSC_CANDIDATE) {
2460        return virtio_net_do_receive(nc, buf, size);
2461    }
2462
2463    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2464    if (ret == RSC_BYPASS) {
2465        return virtio_net_do_receive(nc, buf, size);
2466    } else if (ret == RSC_FINAL) {
2467        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2468                ((hdr_len + sizeof(struct eth_header)) + 12),
2469                VIRTIO_NET_IP4_ADDR_SIZE,
2470                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2471    }
2472
2473    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2474}
2475
2476static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2477                                            struct ip6_header *ip6,
2478                                            const uint8_t *buf, size_t size)
2479{
2480    uint16_t ip_len;
2481
2482    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2483        != IP_HEADER_VERSION_6) {
2484        return RSC_BYPASS;
2485    }
2486
2487    /* Both options and the next-header protocol are checked here */
2488    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2489        chain->stat.bypass_not_tcp++;
2490        return RSC_BYPASS;
2491    }
2492
2493    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2494    if (ip_len < sizeof(struct tcp_header) ||
2495        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2496                  - sizeof(struct ip6_header))) {
2497        chain->stat.ip_hacked++;
2498        return RSC_BYPASS;
2499    }
2500
2501    /* Don't handle packets with ecn flag */
2502    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2503        chain->stat.ip_ecn++;
2504        return RSC_BYPASS;
2505    }
2506
2507    return RSC_CANDIDATE;
2508}
2509
2510static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2511                                      const uint8_t *buf, size_t size)
2512{
2513    int32_t ret;
2514    uint16_t hdr_len;
2515    VirtioNetRscChain *chain;
2516    VirtioNetRscUnit unit;
2517
2518    chain = opq;
2519    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2520
2521    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2522        + sizeof(struct tcp_header))) {
2523        return virtio_net_do_receive(nc, buf, size);
2524    }
2525
2526    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2527    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
2528        != RSC_CANDIDATE) {
2529        return virtio_net_do_receive(nc, buf, size);
2530    }
2531
2532    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2533    if (ret == RSC_BYPASS) {
2534        return virtio_net_do_receive(nc, buf, size);
2535    } else if (ret == RSC_FINAL) {
2536        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2537                ((hdr_len + sizeof(struct eth_header)) + 8),
2538                VIRTIO_NET_IP6_ADDR_SIZE,
2539                hdr_len + sizeof(struct eth_header)
2540                + sizeof(struct ip6_header));
2541    }
2542
2543    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2544}
2545
2546static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2547                                                      NetClientState *nc,
2548                                                      uint16_t proto)
2549{
2550    VirtioNetRscChain *chain;
2551
2552    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2553        return NULL;
2554    }
2555
2556    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2557        if (chain->proto == proto) {
2558            return chain;
2559        }
2560    }
2561
2562    chain = g_malloc(sizeof(*chain));
2563    chain->n = n;
2564    chain->proto = proto;
2565    if (proto == (uint16_t)ETH_P_IP) {
2566        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2567        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2568    } else {
2569        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2570        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2571    }
2572    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2573                                      virtio_net_rsc_purge, chain);
2574    memset(&chain->stat, 0, sizeof(chain->stat));
2575
2576    QTAILQ_INIT(&chain->buffers);
2577    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2578
2579    return chain;
2580}
2581
2582static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2583                                      const uint8_t *buf,
2584                                      size_t size)
2585{
2586    uint16_t proto;
2587    VirtioNetRscChain *chain;
2588    struct eth_header *eth;
2589    VirtIONet *n;
2590
2591    n = qemu_get_nic_opaque(nc);
2592    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2593        return virtio_net_do_receive(nc, buf, size);
2594    }
2595
2596    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2597    proto = htons(eth->h_proto);
2598
2599    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2600    if (chain) {
2601        chain->stat.received++;
2602        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2603            return virtio_net_rsc_receive4(chain, nc, buf, size);
2604        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2605            return virtio_net_rsc_receive6(chain, nc, buf, size);
2606        }
2607    }
2608    return virtio_net_do_receive(nc, buf, size);
2609}
2610
2611static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2612                                  size_t size)
2613{
2614    VirtIONet *n = qemu_get_nic_opaque(nc);
2615    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2616        return virtio_net_rsc_receive(nc, buf, size);
2617    } else {
2618        return virtio_net_do_receive(nc, buf, size);
2619    }
2620}
2621
2622static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2623
2624static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2625{
2626    VirtIONet *n = qemu_get_nic_opaque(nc);
2627    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2628    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2629    int ret;
2630
2631    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2632    virtio_notify(vdev, q->tx_vq);
2633
2634    g_free(q->async_tx.elem);
2635    q->async_tx.elem = NULL;
2636
2637    virtio_queue_set_notification(q->tx_vq, 1);
2638    ret = virtio_net_flush_tx(q);
2639    if (ret >= n->tx_burst) {
2640        /*
2641         * the flush has been stopped by tx_burst
2642         * we will not receive notification for the
2643         * remaining part, so re-schedule
2644         */
2645        virtio_queue_set_notification(q->tx_vq, 0);
2646        if (q->tx_bh) {
2647            qemu_bh_schedule(q->tx_bh);
2648        } else {
2649            timer_mod(q->tx_timer,
2650                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2651        }
2652        q->tx_waiting = 1;
2653    }
2654}
2655
2656/* TX */
2657static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2658{
2659    VirtIONet *n = q->n;
2660    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2661    VirtQueueElement *elem;
2662    int32_t num_packets = 0;
2663    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2664    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2665        return num_packets;
2666    }
2667
2668    if (q->async_tx.elem) {
2669        virtio_queue_set_notification(q->tx_vq, 0);
2670        return num_packets;
2671    }
2672
2673    for (;;) {
2674        ssize_t ret;
2675        unsigned int out_num;
2676        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2677        struct virtio_net_hdr_mrg_rxbuf mhdr;
2678
2679        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2680        if (!elem) {
2681            break;
2682        }
2683
2684        out_num = elem->out_num;
2685        out_sg = elem->out_sg;
2686        if (out_num < 1) {
2687            virtio_error(vdev, "virtio-net header not in first element");
2688            virtqueue_detach_element(q->tx_vq, elem, 0);
2689            g_free(elem);
2690            return -EINVAL;
2691        }
2692
2693        if (n->has_vnet_hdr) {
2694            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2695                n->guest_hdr_len) {
2696                virtio_error(vdev, "virtio-net header incorrect");
2697                virtqueue_detach_element(q->tx_vq, elem, 0);
2698                g_free(elem);
2699                return -EINVAL;
2700            }
2701            if (n->needs_vnet_hdr_swap) {
2702                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2703                sg2[0].iov_base = &mhdr;
2704                sg2[0].iov_len = n->guest_hdr_len;
2705                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2706                                   out_sg, out_num,
2707                                   n->guest_hdr_len, -1);
2708                if (out_num == VIRTQUEUE_MAX_SIZE) {
2709                    goto drop;
2710                }
2711                out_num += 1;
2712                out_sg = sg2;
2713            }
2714        }
2715        /*
2716         * If host wants to see the guest header as is, we can
2717         * pass it on unchanged. Otherwise, copy just the parts
2718         * that host is interested in.
2719         */
2720        assert(n->host_hdr_len <= n->guest_hdr_len);
2721        if (n->host_hdr_len != n->guest_hdr_len) {
2722            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2723                                       out_sg, out_num,
2724                                       0, n->host_hdr_len);
2725            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2726                             out_sg, out_num,
2727                             n->guest_hdr_len, -1);
2728            out_num = sg_num;
2729            out_sg = sg;
2730        }
2731
2732        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2733                                      out_sg, out_num, virtio_net_tx_complete);
2734        if (ret == 0) {
2735            virtio_queue_set_notification(q->tx_vq, 0);
2736            q->async_tx.elem = elem;
2737            return -EBUSY;
2738        }
2739
2740drop:
2741        virtqueue_push(q->tx_vq, elem, 0);
2742        virtio_notify(vdev, q->tx_vq);
2743        g_free(elem);
2744
2745        if (++num_packets >= n->tx_burst) {
2746            break;
2747        }
2748    }
2749    return num_packets;
2750}
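/*
 * Sketch of the header-shortening copy above (illustrative only): when
 * the guest-visible header is longer than what the host backend wants,
 * out_sg is rebuilt from two ranges of the original chain,
 * [0, host_hdr_len) followed by [guest_hdr_len, end), dropping the
 * bytes in between:
 *
 *   guest view:  | host hdr | extra guest hdr | payload |
 *   host view:   | host hdr | payload |
 */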
2751
2752static void virtio_net_tx_timer(void *opaque);
2753
2754static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2755{
2756    VirtIONet *n = VIRTIO_NET(vdev);
2757    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2758
2759    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2760        virtio_net_drop_tx_queue_data(vdev, vq);
2761        return;
2762    }
2763
2764    /* This happens when the device was stopped but the VCPU wasn't. */
2765    if (!vdev->vm_running) {
2766        q->tx_waiting = 1;
2767        return;
2768    }
2769
2770    if (q->tx_waiting) {
2771        /* We already have queued packets, immediately flush */
2772        timer_del(q->tx_timer);
2773        virtio_net_tx_timer(q);
2774    } else {
2775        /* re-arm timer to flush it (and more) on next tick */
2776        timer_mod(q->tx_timer,
2777                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2778        q->tx_waiting = 1;
2779        virtio_queue_set_notification(vq, 0);
2780    }
2781}
2782
2783static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2784{
2785    VirtIONet *n = VIRTIO_NET(vdev);
2786    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2787
2788    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2789        virtio_net_drop_tx_queue_data(vdev, vq);
2790        return;
2791    }
2792
2793    if (unlikely(q->tx_waiting)) {
2794        return;
2795    }
2796    q->tx_waiting = 1;
2797    /* This happens when the device was stopped but the VCPU wasn't. */
2798    if (!vdev->vm_running) {
2799        return;
2800    }
2801    virtio_queue_set_notification(vq, 0);
2802    qemu_bh_schedule(q->tx_bh);
2803}
2804
2805static void virtio_net_tx_timer(void *opaque)
2806{
2807    VirtIONetQueue *q = opaque;
2808    VirtIONet *n = q->n;
2809    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2810    int ret;
2811
2812    /* This happens when the device was stopped but the BH wasn't. */
2813    if (!vdev->vm_running) {
2814        /* Make sure tx waiting is set, so we'll run when restarted. */
2815        assert(q->tx_waiting);
2816        return;
2817    }
2818
2819    q->tx_waiting = 0;
2820
2821    /* Just in case the driver is not ready any more */
2822    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2823        return;
2824    }
2825
2826    ret = virtio_net_flush_tx(q);
2827    if (ret == -EBUSY || ret == -EINVAL) {
2828        return;
2829    }
2830    /*
2831     * If we flush a full burst of packets, assume there are
2832     * more coming and immediately rearm
2833     */
2834    if (ret >= n->tx_burst) {
2835        q->tx_waiting = 1;
2836        timer_mod(q->tx_timer,
2837                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2838        return;
2839    }
2840    /*
2841     * If less than a full burst, re-enable notification and flush
2842     * anything that may have come in while we weren't looking.  If
2843     * we find something, assume the guest is still active and rearm
2844     */
2845    virtio_queue_set_notification(q->tx_vq, 1);
2846    ret = virtio_net_flush_tx(q);
2847    if (ret > 0) {
2848        virtio_queue_set_notification(q->tx_vq, 0);
2849        q->tx_waiting = 1;
2850        timer_mod(q->tx_timer,
2851                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2852    }
2853}
2854
2855static void virtio_net_tx_bh(void *opaque)
2856{
2857    VirtIONetQueue *q = opaque;
2858    VirtIONet *n = q->n;
2859    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2860    int32_t ret;
2861
2862    /* This happens when the device was stopped but the BH wasn't. */
2863    if (!vdev->vm_running) {
2864        /* Make sure tx waiting is set, so we'll run when restarted. */
2865        assert(q->tx_waiting);
2866        return;
2867    }
2868
2869    q->tx_waiting = 0;
2870
2871    /* Just in case the driver is not ready any more */
2872    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2873        return;
2874    }
2875
2876    ret = virtio_net_flush_tx(q);
2877    if (ret == -EBUSY || ret == -EINVAL) {
2878        /* notification re-enable handled by tx_complete, or device broken */
2879        return;
2880    }
2881
2882    /* If we flush a full burst of packets, assume there are
2883     * more coming and immediately reschedule */
2884    if (ret >= n->tx_burst) {
2885        qemu_bh_schedule(q->tx_bh);
2886        q->tx_waiting = 1;
2887        return;
2888    }
2889
2890    /* If less than a full burst, re-enable notification and flush
2891     * anything that may have come in while we weren't looking.  If
2892     * we find something, assume the guest is still active and reschedule */
2893    virtio_queue_set_notification(q->tx_vq, 1);
2894    ret = virtio_net_flush_tx(q);
2895    if (ret == -EINVAL) {
2896        return;
2897    } else if (ret > 0) {
2898        virtio_queue_set_notification(q->tx_vq, 0);
2899        qemu_bh_schedule(q->tx_bh);
2900        q->tx_waiting = 1;
2901    }
2902}
2903
2904static void virtio_net_add_queue(VirtIONet *n, int index)
2905{
2906    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2907
2908    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2909                                           virtio_net_handle_rx);
2910
2911    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2912        n->vqs[index].tx_vq =
2913            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2914                             virtio_net_handle_tx_timer);
2915        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2916                                              virtio_net_tx_timer,
2917                                              &n->vqs[index]);
2918    } else {
2919        n->vqs[index].tx_vq =
2920            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2921                             virtio_net_handle_tx_bh);
2922        n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2923                                                  &DEVICE(vdev)->mem_reentrancy_guard);
2924    }
2925
2926    n->vqs[index].tx_waiting = 0;
2927    n->vqs[index].n = n;
2928}
2929
2930static void virtio_net_del_queue(VirtIONet *n, int index)
2931{
2932    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2933    VirtIONetQueue *q = &n->vqs[index];
2934    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2935
2936    qemu_purge_queued_packets(nc);
2937
2938    virtio_del_queue(vdev, index * 2);
2939    if (q->tx_timer) {
2940        timer_free(q->tx_timer);
2941        q->tx_timer = NULL;
2942    } else {
2943        qemu_bh_delete(q->tx_bh);
2944        q->tx_bh = NULL;
2945    }
2946    q->tx_waiting = 0;
2947    virtio_del_queue(vdev, index * 2 + 1);
2948}
2949
2950static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2951{
2952    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2953    int old_num_queues = virtio_get_num_queues(vdev);
2954    int new_num_queues = new_max_queue_pairs * 2 + 1;
2955    int i;
2956
2957    assert(old_num_queues >= 3);
2958    assert(old_num_queues % 2 == 1);
2959
2960    if (old_num_queues == new_num_queues) {
2961        return;
2962    }
2963
2964    /*
2965     * We always need to remove and add ctrl vq if
2966     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2967     * and then we only enter one of the following two loops.
2968     */
2969    virtio_del_queue(vdev, old_num_queues - 1);
2970
2971    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2972        /* new_num_queues < old_num_queues */
2973        virtio_net_del_queue(n, i / 2);
2974    }
2975
2976    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2977        /* new_num_queues > old_num_queues */
2978        virtio_net_add_queue(n, i / 2);
2979    }
2980
2981    /* add ctrl_vq last */
2982    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2983}
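/*
 * Virtqueue index layout assumed by the loops above (illustrative):
 * queue pair i owns vq 2*i (rx) and vq 2*i+1 (tx), and the control vq
 * is always last, so a device with N pairs exposes 2*N + 1 queues.
 */
#if 0
static int rx_vq_index_sketch(int pair)    { return pair * 2; }
static int tx_vq_index_sketch(int pair)    { return pair * 2 + 1; }
static int ctrl_vq_index_sketch(int pairs) { return pairs * 2; }
#endif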
2984
2985static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2986{
2987    int max = multiqueue ? n->max_queue_pairs : 1;
2988
2989    n->multiqueue = multiqueue;
2990    virtio_net_change_num_queue_pairs(n, max);
2991
2992    virtio_net_set_queue_pairs(n);
2993}
2994
2995static int virtio_net_post_load_device(void *opaque, int version_id)
2996{
2997    VirtIONet *n = opaque;
2998    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2999    int i, link_down;
3000
3001    trace_virtio_net_post_load_device();
3002    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3003                               virtio_vdev_has_feature(vdev,
3004                                                       VIRTIO_F_VERSION_1),
3005                               virtio_vdev_has_feature(vdev,
3006                                                       VIRTIO_NET_F_HASH_REPORT));
3007
3008    /* MAC_TABLE_ENTRIES may be different from the saved image */
3009    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3010        n->mac_table.in_use = 0;
3011    }
3012
3013    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3014        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3015    }
3016
3017    /*
3018     * curr_guest_offloads will later be overwritten by the
3019     * virtio_set_features_nocheck call done from virtio_load.
3020     * Here we make sure it is preserved and restored accordingly
3021     * in the virtio_net_post_load_virtio callback.
3022     */
3023    n->saved_guest_offloads = n->curr_guest_offloads;
3024
3025    virtio_net_set_queue_pairs(n);
3026
3027    /* Find the first multicast entry in the saved MAC filter */
3028    for (i = 0; i < n->mac_table.in_use; i++) {
3029        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3030            break;
3031        }
3032    }
3033    n->mac_table.first_multi = i;
3034
3035    /* nc.link_down can't be migrated, so infer link_down according
3036     * to the link status bit in n->status */
3037    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3038    for (i = 0; i < n->max_queue_pairs; i++) {
3039        qemu_get_subqueue(n->nic, i)->link_down = link_down;
3040    }
3041
3042    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3043        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3044        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3045                                  QEMU_CLOCK_VIRTUAL,
3046                                  virtio_net_announce_timer, n);
3047        if (n->announce_timer.round) {
3048            timer_mod(n->announce_timer.tm,
3049                      qemu_clock_get_ms(n->announce_timer.type));
3050        } else {
3051            qemu_announce_timer_del(&n->announce_timer, false);
3052        }
3053    }
3054
3055    if (n->rss_data.enabled) {
3056        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3057        if (!n->rss_data.populate_hash) {
3058            if (!virtio_net_attach_epbf_rss(n)) {
3059                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3060                    warn_report("Can't post-load eBPF RSS for vhost");
3061                } else {
3062                    warn_report("Can't post-load eBPF RSS - "
3063                                "falling back to software RSS");
3064                    n->rss_data.enabled_software_rss = true;
3065                }
3066            }
3067        }
3068
3069        trace_virtio_net_rss_enable(n->rss_data.hash_types,
3070                                    n->rss_data.indirections_len,
3071                                    sizeof(n->rss_data.key));
3072    } else {
3073        trace_virtio_net_rss_disable();
3074    }
3075    return 0;
3076}
3077
3078static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3079{
3080    VirtIONet *n = VIRTIO_NET(vdev);
3081    /*
3082     * The state we actually need is now in saved_guest_offloads;
3083     * see virtio_net_post_load_device for details.
3084     * Restore it and apply the desired offloads.
3085     */
3086    n->curr_guest_offloads = n->saved_guest_offloads;
3087    if (peer_has_vnet_hdr(n)) {
3088        virtio_net_apply_guest_offloads(n);
3089    }
3090
3091    return 0;
3092}
3093
3094/* tx_waiting field of a VirtIONetQueue */
3095static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3096    .name = "virtio-net-queue-tx_waiting",
3097    .fields = (VMStateField[]) {
3098        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3099        VMSTATE_END_OF_LIST()
3100   },
3101};
3102
3103static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3104{
3105    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3106}
3107
3108static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3109{
3110    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3111                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3112}
3113
3114static bool mac_table_fits(void *opaque, int version_id)
3115{
3116    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3117}
3118
3119static bool mac_table_doesnt_fit(void *opaque, int version_id)
3120{
3121    return !mac_table_fits(opaque, version_id);
3122}
3123
3124/* This temporary type is shared by all the WITH_TMP methods
3125 * although only some fields are used by each.
3126 */
3127struct VirtIONetMigTmp {
3128    VirtIONet      *parent;
3129    VirtIONetQueue *vqs_1;
3130    uint16_t        curr_queue_pairs_1;
3131    uint8_t         has_ufo;
3132    uint32_t        has_vnet_hdr;
3133};
3134
3135/* The 2nd and subsequent tx_waiting flags are loaded later than
3136 * the 1st entry in the queue_pairs and only if there's more than one
3137 * entry.  We use the tmp mechanism to calculate a temporary
3138 * pointer and count and also validate the count.
3139 */
3140
3141static int virtio_net_tx_waiting_pre_save(void *opaque)
3142{
3143    struct VirtIONetMigTmp *tmp = opaque;
3144
3145    tmp->vqs_1 = tmp->parent->vqs + 1;
3146    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3147    if (tmp->parent->curr_queue_pairs == 0) {
3148        tmp->curr_queue_pairs_1 = 0;
3149    }
3150
3151    return 0;
3152}
3153
3154static int virtio_net_tx_waiting_pre_load(void *opaque)
3155{
3156    struct VirtIONetMigTmp *tmp = opaque;
3157
3158    /* Reuse the pointer setup from save */
3159    virtio_net_tx_waiting_pre_save(opaque);
3160
3161    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3162        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3163            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3164
3165        return -EINVAL;
3166    }
3167
3168    return 0; /* all good */
3169}
3170
3171static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3172    .name      = "virtio-net-tx_waiting",
3173    .pre_load  = virtio_net_tx_waiting_pre_load,
3174    .pre_save  = virtio_net_tx_waiting_pre_save,
3175    .fields    = (VMStateField[]) {
3176        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3177                                     curr_queue_pairs_1,
3178                                     vmstate_virtio_net_queue_tx_waiting,
3179                                     struct VirtIONetQueue),
3180        VMSTATE_END_OF_LIST()
3181    },
3182};
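/*
 * Net effect of the tmp indirection above (illustrative): vqs_1 points
 * at the second VirtIONetQueue and curr_queue_pairs_1 is
 * curr_queue_pairs - 1, so only the 2nd and later tx_waiting flags
 * travel in this subsection; a single-queue-pair device contributes
 * nothing here.
 */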
3183
3184/* the 'has_ufo' flag is just tested; if the incoming stream has the
3185 * flag set we need to check that we have it
3186 */
3187static int virtio_net_ufo_post_load(void *opaque, int version_id)
3188{
3189    struct VirtIONetMigTmp *tmp = opaque;
3190
3191    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3192        error_report("virtio-net: saved image requires TUN_F_UFO support");
3193        return -EINVAL;
3194    }
3195
3196    return 0;
3197}
3198
3199static int virtio_net_ufo_pre_save(void *opaque)
3200{
3201    struct VirtIONetMigTmp *tmp = opaque;
3202
3203    tmp->has_ufo = tmp->parent->has_ufo;
3204
3205    return 0;
3206}
3207
3208static const VMStateDescription vmstate_virtio_net_has_ufo = {
3209    .name      = "virtio-net-ufo",
3210    .post_load = virtio_net_ufo_post_load,
3211    .pre_save  = virtio_net_ufo_pre_save,
3212    .fields    = (VMStateField[]) {
3213        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3214        VMSTATE_END_OF_LIST()
3215    },
3216};

/* The 'has_vnet_hdr' flag is only tested; if the incoming stream has the
 * flag set, we need to check that we have it too.
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}

static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
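
/*
 * Because this is a subsection with a .needed callback, the RSS state
 * only goes on the wire while rss_data.enabled is true; migration to
 * and from QEMUs without RSS support keeps working as long as RSS is
 * not in use.
 */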

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: if it fits we load it, else we throw it away
         * - this can happen if the source has a larger MAC table; post-load
         * sets the overflow flags in that case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: this is an array of uint32s that has always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a
         * bitmap, but stored and accessed as uint32 words.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
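
/*
 * version_id and minimum_version_id are both pinned to
 * VIRTIO_NET_VM_VERSION; newer optional state travels in field tests
 * and subsections (such as the RSS one above) rather than in version
 * bumps.
 */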

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

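/*
 * Virtqueue layout: queue pair N uses vq indexes 2N (rx) and 2N + 1
 * (tx), and the control vq, when present, is the last vq; hence the
 * special case below for idx == 2 when VIRTIO_NET_F_MQ is not
 * negotiated.  vq2q() maps a vq index back to its queue pair
 * (e.g. vq2q(3) == 1).
 */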
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against a bogus queue index (or invalid features)
         * set by a malicious guest or carried in a buggy migration
         * stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return false;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
     * used as the index of the config interrupt.  If the backend does
     * not support it, this returns false.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return vhost_net_config_pending(get_vhost_net(nc->peer));
    }
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    assert(n->vhost_started);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
        /* Must guard against a bogus queue index (or invalid features)
         * set by a malicious guest or carried in a buggy migration
         * stream.
         */
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: bogus vq index ignored\n", __func__);
            return;
        }
        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
    } else {
        nc = qemu_get_subqueue(n->nic, vq2q(idx));
    }
    /*
     * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
     * used as the index of the config interrupt.  If the backend does
     * not support it, this returns without doing anything.
     */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
        return;
    }
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
}

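/*
 * The mac field is always present in the config space, so make sure
 * VIRTIO_NET_F_MAC is set in the (local) feature word before sizing
 * the config structure.
 */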
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name may be NULL; in that case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

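/*
 * Request an unplug of the failover primary.  Marking the device as
 * partially_hotplugged tells the hotplug controller's unplug path to
 * keep the device object around so it can be plugged back in later.
 */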
static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}

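/*
 * Failover flow on migration: when migration enters setup and the
 * primary is still visible, request its unplug and emit the
 * UNPLUG_PRIMARY event so management software can detach the host
 * device; if the migration later fails, plug the primary back in.
 */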
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device; let's plug it back in */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}

static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, s);
}

static bool failover_hide_primary_device(DeviceListener *listener,
                                         const QDict *device_opts,
                                         bool from_json,
                                         Error **errp)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }

    if (!qdict_haskey(device_opts, "failover_pair_id")) {
        return false;
    }

    if (!qdict_haskey(device_opts, "id")) {
        error_setg(errp, "Device with failover_pair_id needs to have id");
        return false;
    }

    standby_id = qdict_get_str(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /*
     * The hide helper can be called several times for a given device.
     * Check that there is only one primary per virtio-net device, but
     * don't clone the qdict again if the helper is called for the same
     * device.
     */
    if (n->primary_opts) {
        const char *old, *new;
        /* devices with failover_pair_id always have an id */
        old = qdict_get_str(n->primary_opts, "id");
        new = qdict_get_str(device_opts, "id");
        if (strcmp(old, new) != 0) {
            error_setg(errp, "Cannot attach more than one primary device to "
                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
            return false;
        }
    } else {
        n->primary_opts = qdict_clone_shallow(device_opts);
        n->primary_opts_from_json = from_json;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}
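
/*
 * A typical failover pair (assuming the virtio-net-pci transport and a
 * hypothetical host device address) looks like:
 *
 *   -device virtio-net-pci,netdev=nd0,id=net0,failover=on
 *   -device vfio-pci,host=01:00.0,id=hostdev0,failover_pair_id=net0
 *
 * The primary stays hidden until the guest negotiates
 * VIRTIO_NET_F_STANDBY.
 */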

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

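    /*
     * SPEED_UNKNOWN is -1: values below it are rejected, -1 itself
     * means "not specified", and any non-negative speed turns on
     * VIRTIO_NET_F_SPEED_DUPLEX.
     */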
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }
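
    /*
     * For example, rx_queue_size=1024 is accepted (a power of 2 within
     * range), while rx_queue_size=1000 makes realize fail with the
     * message above.
     */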

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        virtio_cleanup(vdev);
        return;
    }

    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);

    /*
     * Figure out the number of datapath queue pairs, since the backend
     * could also provide a control queue via its peers.
     */
    if (n->nic_conf.peers.queues) {
        for (i = 0; i < n->max_ncs; i++) {
            if (n->nic_conf.peers.ncs[i]->is_datapath) {
                ++n->max_queue_pairs;
            }
        }
    }
    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);

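    /* Each queue pair needs an rx and a tx vq, plus one control vq. */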
    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
    n->curr_queue_pairs = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queue_pairs; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been
         * called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queue_pairs; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt);

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}

static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queue_pairs;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        qobject_unref(n->primary_opts);
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    } else {
        assert(n->primary_opts == NULL);
    }

    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    for (i = 0; i < max_queue_pairs; i++) {
        virtio_net_del_queue(n, i);
    }
    /* Also delete the control vq. */
    virtio_del_queue(vdev, max_queue_pairs * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * It can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));

    ebpf_rss_init(&n->ebpf_rss);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point the backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    DeviceState *primary;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    primary = failover_find_primary_device(n);
    return primary ? primary->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}
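
/*
 * dev_unplug_pending is hooked into vmstate_virtio_net below; the
 * migration code polls it so device state isn't saved while the
 * failover primary's unplug is still in flight.
 */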

static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct vhost_net *net = get_vhost_net(nc->peer);
    return &net->dev;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
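
/*
 * These become -device properties on the concrete transports; for
 * example (assuming the virtio-net-pci transport and a netdev with
 * id nd0):
 *
 *   -device virtio-net-pci,netdev=nd0,mq=on,rx_queue_size=1024
 */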

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->queue_reset = virtio_net_queue_reset;
    vdc->queue_enable = virtio_net_queue_enable;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
    vdc->get_vhost = virtio_net_get_vhost;
    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)