qemu/hw/net/virtio-net.c
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced (RSC) packets.  This value
 * affects performance significantly and should be tuned carefully:
 * 300000 (300us) is the recommended value for passing the WHQL test,
 * while 50000 can double netperf throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

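/*
 * Each entry maps a feature bit to the last byte of virtio_net_config
 * that must be exposed when that feature is offered; together with
 * cfg_size_params below this lets the config space be sized to match
 * the negotiated feature set.
 */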
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

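/*
 * Virtqueues come in RX/TX pairs: index 2*N is the RX queue and index
 * 2*N + 1 the TX queue of pair N, so dividing by two recovers the pair.
 */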
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

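/*
 * Fill the guest-visible config space: status, queue-pair count, MTU,
 * MAC, link speed/duplex and the RSS limits, stored in the device's
 * byte order.  For a vhost-vdpa peer the config is then re-read from
 * the backing device so the guest sees the real hardware state.
 */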
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

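/*
 * Start or stop the vhost backend so that it tracks the device status:
 * it is started once the guest has set DRIVER_OK, the link is up and the
 * VM is running, and stopped again when any of those cease to hold.  If
 * the backend cannot be started, the device falls back to userspace
 * virtio processing.
 */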
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

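/*
 * Propagate the vnet header endianness to every backend queue.  Returns
 * true if the backends cannot handle it, meaning headers must be
 * byte-swapped in QEMU; a partial failure while enabling is rolled back
 * before returning.
 */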
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

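/*
 * DEVICE_STATUS change handler.  Applies the new status to the vhost
 * backend and the vnet header endianness, then walks every queue pair:
 * queues beyond curr_queue_pairs are treated as stopped, pending TX work
 * is rescheduled or cancelled, and queued TX data is dropped when the
 * link went down while the driver is still active.
 */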
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If TX is waiting, we likely have packets queued in the
                 * TX queue and notification disabled; drop them. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

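/*
 * Build a QAPI list of all VLAN IDs currently admitted by the filter:
 * n->vlans is a bitmap holding 32 VLAN IDs per word, so an accepted ID
 * is reconstructed as (word << 5) + bit.
 */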
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

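/*
 * Choose the guest-visible vnet header layout.  With VIRTIO_F_VERSION_1
 * the header always uses the merged-rxbuf layout, extended to
 * virtio_net_hdr_v1_hash when hash reports were negotiated; legacy
 * devices use the short virtio_net_hdr unless mergeable RX buffers are
 * on.  When the backend accepts the same length, host and guest header
 * lengths are kept equal so the data path need not adjust them.
 */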
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

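/*
 * Only vhost-user and vhost-vdpa peers can cope with TX queues larger
 * than the historical default, so the maximum configurable TX queue
 * size depends on the peer type.
 */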
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}

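/*
 * Enable or disable the backend side of one queue pair when the guest
 * changes the number of active pairs: vhost-user peers toggle the vring,
 * tap peers enable or disable the corresponding queue.
 */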
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set
     * without being enabled in the backend. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could
     * refuse to start otherwise.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to announce its new location with vDPA devices
     * that do not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

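/*
 * Failover support: a virtio-net device offering the "standby" feature
 * can be paired with a primary PCI device (typically a passthrough NIC)
 * whose failover_pair_id property names this network client; the pair is
 * located by walking the qdev tree below.
 */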
typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device currently being walked
 * @opaque: the FailoverDevice to fill in when a match is found
 *
 * Returns 1 to stop the walk once the primary device is found.
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 *
 * Returns the primary device, or NULL if none is present.
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * keep acked_features in NetVhostUserState up-to-date so it
         * can't miss any features configured by guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}

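/*
 * VIRTIO_NET_CTRL_RX commands: each carries a single byte of
 * command-specific data, interpreted as an on/off toggle for one of the
 * receive filter modes (promiscuous, all-multicast, and so on).
 */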
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

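/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET payload: two virtio_net_ctrl_mac blocks,
 * unicast first, then multicast, each roughly of the form
 *
 *     struct virtio_net_ctrl_mac {
 *         uint32_t entries;
 *         uint8_t macs[][ETH_ALEN];
 *     };
 *
 * Tables that exceed MAC_TABLE_ENTRIES set the corresponding overflow
 * flag, which makes the receive filter accept that traffic class.
 */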
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}

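/*
 * Parse a VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss) or
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG (!do_rss) command.  The payload is a
 * struct virtio_net_rss_config: a fixed header (hash_types,
 * indirection_table_mask, unclassified_queue), a variable-length
 * indirection table, then max_tx_vq (u16), hash_key_length (u8) and the
 * hash key itself.  Returns the number of active queue pairs on success,
 * 0 on failure.
 */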
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}

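/*
 * Process one control-queue element: the driver-readable buffers start
 * with a virtio_net_ctrl_hdr (class + command) followed by the
 * command-specific data, and the device-writable side receives a single
 * virtio_net_ctrl_ack status byte in return.
 */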
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

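/*
 * Check whether the RX virtqueue can take a packet of @bufsize bytes.
 * Returns 1 with guest notifications disabled when buffers are
 * available, or 0 with notifications re-enabled so the guest will kick
 * us once it posts more buffers.
 */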
1617static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1618{
1619    VirtIONet *n = q->n;
1620    if (virtio_queue_empty(q->rx_vq) ||
1621        (n->mergeable_rx_bufs &&
1622         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1623        virtio_queue_set_notification(q->rx_vq, 1);
1624
1625        /* To avoid a race condition where the guest has made some buffers
1626         * available after the above check but before notification was
1627         * enabled, check for available buffers again.
1628         */
1629        if (virtio_queue_empty(q->rx_vq) ||
1630            (n->mergeable_rx_bufs &&
1631             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1632            return 0;
1633        }
1634    }
1635
1636    virtio_queue_set_notification(q->rx_vq, 0);
1637    return 1;
1638}
1639
1640static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1641{
1642    virtio_tswap16s(vdev, &hdr->hdr_len);
1643    virtio_tswap16s(vdev, &hdr->gso_size);
1644    virtio_tswap16s(vdev, &hdr->csum_start);
1645    virtio_tswap16s(vdev, &hdr->csum_offset);
1646}
1647
1648/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1649 * it never finds out that the packets don't have valid checksums.  This
1650 * causes dhclient to get upset.  Fedora's carried a patch for ages to
1651 * fix this with Xen but it hasn't appeared in an upstream release of
1652 * dhclient yet.
1653 *
1654 * To avoid breaking existing guests, we catch udp packets and add
1655 * checksums.  This is terrible but it's better than hacking the guest
1656 * kernels.
1657 *
1658 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1659 * we should provide a mechanism to disable it to avoid polluting the host
1660 * cache.
1661 */
1662static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1663                                        uint8_t *buf, size_t size)
1664{
1665    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1666        (size > 27 && size < 1500) && /* normal sized MTU */
1667        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1668        (buf[23] == 17) && /* ip.protocol == UDP */
1669        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1670        net_checksum_calculate(buf, size, CSUM_UDP);
1671        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1672    }
1673}
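
/*
 * The magic offsets above decode as follows, assuming an untagged
 * Ethernet/IPv4/UDP frame with no IP options (buf points at the Ethernet
 * header):
 *
 *     buf[12..13]  EtherType        14-byte Ethernet header starts at 0
 *     buf[23]      IPv4 protocol    IPv4 header at 14, protocol at +9
 *     buf[34..35]  UDP source port  UDP header at 14 + 20, sport at +0
 *
 * A VLAN-tagged or option-bearing frame shifts these offsets and thus
 * never matches.
 */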
1674
1675static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1676                           const void *buf, size_t size)
1677{
1678    if (n->has_vnet_hdr) {
1679        /* FIXME this cast is evil */
1680        void *wbuf = (void *)buf;
1681        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1682                                    size - n->host_hdr_len);
1683
1684        if (n->needs_vnet_hdr_swap) {
1685            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1686        }
1687        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1688    } else {
1689        struct virtio_net_hdr hdr = {
1690            .flags = 0,
1691            .gso_type = VIRTIO_NET_HDR_GSO_NONE
1692        };
1693        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1694    }
1695}
1696
1697static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1698{
1699    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1700    static const uint8_t vlan[] = {0x81, 0x00};
1701    uint8_t *ptr = (uint8_t *)buf;
1702    int i;
1703
1704    if (n->promisc)
1705        return 1;
1706
1707    ptr += n->host_hdr_len;
1708
1709    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1710        int vid = lduw_be_p(ptr + 14) & 0xfff;
1711        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1712            return 0;
1713    }
1714
1715    if (ptr[0] & 1) { /* multicast */
1716        if (!memcmp(ptr, bcast, sizeof(bcast))) {
1717            return !n->nobcast;
1718        } else if (n->nomulti) {
1719            return 0;
1720        } else if (n->allmulti || n->mac_table.multi_overflow) {
1721            return 1;
1722        }
1723
1724        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1725            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1726                return 1;
1727            }
1728        }
1729    } else { /* unicast */
1730        if (n->nouni) {
1731            return 0;
1732        } else if (n->alluni || n->mac_table.uni_overflow) {
1733            return 1;
1734        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1735            return 1;
1736        }
1737
1738        for (i = 0; i < n->mac_table.first_multi; i++) {
1739            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1740                return 1;
1741            }
1742        }
1743    }
1744
1745    return 0;
1746}
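
/*
 * The VLAN filter above is a flat bitmap of 4096 bits stored in uint32_t
 * words: vid >> 5 selects the word and vid & 0x1f the bit.  For example,
 * VID 100 maps to word 3, bit 4:
 *
 *     100 >> 5   == 3
 *     100 & 0x1f == 4
 *     test: n->vlans[3] & (1U << 4)
 */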
1747
1748static uint8_t virtio_net_get_hash_type(bool hasip4,
1749                                        bool hasip6,
1750                                        EthL4HdrProto l4hdr_proto,
1751                                        uint32_t types)
1752{
1753    if (hasip4) {
1754        switch (l4hdr_proto) {
1755        case ETH_L4_HDR_PROTO_TCP:
1756            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1757                return NetPktRssIpV4Tcp;
1758            }
1759            break;
1760
1761        case ETH_L4_HDR_PROTO_UDP:
1762            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1763                return NetPktRssIpV4Udp;
1764            }
1765            break;
1766
1767        default:
1768            break;
1769        }
1770
1771        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1772            return NetPktRssIpV4;
1773        }
1774    } else if (hasip6) {
1775        switch (l4hdr_proto) {
1776        case ETH_L4_HDR_PROTO_TCP:
1777            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1778                return NetPktRssIpV6TcpEx;
1779            }
1780            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1781                return NetPktRssIpV6Tcp;
1782            }
1783            break;
1784
1785        case ETH_L4_HDR_PROTO_UDP:
1786            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1787                return NetPktRssIpV6UdpEx;
1788            }
1789            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1790                return NetPktRssIpV6Udp;
1791            }
1792            break;
1793
1794        default:
1795            break;
1796        }
1797
1798        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1799            return NetPktRssIpV6Ex;
1800        }
1801        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1802            return NetPktRssIpV6;
1803        }
1804    }
1805    return 0xff;
1806}
1807
1808static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1809                                   uint32_t hash)
1810{
1811    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1812    hdr->hash_value = hash;
1813    hdr->hash_report = report;
1814}
1815
1816static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1817                                  size_t size)
1818{
1819    VirtIONet *n = qemu_get_nic_opaque(nc);
1820    unsigned int index = nc->queue_index, new_index = index;
1821    struct NetRxPkt *pkt = n->rx_pkt;
1822    uint8_t net_hash_type;
1823    uint32_t hash;
1824    bool hasip4, hasip6;
1825    EthL4HdrProto l4hdr_proto;
1826    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1827        VIRTIO_NET_HASH_REPORT_IPv4,
1828        VIRTIO_NET_HASH_REPORT_TCPv4,
1829        VIRTIO_NET_HASH_REPORT_TCPv6,
1830        VIRTIO_NET_HASH_REPORT_IPv6,
1831        VIRTIO_NET_HASH_REPORT_IPv6_EX,
1832        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1833        VIRTIO_NET_HASH_REPORT_UDPv4,
1834        VIRTIO_NET_HASH_REPORT_UDPv6,
1835        VIRTIO_NET_HASH_REPORT_UDPv6_EX
1836    };
1837    struct iovec iov = {
1838        .iov_base = (void *)buf,
1839        .iov_len = size
1840    };
1841
1842    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1843    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1844    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1845                                             n->rss_data.hash_types);
1846    if (net_hash_type > NetPktRssIpV6UdpEx) {
1847        if (n->rss_data.populate_hash) {
1848            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1849        }
1850        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1851    }
1852
1853    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1854
1855    if (n->rss_data.populate_hash) {
1856        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1857    }
1858
1859    if (n->rss_data.redirect) {
1860        new_index = hash & (n->rss_data.indirections_len - 1);
1861        new_index = n->rss_data.indirections_table[new_index];
1862    }
1863
1864    return (index == new_index) ? -1 : new_index;
1865}
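
/*
 * Worked example of the redirection arithmetic above, relying on
 * indirections_len being a power of two (which the masking with
 * indirections_len - 1 assumes):
 *
 *     hash             = 0x2a6f31c9
 *     indirections_len = 8
 *     hash & (8 - 1)   = 1
 *
 * so indirections_table[1] names the destination queue.
 */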
1866
1867static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1868                                      size_t size, bool no_rss)
1869{
1870    VirtIONet *n = qemu_get_nic_opaque(nc);
1871    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1872    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1873    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1874    size_t lens[VIRTQUEUE_MAX_SIZE];
1875    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1876    struct virtio_net_hdr_mrg_rxbuf mhdr;
1877    unsigned mhdr_cnt = 0;
1878    size_t offset, i, guest_offset, j;
1879    ssize_t err;
1880
1881    if (!virtio_net_can_receive(nc)) {
1882        return -1;
1883    }
1884
1885    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1886        int index = virtio_net_process_rss(nc, buf, size);
1887        if (index >= 0) {
1888            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1889            return virtio_net_receive_rcu(nc2, buf, size, true);
1890        }
1891    }
1892
1893    /* hdr_len refers to the header we supply to the guest */
1894    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1895        return 0;
1896    }
1897
1898    if (!receive_filter(n, buf, size))
1899        return size;
1900
1901    offset = i = 0;
1902
1903    while (offset < size) {
1904        VirtQueueElement *elem;
1905        int len, total;
1906        const struct iovec *sg;
1907
1908        total = 0;
1909
1910        if (i == VIRTQUEUE_MAX_SIZE) {
1911            virtio_error(vdev, "virtio-net unexpected long buffer chain");
1912            err = size;
1913            goto err;
1914        }
1915
1916        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1917        if (!elem) {
1918            if (i) {
1919                virtio_error(vdev, "virtio-net unexpected empty queue: "
1920                             "i %zd mergeable %d offset %zd, size %zd, "
1921                             "guest hdr len %zd, host hdr len %zd "
1922                             "guest features 0x%" PRIx64,
1923                             i, n->mergeable_rx_bufs, offset, size,
1924                             n->guest_hdr_len, n->host_hdr_len,
1925                             vdev->guest_features);
1926            }
1927            err = -1;
1928            goto err;
1929        }
1930
1931        if (elem->in_num < 1) {
1932            virtio_error(vdev,
1933                         "virtio-net receive queue contains no in buffers");
1934            virtqueue_detach_element(q->rx_vq, elem, 0);
1935            g_free(elem);
1936            err = -1;
1937            goto err;
1938        }
1939
1940        sg = elem->in_sg;
1941        if (i == 0) {
1942            assert(offset == 0);
1943            if (n->mergeable_rx_bufs) {
1944                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1945                                    sg, elem->in_num,
1946                                    offsetof(typeof(mhdr), num_buffers),
1947                                    sizeof(mhdr.num_buffers));
1948            }
1949
1950            receive_header(n, sg, elem->in_num, buf, size);
1951            if (n->rss_data.populate_hash) {
1952                offset = sizeof(mhdr);
1953                iov_from_buf(sg, elem->in_num, offset,
1954                             buf + offset, n->host_hdr_len - sizeof(mhdr));
1955            }
1956            offset = n->host_hdr_len;
1957            total += n->guest_hdr_len;
1958            guest_offset = n->guest_hdr_len;
1959        } else {
1960            guest_offset = 0;
1961        }
1962
1963        /* copy in packet.  ugh */
1964        len = iov_from_buf(sg, elem->in_num, guest_offset,
1965                           buf + offset, size - offset);
1966        total += len;
1967        offset += len;
1968        /* If buffers can't be merged, at this point we
1969         * must have consumed the complete packet.
1970         * Otherwise, drop it. */
1971        if (!n->mergeable_rx_bufs && offset < size) {
1972            virtqueue_unpop(q->rx_vq, elem, total);
1973            g_free(elem);
1974            err = size;
1975            goto err;
1976        }
1977
1978        elems[i] = elem;
1979        lens[i] = total;
1980        i++;
1981    }
1982
1983    if (mhdr_cnt) {
1984        virtio_stw_p(vdev, &mhdr.num_buffers, i);
1985        iov_from_buf(mhdr_sg, mhdr_cnt,
1986                     0,
1987                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
1988    }
1989
1990    for (j = 0; j < i; j++) {
1991        /* signal other side */
1992        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1993        g_free(elems[j]);
1994    }
1995
1996    virtqueue_flush(q->rx_vq, i);
1997    virtio_notify(vdev, q->rx_vq);
1998
1999    return size;
2000
2001err:
2002    for (j = 0; j < i; j++) {
2003        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2004        g_free(elems[j]);
2005    }
2006
2007    return err;
2008}
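
/*
 * Mergeable RX recap (per the virtio spec): the first buffer chain of a
 * packet begins with
 *
 *     struct virtio_net_hdr_mrg_rxbuf {
 *         struct virtio_net_hdr hdr;
 *         uint16_t num_buffers;   // total chains used by this packet
 *     };
 *
 * num_buffers is only known once the whole packet has been copied, which
 * is why the iovec slice covering it is captured up front (mhdr_sg) and
 * the final count is written back after the copy loop, byte-swapped for
 * the guest with virtio_stw_p().
 */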
2009
2010static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2011                                  size_t size)
2012{
2013    RCU_READ_LOCK_GUARD();
2014
2015    return virtio_net_receive_rcu(nc, buf, size, false);
2016}
2017
2018static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2019                                         const uint8_t *buf,
2020                                         VirtioNetRscUnit *unit)
2021{
2022    uint16_t ip_hdrlen;
2023    struct ip_header *ip;
2024
2025    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2026                              + sizeof(struct eth_header));
2027    unit->ip = (void *)ip;
2028    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2029    unit->ip_plen = &ip->ip_len;
2030    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2031    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2032    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2033}
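
/*
 * The header-length arithmetic above, worked through: ip_ver_len keeps
 * the IHL in its low nibble, counted in 32-bit words, so (x & 0xF) << 2
 * yields bytes (0x45 -> 5 words -> 20 bytes).  th_offset_flags keeps the
 * TCP data offset in its top nibble, also in words, so
 * ((x & 0xF000) >> 12) << 2 == (x & 0xF000) >> 10 yields bytes
 * (0x5010 -> 0x5000 >> 10 -> 20 bytes).
 */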
2034
2035static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2036                                         const uint8_t *buf,
2037                                         VirtioNetRscUnit *unit)
2038{
2039    struct ip6_header *ip6;
2040
2041    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2042                                 + sizeof(struct eth_header));
2043    unit->ip = ip6;
2044    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2045    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2046                                        + sizeof(struct ip6_header));
2047    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2048
2049    /* There is a difference between the payload length in IPv4 and IPv6:
2050       the IP header is excluded in IPv6 */
2051    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2052}
2053
2054static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2055                                       VirtioNetRscSeg *seg)
2056{
2057    int ret;
2058    struct virtio_net_hdr_v1 *h;
2059
2060    h = (struct virtio_net_hdr_v1 *)seg->buf;
2061    h->flags = 0;
2062    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2063
2064    if (seg->is_coalesced) {
2065        h->rsc.segments = seg->packets;
2066        h->rsc.dup_acks = seg->dup_ack;
2067        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2068        if (chain->proto == ETH_P_IP) {
2069            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2070        } else {
2071            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2072        }
2073    }
2074
2075    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2076    QTAILQ_REMOVE(&chain->buffers, seg, next);
2077    g_free(seg->buf);
2078    g_free(seg);
2079
2080    return ret;
2081}
2082
2083static void virtio_net_rsc_purge(void *opq)
2084{
2085    VirtioNetRscSeg *seg, *rn;
2086    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2087
2088    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2089        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2090            chain->stat.purge_failed++;
2091            continue;
2092        }
2093    }
2094
2095    chain->stat.timer++;
2096    if (!QTAILQ_EMPTY(&chain->buffers)) {
2097        timer_mod(chain->drain_timer,
2098              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2099    }
2100}
2101
2102static void virtio_net_rsc_cleanup(VirtIONet *n)
2103{
2104    VirtioNetRscChain *chain, *rn_chain;
2105    VirtioNetRscSeg *seg, *rn_seg;
2106
2107    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2108        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2109            QTAILQ_REMOVE(&chain->buffers, seg, next);
2110            g_free(seg->buf);
2111            g_free(seg);
2112        }
2113
2114        timer_free(chain->drain_timer);
2115        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2116        g_free(chain);
2117    }
2118}
2119
2120static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2121                                     NetClientState *nc,
2122                                     const uint8_t *buf, size_t size)
2123{
2124    uint16_t hdr_len;
2125    VirtioNetRscSeg *seg;
2126
2127    hdr_len = chain->n->guest_hdr_len;
2128    seg = g_new(VirtioNetRscSeg, 1);
2129    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2130        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2131    memcpy(seg->buf, buf, size);
2132    seg->size = size;
2133    seg->packets = 1;
2134    seg->dup_ack = 0;
2135    seg->is_coalesced = 0;
2136    seg->nc = nc;
2137
2138    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2139    chain->stat.cache++;
2140
2141    switch (chain->proto) {
2142    case ETH_P_IP:
2143        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2144        break;
2145    case ETH_P_IPV6:
2146        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2147        break;
2148    default:
2149        g_assert_not_reached();
2150    }
2151}
2152
2153static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2154                                         VirtioNetRscSeg *seg,
2155                                         const uint8_t *buf,
2156                                         struct tcp_header *n_tcp,
2157                                         struct tcp_header *o_tcp)
2158{
2159    uint32_t nack, oack;
2160    uint16_t nwin, owin;
2161
2162    nack = htonl(n_tcp->th_ack);
2163    nwin = htons(n_tcp->th_win);
2164    oack = htonl(o_tcp->th_ack);
2165    owin = htons(o_tcp->th_win);
2166
2167    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2168        chain->stat.ack_out_of_win++;
2169        return RSC_FINAL;
2170    } else if (nack == oack) {
2171        /* duplicated ack or window probe */
2172        if (nwin == owin) {
2173            /* duplicated ack, bump dup-ack count (WHQL test allows up to 1) */
2174            chain->stat.dup_ack++;
2175            return RSC_FINAL;
2176        } else {
2177            /* Coalesce window update */
2178            o_tcp->th_win = n_tcp->th_win;
2179            chain->stat.win_update++;
2180            return RSC_COALESCE;
2181        }
2182    } else {
2183        /* pure ack, go to 'C', finalize */
2184        chain->stat.pure_ack++;
2185        return RSC_FINAL;
2186    }
2187}
2188
2189static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2190                                            VirtioNetRscSeg *seg,
2191                                            const uint8_t *buf,
2192                                            VirtioNetRscUnit *n_unit)
2193{
2194    void *data;
2195    uint16_t o_ip_len;
2196    uint32_t nseq, oseq;
2197    VirtioNetRscUnit *o_unit;
2198
2199    o_unit = &seg->unit;
2200    o_ip_len = htons(*o_unit->ip_plen);
2201    nseq = htonl(n_unit->tcp->th_seq);
2202    oseq = htonl(o_unit->tcp->th_seq);
2203
2204    /* out of order or retransmitted. */
2205    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2206        chain->stat.data_out_of_win++;
2207        return RSC_FINAL;
2208    }
2209
2210    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2211    if (nseq == oseq) {
2212        if ((o_unit->payload == 0) && n_unit->payload) {
2213            /* From no payload to payload, normal case, not a dup ack or etc */
2214            chain->stat.data_after_pure_ack++;
2215            goto coalesce;
2216        } else {
2217            return virtio_net_rsc_handle_ack(chain, seg, buf,
2218                                             n_unit->tcp, o_unit->tcp);
2219        }
2220    } else if ((nseq - oseq) != o_unit->payload) {
2221        /* Not a consistent packet, out of order */
2222        chain->stat.data_out_of_order++;
2223        return RSC_FINAL;
2224    } else {
2225coalesce:
2226        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2227            chain->stat.over_size++;
2228            return RSC_FINAL;
2229        }
2230
2231        /* The data is in order; the payload length field differs between
2232           v4/v6, so use the field value to update and record the new data len */
2233        o_unit->payload += n_unit->payload; /* update new data len */
2234
2235        /* update field in ip header */
2236        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2237
2238        /* Take the 'PUSH' flag from the new segment; the WHQL test guide says
2239           'PUSH' can be coalesced for Windows guests, while this may change the
2240           behavior for Linux guests (only if they use the RSC feature). */
2241        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2242
2243        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2244        o_unit->tcp->th_win = n_unit->tcp->th_win;
2245
2246        memmove(seg->buf + seg->size, data, n_unit->payload);
2247        seg->size += n_unit->payload;
2248        seg->packets++;
2249        chain->stat.coalesced++;
2250        return RSC_COALESCE;
2251    }
2252}
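
/*
 * The coalescing condition in numbers: a segment continues the cached one
 * iff its sequence number starts exactly where the cached payload ends.
 * With a cached segment at oseq = 1000 carrying 1448 payload bytes, an
 * in-order successor has nseq = 2448, so nseq - oseq == o_unit->payload
 * and the data is appended; any other delta is out of window or out of
 * order and finalizes the chain.
 */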
2253
2254static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2255                                        VirtioNetRscSeg *seg,
2256                                        const uint8_t *buf, size_t size,
2257                                        VirtioNetRscUnit *unit)
2258{
2259    struct ip_header *ip1, *ip2;
2260
2261    ip1 = (struct ip_header *)(unit->ip);
2262    ip2 = (struct ip_header *)(seg->unit.ip);
2263    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2264        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2265        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2266        chain->stat.no_match++;
2267        return RSC_NO_MATCH;
2268    }
2269
2270    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2271}
2272
2273static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2274                                        VirtioNetRscSeg *seg,
2275                                        const uint8_t *buf, size_t size,
2276                                        VirtioNetRscUnit *unit)
2277{
2278    struct ip6_header *ip1, *ip2;
2279
2280    ip1 = (struct ip6_header *)(unit->ip);
2281    ip2 = (struct ip6_header *)(seg->unit.ip);
2282    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2283        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2284        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2285        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2286            chain->stat.no_match++;
2287            return RSC_NO_MATCH;
2288    }
2289
2290    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2291}
2292
2293/* Packets with 'SYN' should bypass; packets with other flags should be sent
2294 * after a drain to prevent out-of-order delivery */
2295static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2296                                         struct tcp_header *tcp)
2297{
2298    uint16_t tcp_hdr;
2299    uint16_t tcp_flag;
2300
2301    tcp_flag = htons(tcp->th_offset_flags);
2302    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2303    tcp_flag &= VIRTIO_NET_TCP_FLAG;
2304    if (tcp_flag & TH_SYN) {
2305        chain->stat.tcp_syn++;
2306        return RSC_BYPASS;
2307    }
2308
2309    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2310        chain->stat.tcp_ctrl_drain++;
2311        return RSC_FINAL;
2312    }
2313
2314    if (tcp_hdr > sizeof(struct tcp_header)) {
2315        chain->stat.tcp_all_opt++;
2316        return RSC_FINAL;
2317    }
2318
2319    return RSC_CANDIDATE;
2320}
2321
2322static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2323                                         NetClientState *nc,
2324                                         const uint8_t *buf, size_t size,
2325                                         VirtioNetRscUnit *unit)
2326{
2327    int ret;
2328    VirtioNetRscSeg *seg, *nseg;
2329
2330    if (QTAILQ_EMPTY(&chain->buffers)) {
2331        chain->stat.empty_cache++;
2332        virtio_net_rsc_cache_buf(chain, nc, buf, size);
2333        timer_mod(chain->drain_timer,
2334              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2335        return size;
2336    }
2337
2338    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2339        if (chain->proto == ETH_P_IP) {
2340            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2341        } else {
2342            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2343        }
2344
2345        if (ret == RSC_FINAL) {
2346            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2347                /* Send failed */
2348                chain->stat.final_failed++;
2349                return 0;
2350            }
2351
2352            /* Send current packet */
2353            return virtio_net_do_receive(nc, buf, size);
2354        } else if (ret == RSC_NO_MATCH) {
2355            continue;
2356        } else {
2357            /* Coalesced; set the flag so the checksum is recalculated for IPv4 */
2358            seg->is_coalesced = 1;
2359            return size;
2360        }
2361    }
2362
2363    chain->stat.no_match_cache++;
2364    virtio_net_rsc_cache_buf(chain, nc, buf, size);
2365    return size;
2366}
2367
2368/* Drain a connection's data; this avoids out-of-order segments */
2369static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2370                                        NetClientState *nc,
2371                                        const uint8_t *buf, size_t size,
2372                                        uint16_t ip_start, uint16_t ip_size,
2373                                        uint16_t tcp_port)
2374{
2375    VirtioNetRscSeg *seg, *nseg;
2376    uint32_t ppair1, ppair2;
2377
2378    ppair1 = *(uint32_t *)(buf + tcp_port);
2379    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2380        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2381        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2382            || (ppair1 != ppair2)) {
2383            continue;
2384        }
2385        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2386            chain->stat.drain_failed++;
2387        }
2388
2389        break;
2390    }
2391
2392    return virtio_net_do_receive(nc, buf, size);
2393}
2394
2395static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2396                                            struct ip_header *ip,
2397                                            const uint8_t *buf, size_t size)
2398{
2399    uint16_t ip_len;
2400
2401    /* Not an ipv4 packet */
2402    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2403        chain->stat.ip_option++;
2404        return RSC_BYPASS;
2405    }
2406
2407    /* Don't handle packets with IP options */
2408    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2409        chain->stat.ip_option++;
2410        return RSC_BYPASS;
2411    }
2412
2413    if (ip->ip_p != IPPROTO_TCP) {
2414        chain->stat.bypass_not_tcp++;
2415        return RSC_BYPASS;
2416    }
2417
2418    /* Don't handle fragmented IP packets */
2419    if (!(htons(ip->ip_off) & IP_DF)) {
2420        chain->stat.ip_frag++;
2421        return RSC_BYPASS;
2422    }
2423
2424    /* Don't handle packets with ecn flag */
2425    if (IPTOS_ECN(ip->ip_tos)) {
2426        chain->stat.ip_ecn++;
2427        return RSC_BYPASS;
2428    }
2429
2430    ip_len = htons(ip->ip_len);
2431    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2432        || ip_len > (size - chain->n->guest_hdr_len -
2433                     sizeof(struct eth_header))) {
2434        chain->stat.ip_hacked++;
2435        return RSC_BYPASS;
2436    }
2437
2438    return RSC_CANDIDATE;
2439}
2440
2441static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2442                                      NetClientState *nc,
2443                                      const uint8_t *buf, size_t size)
2444{
2445    int32_t ret;
2446    uint16_t hdr_len;
2447    VirtioNetRscUnit unit;
2448
2449    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2450
2451    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2452        + sizeof(struct tcp_header))) {
2453        chain->stat.bypass_not_tcp++;
2454        return virtio_net_do_receive(nc, buf, size);
2455    }
2456
2457    virtio_net_rsc_extract_unit4(chain, buf, &unit);
2458    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2459        != RSC_CANDIDATE) {
2460        return virtio_net_do_receive(nc, buf, size);
2461    }
2462
2463    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2464    if (ret == RSC_BYPASS) {
2465        return virtio_net_do_receive(nc, buf, size);
2466    } else if (ret == RSC_FINAL) {
2467        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2468                ((hdr_len + sizeof(struct eth_header)) + 12),
2469                VIRTIO_NET_IP4_ADDR_SIZE,
2470                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2471    }
2472
2473    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2474}
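
/*
 * The drain_flow arguments above, unpacked for IPv4: the source address
 * sits 12 bytes into the IPv4 header, so ip_start is
 * hdr_len + 14 (Ethernet) + 12; ip_size is 8, i.e. VIRTIO_NET_IP4_ADDR_SIZE
 * (4-byte saddr + daddr); and the TCP ports are the first 4 bytes of the
 * TCP header at hdr_len + 14 + 20.
 */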
2475
2476static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2477                                            struct ip6_header *ip6,
2478                                            const uint8_t *buf, size_t size)
2479{
2480    uint16_t ip_len;
2481
2482    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2483        != IP_HEADER_VERSION_6) {
2484        return RSC_BYPASS;
2485    }
2486
2487    /* Both options and the protocol are checked by this */
2488    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2489        chain->stat.bypass_not_tcp++;
2490        return RSC_BYPASS;
2491    }
2492
2493    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2494    if (ip_len < sizeof(struct tcp_header) ||
2495        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2496                  - sizeof(struct ip6_header))) {
2497        chain->stat.ip_hacked++;
2498        return RSC_BYPASS;
2499    }
2500
2501    /* Don't handle packets with ecn flag */
2502    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2503        chain->stat.ip_ecn++;
2504        return RSC_BYPASS;
2505    }
2506
2507    return RSC_CANDIDATE;
2508}
2509
2510static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2511                                      const uint8_t *buf, size_t size)
2512{
2513    int32_t ret;
2514    uint16_t hdr_len;
2515    VirtioNetRscChain *chain;
2516    VirtioNetRscUnit unit;
2517
2518    chain = opq;
2519    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2520
2521    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2522        + sizeof(struct tcp_header))) {
2523        return virtio_net_do_receive(nc, buf, size);
2524    }
2525
2526    virtio_net_rsc_extract_unit6(chain, buf, &unit);
2527    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2528                                                 unit.ip, buf, size)) {
2529        return virtio_net_do_receive(nc, buf, size);
2530    }
2531
2532    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2533    if (ret == RSC_BYPASS) {
2534        return virtio_net_do_receive(nc, buf, size);
2535    } else if (ret == RSC_FINAL) {
2536        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2537                ((hdr_len + sizeof(struct eth_header)) + 8),
2538                VIRTIO_NET_IP6_ADDR_SIZE,
2539                hdr_len + sizeof(struct eth_header)
2540                + sizeof(struct ip6_header));
2541    }
2542
2543    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2544}
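
/*
 * Same decoding for the IPv6 path: the source address starts 8 bytes into
 * the IPv6 header, hence ip_start = hdr_len + 14 + 8; ip_size is 32, i.e.
 * VIRTIO_NET_IP6_ADDR_SIZE (16-byte saddr + daddr); and the TCP ports
 * follow the fixed 40-byte IPv6 header.
 */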
2545
2546static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2547                                                      NetClientState *nc,
2548                                                      uint16_t proto)
2549{
2550    VirtioNetRscChain *chain;
2551
2552    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2553        return NULL;
2554    }
2555
2556    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2557        if (chain->proto == proto) {
2558            return chain;
2559        }
2560    }
2561
2562    chain = g_malloc(sizeof(*chain));
2563    chain->n = n;
2564    chain->proto = proto;
2565    if (proto == (uint16_t)ETH_P_IP) {
2566        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2567        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2568    } else {
2569        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2570        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2571    }
2572    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2573                                      virtio_net_rsc_purge, chain);
2574    memset(&chain->stat, 0, sizeof(chain->stat));
2575
2576    QTAILQ_INIT(&chain->buffers);
2577    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2578
2579    return chain;
2580}
2581
2582static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2583                                      const uint8_t *buf,
2584                                      size_t size)
2585{
2586    uint16_t proto;
2587    VirtioNetRscChain *chain;
2588    struct eth_header *eth;
2589    VirtIONet *n;
2590
2591    n = qemu_get_nic_opaque(nc);
2592    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2593        return virtio_net_do_receive(nc, buf, size);
2594    }
2595
2596    eth = (struct eth_header *)(buf + n->guest_hdr_len);
2597    proto = htons(eth->h_proto);
2598
2599    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2600    if (chain) {
2601        chain->stat.received++;
2602        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2603            return virtio_net_rsc_receive4(chain, nc, buf, size);
2604        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2605            return virtio_net_rsc_receive6(chain, nc, buf, size);
2606        }
2607    }
2608    return virtio_net_do_receive(nc, buf, size);
2609}
2610
2611static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2612                                  size_t size)
2613{
2614    VirtIONet *n = qemu_get_nic_opaque(nc);
2615    if ((n->rsc4_enabled || n->rsc6_enabled)) {
2616        return virtio_net_rsc_receive(nc, buf, size);
2617    } else {
2618        return virtio_net_do_receive(nc, buf, size);
2619    }
2620}
2621
2622static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2623
2624static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2625{
2626    VirtIONet *n = qemu_get_nic_opaque(nc);
2627    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2628    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2629    int ret;
2630
2631    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2632    virtio_notify(vdev, q->tx_vq);
2633
2634    g_free(q->async_tx.elem);
2635    q->async_tx.elem = NULL;
2636
2637    virtio_queue_set_notification(q->tx_vq, 1);
2638    ret = virtio_net_flush_tx(q);
2639    if (ret >= n->tx_burst) {
2640        /*
2641         * the flush has been stopped by tx_burst
2642         * we will not receive notification for the
2643         * remaining part, so re-schedule
2644         */
2645        virtio_queue_set_notification(q->tx_vq, 0);
2646        if (q->tx_bh) {
2647            qemu_bh_schedule(q->tx_bh);
2648        } else {
2649            timer_mod(q->tx_timer,
2650                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2651        }
2652        q->tx_waiting = 1;
2653    }
2654}
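
/*
 * Async TX in short: virtio_net_flush_tx() hands packets to
 * qemu_sendv_packet_async(); a return of 0 means the backend queued the
 * packet, so the element is parked in q->async_tx.elem and this callback
 * completes it and resumes flushing once the backend has drained.
 */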
2655
2656/* TX */
2657static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2658{
2659    VirtIONet *n = q->n;
2660    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2661    VirtQueueElement *elem;
2662    int32_t num_packets = 0;
2663    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2664    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2665        return num_packets;
2666    }
2667
2668    if (q->async_tx.elem) {
2669        virtio_queue_set_notification(q->tx_vq, 0);
2670        return num_packets;
2671    }
2672
2673    for (;;) {
2674        ssize_t ret;
2675        unsigned int out_num;
2676        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2677        struct virtio_net_hdr_mrg_rxbuf mhdr;
2678
2679        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2680        if (!elem) {
2681            break;
2682        }
2683
2684        out_num = elem->out_num;
2685        out_sg = elem->out_sg;
2686        if (out_num < 1) {
2687            virtio_error(vdev, "virtio-net header not in first element");
2688            virtqueue_detach_element(q->tx_vq, elem, 0);
2689            g_free(elem);
2690            return -EINVAL;
2691        }
2692
2693        if (n->has_vnet_hdr) {
2694            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2695                n->guest_hdr_len) {
2696                virtio_error(vdev, "virtio-net header incorrect");
2697                virtqueue_detach_element(q->tx_vq, elem, 0);
2698                g_free(elem);
2699                return -EINVAL;
2700            }
2701            if (n->needs_vnet_hdr_swap) {
2702                virtio_net_hdr_swap(vdev, (void *) &mhdr);
2703                sg2[0].iov_base = &mhdr;
2704                sg2[0].iov_len = n->guest_hdr_len;
2705                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2706                                   out_sg, out_num,
2707                                   n->guest_hdr_len, -1);
2708                if (out_num == VIRTQUEUE_MAX_SIZE) {
2709                    goto drop;
2710                }
2711                out_num += 1;
2712                out_sg = sg2;
2713            }
2714        }
2715        /*
2716         * If host wants to see the guest header as is, we can
2717         * pass it on unchanged. Otherwise, copy just the parts
2718         * that host is interested in.
2719         */
2720        assert(n->host_hdr_len <= n->guest_hdr_len);
2721        if (n->host_hdr_len != n->guest_hdr_len) {
2722            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2723                                       out_sg, out_num,
2724                                       0, n->host_hdr_len);
2725            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2726                             out_sg, out_num,
2727                             n->guest_hdr_len, -1);
2728            out_num = sg_num;
2729            out_sg = sg;
2730        }
2731
2732        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2733                                      out_sg, out_num, virtio_net_tx_complete);
2734        if (ret == 0) {
2735            virtio_queue_set_notification(q->tx_vq, 0);
2736            q->async_tx.elem = elem;
2737            return -EBUSY;
2738        }
2739
2740drop:
2741        virtqueue_push(q->tx_vq, elem, 0);
2742        virtio_notify(vdev, q->tx_vq);
2743        g_free(elem);
2744
2745        if (++num_packets >= n->tx_burst) {
2746            break;
2747        }
2748    }
2749    return num_packets;
2750}
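
/*
 * Example of the header rewrite above: with mergeable RX buffers the
 * guest header is struct virtio_net_hdr_mrg_rxbuf (12 bytes) while a tap
 * backend may expect only struct virtio_net_hdr (10 bytes), so the two
 * iov_copy() calls splice together bytes [0, host_hdr_len) and
 * [guest_hdr_len, end), dropping the trailing num_buffers field that the
 * backend does not understand.
 */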
2751
2752static void virtio_net_tx_timer(void *opaque);
2753
2754static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2755{
2756    VirtIONet *n = VIRTIO_NET(vdev);
2757    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2758
2759    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2760        virtio_net_drop_tx_queue_data(vdev, vq);
2761        return;
2762    }
2763
2764    /* This happens when device was stopped but VCPU wasn't. */
2765    if (!vdev->vm_running) {
2766        q->tx_waiting = 1;
2767        return;
2768    }
2769
2770    if (q->tx_waiting) {
2771        /* We already have queued packets, immediately flush */
2772        timer_del(q->tx_timer);
2773        virtio_net_tx_timer(q);
2774    } else {
2775        /* re-arm timer to flush it (and more) on next tick */
2776        timer_mod(q->tx_timer,
2777                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2778        q->tx_waiting = 1;
2779        virtio_queue_set_notification(vq, 0);
2780    }
2781}
2782
2783static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2784{
2785    VirtIONet *n = VIRTIO_NET(vdev);
2786    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2787
2788    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2789        virtio_net_drop_tx_queue_data(vdev, vq);
2790        return;
2791    }
2792
2793    if (unlikely(q->tx_waiting)) {
2794        return;
2795    }
2796    q->tx_waiting = 1;
2797    /* This happens when device was stopped but VCPU wasn't. */
2798    if (!vdev->vm_running) {
2799        return;
2800    }
2801    virtio_queue_set_notification(vq, 0);
2802    qemu_bh_schedule(q->tx_bh);
2803}
2804
2805static void virtio_net_tx_timer(void *opaque)
2806{
2807    VirtIONetQueue *q = opaque;
2808    VirtIONet *n = q->n;
2809    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2810    int ret;
2811
2812    /* This happens when device was stopped but BH wasn't. */
2813    if (!vdev->vm_running) {
2814        /* Make sure tx waiting is set, so we'll run when restarted. */
2815        assert(q->tx_waiting);
2816        return;
2817    }
2818
2819    q->tx_waiting = 0;
2820
2821    /* Just in case the driver is no longer ready */
2822    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2823        return;
2824    }
2825
2826    ret = virtio_net_flush_tx(q);
2827    if (ret == -EBUSY || ret == -EINVAL) {
2828        return;
2829    }
2830    /*
2831     * If we flush a full burst of packets, assume there are
2832     * more coming and immediately rearm
2833     */
2834    if (ret >= n->tx_burst) {
2835        q->tx_waiting = 1;
2836        timer_mod(q->tx_timer,
2837                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2838        return;
2839    }
2840    /*
2841     * If less than a full burst, re-enable notification and flush
2842     * anything that may have come in while we weren't looking.  If
2843     * we find something, assume the guest is still active and rearm
2844     */
2845    virtio_queue_set_notification(q->tx_vq, 1);
2846    ret = virtio_net_flush_tx(q);
2847    if (ret > 0) {
2848        virtio_queue_set_notification(q->tx_vq, 0);
2849        q->tx_waiting = 1;
2850        timer_mod(q->tx_timer,
2851                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2852    }
2853}
2854
2855static void virtio_net_tx_bh(void *opaque)
2856{
2857    VirtIONetQueue *q = opaque;
2858    VirtIONet *n = q->n;
2859    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2860    int32_t ret;
2861
2862    /* This happens when device was stopped but BH wasn't. */
2863    if (!vdev->vm_running) {
2864        /* Make sure tx waiting is set, so we'll run when restarted. */
2865        assert(q->tx_waiting);
2866        return;
2867    }
2868
2869    q->tx_waiting = 0;
2870
2871    /* Just in case the driver is no longer ready */
2872    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2873        return;
2874    }
2875
2876    ret = virtio_net_flush_tx(q);
2877    if (ret == -EBUSY || ret == -EINVAL) {
2878        return; /* Notification re-enable handled by tx_complete or device
2879                 * broken */
2880    }
2881
2882    /* If we flush a full burst of packets, assume there are
2883     * more coming and immediately reschedule */
2884    if (ret >= n->tx_burst) {
2885        qemu_bh_schedule(q->tx_bh);
2886        q->tx_waiting = 1;
2887        return;
2888    }
2889
2890    /* If less than a full burst, re-enable notification and flush
2891     * anything that may have come in while we weren't looking.  If
2892     * we find something, assume the guest is still active and reschedule */
2893    virtio_queue_set_notification(q->tx_vq, 1);
2894    ret = virtio_net_flush_tx(q);
2895    if (ret == -EINVAL) {
2896        return;
2897    } else if (ret > 0) {
2898        virtio_queue_set_notification(q->tx_vq, 0);
2899        qemu_bh_schedule(q->tx_bh);
2900        q->tx_waiting = 1;
2901    }
2902}
2903
2904static void virtio_net_add_queue(VirtIONet *n, int index)
2905{
2906    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2907
2908    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2909                                           virtio_net_handle_rx);
2910
2911    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2912        n->vqs[index].tx_vq =
2913            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2914                             virtio_net_handle_tx_timer);
2915        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2916                                              virtio_net_tx_timer,
2917                                              &n->vqs[index]);
2918    } else {
2919        n->vqs[index].tx_vq =
2920            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2921                             virtio_net_handle_tx_bh);
2922        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2923    }
2924
2925    n->vqs[index].tx_waiting = 0;
2926    n->vqs[index].n = n;
2927}
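
/*
 * The TX strategy is selected by the "tx" qdev property; any value other
 * than "timer" (including leaving it unset) takes the bottom-half path.
 * An illustrative command line:
 *
 *     -device virtio-net-pci,netdev=nd0,tx=timer
 *
 * "bh" flushes right after the guest kick, while "timer" batches kicks
 * for up to n->tx_timeout nanoseconds before flushing.
 */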
2928
2929static void virtio_net_del_queue(VirtIONet *n, int index)
2930{
2931    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2932    VirtIONetQueue *q = &n->vqs[index];
2933    NetClientState *nc = qemu_get_subqueue(n->nic, index);
2934
2935    qemu_purge_queued_packets(nc);
2936
2937    virtio_del_queue(vdev, index * 2);
2938    if (q->tx_timer) {
2939        timer_free(q->tx_timer);
2940        q->tx_timer = NULL;
2941    } else {
2942        qemu_bh_delete(q->tx_bh);
2943        q->tx_bh = NULL;
2944    }
2945    q->tx_waiting = 0;
2946    virtio_del_queue(vdev, index * 2 + 1);
2947}
2948
2949static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2950{
2951    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2952    int old_num_queues = virtio_get_num_queues(vdev);
2953    int new_num_queues = new_max_queue_pairs * 2 + 1;
2954    int i;
2955
2956    assert(old_num_queues >= 3);
2957    assert(old_num_queues % 2 == 1);
2958
2959    if (old_num_queues == new_num_queues) {
2960        return;
2961    }
2962
2963    /*
2964     * We always need to remove and add ctrl vq if
2965     * old_num_queues != new_num_queues. Remove ctrl_vq first,
2966     * and then we only enter one of the following two loops.
2967     */
2968    virtio_del_queue(vdev, old_num_queues - 1);
2969
2970    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2971        /* new_num_queues < old_num_queues */
2972        virtio_net_del_queue(n, i / 2);
2973    }
2974
2975    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2976        /* new_num_queues > old_num_queues */
2977        virtio_net_add_queue(n, i / 2);
2978    }
2979
2980    /* add ctrl_vq last */
2981    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2982}
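
/*
 * Queue layout recap: virtqueues alternate RX/TX per pair with the
 * control queue always last, so pair i owns queues 2*i (RX) and
 * 2*i + 1 (TX).  Growing from 2 to 3 pairs, for example:
 *
 *     before: [rx0 tx0 rx1 tx1 ctrl]          5 queues, ctrl at index 4
 *     step 1: delete ctrl (old_num_queues - 1)
 *     step 2: add queues 4 and 5 for pair 2
 *     after:  [rx0 tx0 rx1 tx1 rx2 tx2 ctrl]  7 queues, ctrl at index 6
 */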
2983
2984static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2985{
2986    int max = multiqueue ? n->max_queue_pairs : 1;
2987
2988    n->multiqueue = multiqueue;
2989    virtio_net_change_num_queue_pairs(n, max);
2990
2991    virtio_net_set_queue_pairs(n);
2992}
2993
2994static int virtio_net_post_load_device(void *opaque, int version_id)
2995{
2996    VirtIONet *n = opaque;
2997    VirtIODevice *vdev = VIRTIO_DEVICE(n);
2998    int i, link_down;
2999
3000    trace_virtio_net_post_load_device();
3001    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3002                               virtio_vdev_has_feature(vdev,
3003                                                       VIRTIO_F_VERSION_1),
3004                               virtio_vdev_has_feature(vdev,
3005                                                       VIRTIO_NET_F_HASH_REPORT));
3006
3007    /* MAC_TABLE_ENTRIES may be different from the saved image */
3008    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3009        n->mac_table.in_use = 0;
3010    }
3011
3012    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3013        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3014    }
3015
3016    /*
3017     * curr_guest_offloads will later be overwritten by the
3018     * virtio_set_features_nocheck call done from the virtio_load.
3019     * Here we make sure it is preserved and restored accordingly
3020     * in the virtio_net_post_load_virtio callback.
3021     */
3022    n->saved_guest_offloads = n->curr_guest_offloads;
3023
3024    virtio_net_set_queue_pairs(n);
3025
3026    /* Find the first multicast entry in the saved MAC filter */
3027    for (i = 0; i < n->mac_table.in_use; i++) {
3028        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3029            break;
3030        }
3031    }
3032    n->mac_table.first_multi = i;
3033
3034    /* nc.link_down can't be migrated, so infer link_down according
3035     * to the link status bit in n->status */
3036    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3037    for (i = 0; i < n->max_queue_pairs; i++) {
3038        qemu_get_subqueue(n->nic, i)->link_down = link_down;
3039    }
3040
3041    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3042        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3043        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3044                                  QEMU_CLOCK_VIRTUAL,
3045                                  virtio_net_announce_timer, n);
3046        if (n->announce_timer.round) {
3047            timer_mod(n->announce_timer.tm,
3048                      qemu_clock_get_ms(n->announce_timer.type));
3049        } else {
3050            qemu_announce_timer_del(&n->announce_timer, false);
3051        }
3052    }
3053
3054    if (n->rss_data.enabled) {
3055        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3056        if (!n->rss_data.populate_hash) {
3057            if (!virtio_net_attach_epbf_rss(n)) {
3058                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3059                    warn_report("Can't post-load eBPF RSS for vhost");
3060                } else {
3061                    warn_report("Can't post-load eBPF RSS - "
3062                                "fallback to software RSS");
3063                    n->rss_data.enabled_software_rss = true;
3064                }
3065            }
3066        }
3067
3068        trace_virtio_net_rss_enable(n->rss_data.hash_types,
3069                                    n->rss_data.indirections_len,
3070                                    sizeof(n->rss_data.key));
3071    } else {
3072        trace_virtio_net_rss_disable();
3073    }
3074    return 0;
3075}
3076
3077static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3078{
3079    VirtIONet *n = VIRTIO_NET(vdev);
3080    /*
3081     * The actual needed state is now in saved_guest_offloads,
3082     * see virtio_net_post_load_device for detail.
3083     * Restore it back and apply the desired offloads.
3084     */
3085    n->curr_guest_offloads = n->saved_guest_offloads;
3086    if (peer_has_vnet_hdr(n)) {
3087        virtio_net_apply_guest_offloads(n);
3088    }
3089
3090    return 0;
3091}
3092
3093/* tx_waiting field of a VirtIONetQueue */
3094static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3095    .name = "virtio-net-queue-tx_waiting",
3096    .fields = (VMStateField[]) {
3097        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3098        VMSTATE_END_OF_LIST()
3099   },
3100};
3101
3102static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3103{
3104    return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3105}
3106
3107static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3108{
3109    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3110                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3111}
3112
3113static bool mac_table_fits(void *opaque, int version_id)
3114{
3115    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3116}
3117
3118static bool mac_table_doesnt_fit(void *opaque, int version_id)
3119{
3120    return !mac_table_fits(opaque, version_id);
3121}
3122
3123/* This temporary type is shared by all the WITH_TMP methods
3124 * although only some fields are used by each.
3125 */
3126struct VirtIONetMigTmp {
3127    VirtIONet      *parent;
3128    VirtIONetQueue *vqs_1;
3129    uint16_t        curr_queue_pairs_1;
3130    uint8_t         has_ufo;
3131    uint32_t        has_vnet_hdr;
3132};
3133
3134/* The 2nd and subsequent tx_waiting flags are loaded later than
3135 * the 1st entry in the queue_pairs and only if there's more than one
3136 * entry.  We use the tmp mechanism to calculate a temporary
3137 * pointer and count and also validate the count.
3138 */
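
/*
 * With VMSTATE_WITH_TMP, migration allocates a struct VirtIONetMigTmp
 * around save/load, points ->parent at the VirtIONet, and runs the
 * pre_save/pre_load hooks below, so the varray can be described with a
 * pointer and count that don't exist as real device fields.
 */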
3139
3140static int virtio_net_tx_waiting_pre_save(void *opaque)
3141{
3142    struct VirtIONetMigTmp *tmp = opaque;
3143
3144    tmp->vqs_1 = tmp->parent->vqs + 1;
3145    tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3146    if (tmp->parent->curr_queue_pairs == 0) {
3147        tmp->curr_queue_pairs_1 = 0;
3148    }
3149
3150    return 0;
3151}
3152
3153static int virtio_net_tx_waiting_pre_load(void *opaque)
3154{
3155    struct VirtIONetMigTmp *tmp = opaque;
3156
3157    /* Reuse the pointer setup from save */
3158    virtio_net_tx_waiting_pre_save(opaque);
3159
3160    if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3161        error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3162            tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3163
3164        return -EINVAL;
3165    }
3166
3167    return 0; /* all good */
3168}
3169
3170static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3171    .name      = "virtio-net-tx_waiting",
3172    .pre_load  = virtio_net_tx_waiting_pre_load,
3173    .pre_save  = virtio_net_tx_waiting_pre_save,
3174    .fields    = (VMStateField[]) {
3175        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3176                                     curr_queue_pairs_1,
3177                                     vmstate_virtio_net_queue_tx_waiting,
3178                                     struct VirtIONetQueue),
3179        VMSTATE_END_OF_LIST()
3180    },
3181};
3182
3183/* the 'has_ufo' flag is just tested; if the incoming stream has the
3184 * flag set we need to check that we have it
3185 */
3186static int virtio_net_ufo_post_load(void *opaque, int version_id)
3187{
3188    struct VirtIONetMigTmp *tmp = opaque;
3189
3190    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3191        error_report("virtio-net: saved image requires TUN_F_UFO support");
3192        return -EINVAL;
3193    }
3194
3195    return 0;
3196}
3197
3198static int virtio_net_ufo_pre_save(void *opaque)
3199{
3200    struct VirtIONetMigTmp *tmp = opaque;
3201
3202    tmp->has_ufo = tmp->parent->has_ufo;
3203
3204    return 0;
3205}
3206
3207static const VMStateDescription vmstate_virtio_net_has_ufo = {
3208    .name      = "virtio-net-ufo",
3209    .post_load = virtio_net_ufo_post_load,
3210    .pre_save  = virtio_net_ufo_pre_save,
3211    .fields    = (VMStateField[]) {
3212        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3213        VMSTATE_END_OF_LIST()
3214    },
3215};
3216
3217/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3218 * flag set we need to check that we have it
3219 */
3220static int virtio_net_vnet_post_load(void *opaque, int version_id)
3221{
3222    struct VirtIONetMigTmp *tmp = opaque;
3223
3224    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3225        error_report("virtio-net: saved image requires vnet_hdr=on");
3226        return -EINVAL;
3227    }
3228
3229    return 0;
3230}
3231
3232static int virtio_net_vnet_pre_save(void *opaque)
3233{
3234    struct VirtIONetMigTmp *tmp = opaque;
3235
3236    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3237
3238    return 0;
3239}
3240
3241static const VMStateDescription vmstate_virtio_net_has_vnet = {
3242    .name      = "virtio-net-vnet",
3243    .post_load = virtio_net_vnet_post_load,
3244    .pre_save  = virtio_net_vnet_pre_save,
3245    .fields    = (VMStateField[]) {
3246        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3247        VMSTATE_END_OF_LIST()
3248    },
3249};
3250
3251static bool virtio_net_rss_needed(void *opaque)
3252{
3253    return VIRTIO_NET(opaque)->rss_data.enabled;
3254}
3255
3256static const VMStateDescription vmstate_virtio_net_rss = {
3257    .name      = "virtio-net-device/rss",
3258    .version_id = 1,
3259    .minimum_version_id = 1,
3260    .needed = virtio_net_rss_needed,
3261    .fields = (VMStateField[]) {
3262        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3263        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3264        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3265        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3266        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3267        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3268        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3269                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
3270        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3271                                    rss_data.indirections_len, 0,
3272                                    vmstate_info_uint16, uint16_t),
3273        VMSTATE_END_OF_LIST()
3274    },
3275};
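
    /*
     * Added note: because this is a .needed-gated subsection, it is only
     * put on the wire when rss_data.enabled is true, so a stream saved
     * with RSS disabled stays loadable by QEMU versions that predate it.
     */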
3276
3277static const VMStateDescription vmstate_virtio_net_device = {
3278    .name = "virtio-net-device",
3279    .version_id = VIRTIO_NET_VM_VERSION,
3280    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3281    .post_load = virtio_net_post_load_device,
3282    .fields = (VMStateField[]) {
3283        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3284        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3285                               vmstate_virtio_net_queue_tx_waiting,
3286                               VirtIONetQueue),
3287        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3288        VMSTATE_UINT16(status, VirtIONet),
3289        VMSTATE_UINT8(promisc, VirtIONet),
3290        VMSTATE_UINT8(allmulti, VirtIONet),
3291        VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3292
3293        /* Guarded pair: if it fits we load it, else we throw it away
3294         * - this can happen if the source has a larger MAC table; post-load
3295         * sets flags in this case.
3296         */
3297        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3298                                 0, mac_table_fits, mac_table.in_use,
3299                                 ETH_ALEN),
3300        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3301                                     mac_table.in_use, ETH_ALEN),
3302
3303        /* Note: this is an array of uint32_t that has always been saved
3304         * as a raw buffer, so mind the endianness; it is actually used as
3305         * a bitmap built on those uint32_t elements.
3306         */
3307        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3308        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3309                         vmstate_virtio_net_has_vnet),
3310        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3311        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3312        VMSTATE_UINT8(alluni, VirtIONet),
3313        VMSTATE_UINT8(nomulti, VirtIONet),
3314        VMSTATE_UINT8(nouni, VirtIONet),
3315        VMSTATE_UINT8(nobcast, VirtIONet),
3316        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3317                         vmstate_virtio_net_has_ufo),
3318        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3319                            vmstate_info_uint16_equal, uint16_t),
3320        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3321        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3322                         vmstate_virtio_net_tx_waiting),
3323        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3324                            has_ctrl_guest_offloads),
3325        VMSTATE_END_OF_LIST()
3326    },
3327    .subsections = (const VMStateDescription * []) {
3328        &vmstate_virtio_net_rss,
3329        NULL
3330    }
3331};
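
    /*
     * Added note: the field order above defines the migration wire format;
     * new state can only be appended, or gated behind _TEST macros and
     * subsections, without breaking compatibility with older versions.
     */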
3332
3333static NetClientInfo net_virtio_info = {
3334    .type = NET_CLIENT_DRIVER_NIC,
3335    .size = sizeof(NICState),
3336    .can_receive = virtio_net_can_receive,
3337    .receive = virtio_net_receive,
3338    .link_status_changed = virtio_net_set_link_status,
3339    .query_rx_filter = virtio_net_query_rxfilter,
3340    .announce = virtio_net_announce,
3341};
3342
3343static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3344{
3345    VirtIONet *n = VIRTIO_NET(vdev);
3346    NetClientState *nc;
3347    assert(n->vhost_started);
3348    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3349        /* Must guard against an invalid feature set and a bogus queue
3350         * index being set by a malicious guest, or slipping in through
3351         * a buggy migration stream.
3352         */
3353        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3354            qemu_log_mask(LOG_GUEST_ERROR,
3355                          "%s: bogus vq index ignored\n", __func__);
3356            return false;
3357        }
3358        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3359    } else {
3360        nc = qemu_get_subqueue(n->nic, vq2q(idx));
3361    }
3362    /*
3363     * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
3364     * as the macro for the config interrupt's index.  If the backend does
3365     * not support it, the function returns false.
3366     */
3367
3368    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3369        return vhost_net_config_pending(get_vhost_net(nc->peer));
3370    }
3371    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3372}
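
    /*
     * Added note: virtqueue indices map to queue pairs as idx / 2
     * (rx = 2 * n, tx = 2 * n + 1).  Without VIRTIO_NET_F_MQ there is a
     * single pair, so index 2 can only be the control vq, which is what
     * the special case in the notifier callbacks checks for.
     */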
3373
3374static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3375                                           bool mask)
3376{
3377    VirtIONet *n = VIRTIO_NET(vdev);
3378    NetClientState *nc;
3379    assert(n->vhost_started);
3380    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3381        /* Must guard against an invalid feature set and a bogus queue
3382         * index being set by a malicious guest, or slipping in through
3383         * a buggy migration stream.
3384         */
3385        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3386            qemu_log_mask(LOG_GUEST_ERROR,
3387                          "%s: bogus vq index ignored\n", __func__);
3388            return;
3389        }
3390        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3391    } else {
3392        nc = qemu_get_subqueue(n->nic, vq2q(idx));
3393    }
3394    /*
3395     * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is used
3396     * as the macro for the config interrupt's index.  If the backend does
3397     * not support it, the function simply returns.
3398     */
3399
3400    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3401        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3402        return;
3403    }
3404    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3405}
3406
3407static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3408{
3409    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3410
3411    n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3412}
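
    /*
     * Added note: VIRTIO_NET_F_MAC is force-set above because the mac field
     * is always present in the config space; the resulting config_size then
     * grows with the offered features (e.g. the mtu, max_virtqueue_pairs
     * and speed/duplex fields are only counted when the matching feature
     * bit is set in host_features).
     */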
3413
3414void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3415                                   const char *type)
3416{
3417    /*
3418     * The name can be NULL; in that case the netclient name will be type.x.
3419     */
3420    assert(type != NULL);
3421
3422    g_free(n->netclient_name);
3423    g_free(n->netclient_type);
3424    n->netclient_name = g_strdup(name);
3425    n->netclient_type = g_strdup(type);
3426}
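
    /*
     * Added note: transports such as virtio-net-pci are expected to call
     * this before realize, so the netclient is named after the proxy
     * device; otherwise realize below falls back to the virtio-net
     * device's own typename and id.
     */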
3427
3428static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3429{
3430    HotplugHandler *hotplug_ctrl;
3431    PCIDevice *pci_dev;
3432    Error *err = NULL;
3433
3434    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3435    if (hotplug_ctrl) {
3436        pci_dev = PCI_DEVICE(dev);
3437        pci_dev->partially_hotplugged = true;
3438        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3439        if (err) {
3440            error_report_err(err);
3441            return false;
3442        }
3443    } else {
3444        return false;
3445    }
3446    return true;
3447}
3448
3449static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3450                                    Error **errp)
3451{
3452    Error *err = NULL;
3453    HotplugHandler *hotplug_ctrl;
3454    PCIDevice *pdev = PCI_DEVICE(dev);
3455    BusState *primary_bus;
3456
3457    if (!pdev->partially_hotplugged) {
3458        return true;
3459    }
3460    primary_bus = dev->parent_bus;
3461    if (!primary_bus) {
3462        error_setg(errp, "virtio_net: couldn't find primary bus");
3463        return false;
3464    }
3465    qdev_set_parent_bus(dev, primary_bus, &error_abort);
3466    qatomic_set(&n->failover_primary_hidden, false);
3467    hotplug_ctrl = qdev_get_hotplug_handler(dev);
3468    if (hotplug_ctrl) {
3469        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3470        if (err) {
3471            goto out;
3472        }
3473        hotplug_handler_plug(hotplug_ctrl, dev, &err);
3474    }
3475    pdev->partially_hotplugged = false;
3476
3477out:
3478    error_propagate(errp, err);
3479    return !err;
3480}
3481
3482static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3483{
3484    bool should_be_hidden;
3485    Error *err = NULL;
3486    DeviceState *dev = failover_find_primary_device(n);
3487
3488    if (!dev) {
3489        return;
3490    }
3491
3492    should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3493
3494    if (migration_in_setup(s) && !should_be_hidden) {
3495        if (failover_unplug_primary(n, dev)) {
3496            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3497            qapi_event_send_unplug_primary(dev->id);
3498            qatomic_set(&n->failover_primary_hidden, true);
3499        } else {
3500            warn_report("couldn't unplug primary device");
3501        }
3502    } else if (migration_has_failed(s)) {
3503        /* We already unplugged the device, so let's plug it back */
3504        if (!failover_replug_primary(n, dev, &err)) {
3505            if (err) {
3506                error_report_err(err);
3507            }
3508        }
3509    }
3510}
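
    /*
     * Added note: the two branches above implement the source side of the
     * failover dance: on migration setup the primary (typically a VFIO
     * NIC) is unplugged and hidden from the guest, and if the migration
     * later fails it is plugged back in.
     */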
3511
3512static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3513{
3514    MigrationState *s = data;
3515    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3516    virtio_net_handle_migration_primary(n, s);
3517}
3518
3519static bool failover_hide_primary_device(DeviceListener *listener,
3520                                         const QDict *device_opts,
3521                                         bool from_json,
3522                                         Error **errp)
3523{
3524    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3525    const char *standby_id;
3526
3527    if (!device_opts) {
3528        return false;
3529    }
3530
3531    if (!qdict_haskey(device_opts, "failover_pair_id")) {
3532        return false;
3533    }
3534
3535    if (!qdict_haskey(device_opts, "id")) {
3536        error_setg(errp, "Device with failover_pair_id needs to have id");
3537        return false;
3538    }
3539
3540    standby_id = qdict_get_str(device_opts, "failover_pair_id");
3541    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3542        return false;
3543    }
3544
3545    /*
3546     * The hide helper can be called several times for a given device.
3547     * Check that there is only one primary per virtio-net device, but
3548     * don't clone the qdict again if it is called repeatedly for the
3549     * same device.
3550     */
3551    if (n->primary_opts) {
3552        const char *old, *new;
3553        /* devices with failover_pair_id always have an id */
3554        old = qdict_get_str(n->primary_opts, "id");
3555        new = qdict_get_str(device_opts, "id");
3556        if (strcmp(old, new) != 0) {
3557            error_setg(errp, "Cannot attach more than one primary device to "
3558                       "'%s': '%s' and '%s'", n->netclient_name, old, new);
3559            return false;
3560        }
3561    } else {
3562        n->primary_opts = qdict_clone_shallow(device_opts);
3563        n->primary_opts_from_json = from_json;
3564    }
3565
3566    /* failover_primary_hidden is set during feature negotiation */
3567    return qatomic_read(&n->failover_primary_hidden);
3568}
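
    /*
     * Illustration of the matching above (option names as used by the
     * failover code, values invented for the example):
     *
     *   -device virtio-net-pci,netdev=hostnet0,id=net0,failover=on
     *   -device vfio-pci,host=5e:00.0,id=hostdev0,failover_pair_id=net0
     *
     * Here standby_id is "net0", so the vfio-pci device is the primary and
     * stays hidden until VIRTIO_NET_F_STANDBY has been negotiated.
     */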
3569
3570static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3571{
3572    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3573    VirtIONet *n = VIRTIO_NET(dev);
3574    NetClientState *nc;
3575    int i;
3576
3577    if (n->net_conf.mtu) {
3578        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3579    }
3580
3581    if (n->net_conf.duplex_str) {
3582        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3583            n->net_conf.duplex = DUPLEX_HALF;
3584        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3585            n->net_conf.duplex = DUPLEX_FULL;
3586        } else {
3587            error_setg(errp, "'duplex' must be 'half' or 'full'");
3588            return;
3589        }
3590        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3591    } else {
3592        n->net_conf.duplex = DUPLEX_UNKNOWN;
3593    }
3594
3595    if (n->net_conf.speed < SPEED_UNKNOWN) {
3596        error_setg(errp, "'speed' must be between 0 and INT_MAX");
3597        return;
3598    }
3599    if (n->net_conf.speed >= 0) {
3600        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3601    }
3602
3603    if (n->failover) {
3604        n->primary_listener.hide_device = failover_hide_primary_device;
3605        qatomic_set(&n->failover_primary_hidden, true);
3606        device_listener_register(&n->primary_listener);
3607        n->migration_state.notify = virtio_net_migration_state_notifier;
3608        add_migration_state_change_notifier(&n->migration_state);
3609        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3610    }
3611
3612    virtio_net_set_config_size(n, n->host_features);
3613    virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3614
3615    /*
3616     * We set the lower limit on the RX queue size to its historical
3617     * fixed value.  Guests that want a smaller ring can always resize
3618     * it without help from us (using virtio 1 and up).
3619     */
3620    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3621        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3622        !is_power_of_2(n->net_conf.rx_queue_size)) {
3623        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3624                   "must be a power of 2 between %d and %d.",
3625                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3626                   VIRTQUEUE_MAX_SIZE);
3627        virtio_cleanup(vdev);
3628        return;
3629    }
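
        /*
         * Illustration (values assumed for the example): rx_queue_size=512
         * passes this check (a power of 2 within [256, VIRTQUEUE_MAX_SIZE]),
         * while 300 (not a power of 2) or 128 (below the minimum) would
         * fail realize with the error above.
         */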
3630
3631    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3632        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3633        !is_power_of_2(n->net_conf.tx_queue_size)) {
3634        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3635                   "must be a power of 2 between %d and %d",
3636                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3637                   virtio_net_max_tx_queue_size(n));
3638        virtio_cleanup(vdev);
3639        return;
3640    }
3641
3642    n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3643
3644    /*
3645     * Figure out the number of datapath queue pairs, since the backend
3646     * could provide a control queue via its peers as well.
3647     */
3648    if (n->nic_conf.peers.queues) {
3649        for (i = 0; i < n->max_ncs; i++) {
3650            if (n->nic_conf.peers.ncs[i]->is_datapath) {
3651                ++n->max_queue_pairs;
3652            }
3653        }
3654    }
3655    n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3656
3657    if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3658        error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3659                   "must be a positive integer less than %d.",
3660                   n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3661        virtio_cleanup(vdev);
3662        return;
3663    }
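
        /*
         * Added note: each queue pair needs two virtqueues plus the one
         * shared control vq, hence the 2 * max_queue_pairs + 1 bound above.
         * E.g. a backend created with "-netdev tap,queues=4,..." (assumed
         * example) yields max_queue_pairs == 4 and 9 virtqueues in total.
         */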
3664    n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3665    n->curr_queue_pairs = 1;
3666    n->tx_timeout = n->net_conf.txtimer;
3667
3668    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3669                       && strcmp(n->net_conf.tx, "bh")) {
3670        warn_report("virtio-net: "
3671                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3672                    n->net_conf.tx);
3673        error_printf("Defaulting to \"bh\"");
3674    }
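
        /*
         * Added note: the tx algorithm is selectable per device, e.g.
         * "-device virtio-net-pci,tx=timer,x-txtimer=150000" (assumed
         * example) batches transmits on a 150us timer (x-txtimer is in
         * nanoseconds), while the default "bh" flushes from a bottom half.
         */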
3675
3676    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3677                                    n->net_conf.tx_queue_size);
3678
3679    for (i = 0; i < n->max_queue_pairs; i++) {
3680        virtio_net_add_queue(n, i);
3681    }
3682
3683    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3684    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3685    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3686    n->status = VIRTIO_NET_S_LINK_UP;
3687    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3688                              QEMU_CLOCK_VIRTUAL,
3689                              virtio_net_announce_timer, n);
3690    n->announce_timer.round = 0;
3691
3692    if (n->netclient_type) {
3693        /*
3694         * This happens when virtio_net_set_netclient_name has been called.
3695         */
3696        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3697                              n->netclient_type, n->netclient_name, n);
3698    } else {
3699        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3700                              object_get_typename(OBJECT(dev)), dev->id, n);
3701    }
3702
3703    for (i = 0; i < n->max_queue_pairs; i++) {
3704        n->nic->ncs[i].do_not_pad = true;
3705    }
3706
3707    peer_test_vnet_hdr(n);
3708    if (peer_has_vnet_hdr(n)) {
3709        for (i = 0; i < n->max_queue_pairs; i++) {
3710            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3711        }
3712        n->host_hdr_len = sizeof(struct virtio_net_hdr);
3713    } else {
3714        n->host_hdr_len = 0;
3715    }
3716
3717    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3718
3719    n->vqs[0].tx_waiting = 0;
3720    n->tx_burst = n->net_conf.txburst;
3721    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3722    n->promisc = 1; /* for compatibility */
3723
3724    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3725
3726    n->vlans = g_malloc0(MAX_VLAN >> 3);
3727
3728    nc = qemu_get_queue(n->nic);
3729    nc->rxfilter_notify_enabled = 1;
3730
3731    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3732        struct virtio_net_config netcfg = {};
3733        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3734        vhost_net_set_config(get_vhost_net(nc->peer),
3735            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3736    }
3737    QTAILQ_INIT(&n->rsc_chains);
3738    n->qdev = dev;
3739
3740    net_rx_pkt_init(&n->rx_pkt);
3741
3742    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3743        virtio_net_load_ebpf(n);
3744    }
3745}
3746
3747static void virtio_net_device_unrealize(DeviceState *dev)
3748{
3749    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3750    VirtIONet *n = VIRTIO_NET(dev);
3751    int i, max_queue_pairs;
3752
3753    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3754        virtio_net_unload_ebpf(n);
3755    }
3756
3757    /* This will stop the vhost backend if appropriate. */
3758    virtio_net_set_status(vdev, 0);
3759
3760    g_free(n->netclient_name);
3761    n->netclient_name = NULL;
3762    g_free(n->netclient_type);
3763    n->netclient_type = NULL;
3764
3765    g_free(n->mac_table.macs);
3766    g_free(n->vlans);
3767
3768    if (n->failover) {
3769        qobject_unref(n->primary_opts);
3770        device_listener_unregister(&n->primary_listener);
3771        remove_migration_state_change_notifier(&n->migration_state);
3772    } else {
3773        assert(n->primary_opts == NULL);
3774    }
3775
3776    max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3777    for (i = 0; i < max_queue_pairs; i++) {
3778        virtio_net_del_queue(n, i);
3779    }
3780    /* also delete the control vq */
3781    virtio_del_queue(vdev, max_queue_pairs * 2);
3782    qemu_announce_timer_del(&n->announce_timer, false);
3783    g_free(n->vqs);
3784    qemu_del_nic(n->nic);
3785    virtio_net_rsc_cleanup(n);
3786    g_free(n->rss_data.indirections_table);
3787    net_rx_pkt_uninit(n->rx_pkt);
3788    virtio_cleanup(vdev);
3789}
3790
3791static void virtio_net_instance_init(Object *obj)
3792{
3793    VirtIONet *n = VIRTIO_NET(obj);
3794
3795    /*
3796     * The default config_size is sizeof(struct virtio_net_config).
3797     * It can be overridden with virtio_net_set_config_size.
3798     */
3799    n->config_size = sizeof(struct virtio_net_config);
3800    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3801                                  "bootindex", "/ethernet-phy@0",
3802                                  DEVICE(n));
3803
3804    ebpf_rss_init(&n->ebpf_rss);
3805}
3806
3807static int virtio_net_pre_save(void *opaque)
3808{
3809    VirtIONet *n = opaque;
3810
3811    /* At this point, the backend must be stopped; otherwise
3812     * it might keep writing to memory. */
3813    assert(!n->vhost_started);
3814
3815    return 0;
3816}
3817
3818static bool primary_unplug_pending(void *opaque)
3819{
3820    DeviceState *dev = opaque;
3821    DeviceState *primary;
3822    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3823    VirtIONet *n = VIRTIO_NET(vdev);
3824
3825    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3826        return false;
3827    }
3828    primary = failover_find_primary_device(n);
3829    return primary ? primary->pending_deleted_event : false;
3830}
3831
3832static bool dev_unplug_pending(void *opaque)
3833{
3834    DeviceState *dev = opaque;
3835    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3836
3837    return vdc->primary_unplug_pending(dev);
3838}
3839
3840static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3841{
3842    VirtIONet *n = VIRTIO_NET(vdev);
3843    NetClientState *nc = qemu_get_queue(n->nic);
3844    struct vhost_net *net = get_vhost_net(nc->peer);
3845    return &net->dev;
3846}
3847
3848static const VMStateDescription vmstate_virtio_net = {
3849    .name = "virtio-net",
3850    .minimum_version_id = VIRTIO_NET_VM_VERSION,
3851    .version_id = VIRTIO_NET_VM_VERSION,
3852    .fields = (VMStateField[]) {
3853        VMSTATE_VIRTIO_DEVICE,
3854        VMSTATE_END_OF_LIST()
3855    },
3856    .pre_save = virtio_net_pre_save,
3857    .dev_unplug_pending = dev_unplug_pending,
3858};
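
    /*
     * Added note: this outer vmstate only wraps the common virtio device
     * state; the net-specific fields travel in vmstate_virtio_net_device,
     * which is hooked up as vdc->vmsd in virtio_net_class_init below.
     */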
3859
3860static Property virtio_net_properties[] = {
3861    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3862                    VIRTIO_NET_F_CSUM, true),
3863    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3864                    VIRTIO_NET_F_GUEST_CSUM, true),
3865    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3866    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3867                    VIRTIO_NET_F_GUEST_TSO4, true),
3868    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3869                    VIRTIO_NET_F_GUEST_TSO6, true),
3870    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3871                    VIRTIO_NET_F_GUEST_ECN, true),
3872    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3873                    VIRTIO_NET_F_GUEST_UFO, true),
3874    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3875                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3876    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3877                    VIRTIO_NET_F_HOST_TSO4, true),
3878    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3879                    VIRTIO_NET_F_HOST_TSO6, true),
3880    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3881                    VIRTIO_NET_F_HOST_ECN, true),
3882    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3883                    VIRTIO_NET_F_HOST_UFO, true),
3884    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3885                    VIRTIO_NET_F_MRG_RXBUF, true),
3886    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3887                    VIRTIO_NET_F_STATUS, true),
3888    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3889                    VIRTIO_NET_F_CTRL_VQ, true),
3890    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3891                    VIRTIO_NET_F_CTRL_RX, true),
3892    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3893                    VIRTIO_NET_F_CTRL_VLAN, true),
3894    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3895                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3896    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3897                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3898    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3899                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3900    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3901    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3902                    VIRTIO_NET_F_RSS, false),
3903    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3904                    VIRTIO_NET_F_HASH_REPORT, false),
3905    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3906                    VIRTIO_NET_F_RSC_EXT, false),
3907    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3908                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3909    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3910    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3911                       TX_TIMER_INTERVAL),
3912    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3913    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3914    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3915                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3916    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3917                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3918    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3919    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3920                     true),
3921    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3922    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3923    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3924    DEFINE_PROP_END_OF_LIST(),
3925};
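
    /*
     * Usage illustration (assumed example): these properties are set on
     * the proxy device, e.g.
     *
     *   -device virtio-net-pci,netdev=net0,mq=on,rss=on
     *
     * mq, rss and hash default to off above, while the ctrl_* features
     * default to on.
     */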
3926
3927static void virtio_net_class_init(ObjectClass *klass, void *data)
3928{
3929    DeviceClass *dc = DEVICE_CLASS(klass);
3930    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3931
3932    device_class_set_props(dc, virtio_net_properties);
3933    dc->vmsd = &vmstate_virtio_net;
3934    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3935    vdc->realize = virtio_net_device_realize;
3936    vdc->unrealize = virtio_net_device_unrealize;
3937    vdc->get_config = virtio_net_get_config;
3938    vdc->set_config = virtio_net_set_config;
3939    vdc->get_features = virtio_net_get_features;
3940    vdc->set_features = virtio_net_set_features;
3941    vdc->bad_features = virtio_net_bad_features;
3942    vdc->reset = virtio_net_reset;
3943    vdc->queue_reset = virtio_net_queue_reset;
3944    vdc->queue_enable = virtio_net_queue_enable;
3945    vdc->set_status = virtio_net_set_status;
3946    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3947    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3948    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3949    vdc->post_load = virtio_net_post_load_virtio;
3950    vdc->vmsd = &vmstate_virtio_net_device;
3951    vdc->primary_unplug_pending = primary_unplug_pending;
3952    vdc->get_vhost = virtio_net_get_vhost;
3953    vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
3954}
3955
3956static const TypeInfo virtio_net_info = {
3957    .name = TYPE_VIRTIO_NET,
3958    .parent = TYPE_VIRTIO_DEVICE,
3959    .instance_size = sizeof(VirtIONet),
3960    .instance_init = virtio_net_instance_init,
3961    .class_init = virtio_net_class_init,
3962};
3963
3964static void virtio_register_types(void)
3965{
3966    type_register_static(&virtio_net_info);
3967}
3968
3969type_init(virtio_register_types)
3970