1
2
3
4
5
6
7
8
9
10
11
12
13
14#include "qemu/osdep.h"
15#include "qemu/atomic.h"
16#include "qemu/iov.h"
17#include "qemu/main-loop.h"
18#include "qemu/module.h"
19#include "hw/virtio/virtio.h"
20#include "net/net.h"
21#include "net/checksum.h"
22#include "net/tap.h"
23#include "qemu/error-report.h"
24#include "qemu/timer.h"
25#include "qemu/option.h"
26#include "qemu/option_int.h"
27#include "qemu/config-file.h"
28#include "qapi/qmp/qdict.h"
29#include "hw/virtio/virtio-net.h"
30#include "net/vhost_net.h"
31#include "net/announce.h"
32#include "hw/virtio/virtio-bus.h"
33#include "qapi/error.h"
34#include "qapi/qapi-events-net.h"
35#include "hw/qdev-properties.h"
36#include "qapi/qapi-types-migration.h"
37#include "qapi/qapi-events-migration.h"
38#include "hw/virtio/virtio-access.h"
39#include "migration/misc.h"
40#include "standard-headers/linux/ethtool.h"
41#include "sysemu/sysemu.h"
42#include "trace.h"
43#include "monitor/qdev.h"
44#include "hw/pci/pci.h"
45#include "net_rx_pkt.h"
46#include "hw/virtio/vhost.h"
47
/* NOTE(review): looks like the device's saved-state version - confirm. */
#define VIRTIO_NET_VM_VERSION               11

/* Capacity of the MAC filter table, in ETH_ALEN-sized entries. */
#define MAC_TABLE_ENTRIES                   64
/* Number of distinct VLAN ids tracked by the VLAN filter bitmap. */
#define MAX_VLAN                            (1 << 12)

/* Default RX/TX virtqueue sizes. */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE    256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE    256

/* Minimum RX/TX virtqueue sizes: currently equal to the defaults. */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

/* NOTE(review): presumably IPv4 source + destination address bytes. */
#define VIRTIO_NET_IP4_ADDR_SIZE            8

/* NOTE(review): presumably masks for the TCP flag / data-offset fields. */
#define VIRTIO_NET_TCP_FLAG                 0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH           0xF000

/* Largest payloads: 65535 minus the IPv4 header for IPv4, 65535 for TCP. */
#define VIRTIO_NET_MAX_IP4_PAYLOAD          (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD          65535

/* NOTE(review): presumably the IPv4 IHL value for a header w/o options. */
#define VIRTIO_NET_IP4_HEADER_LENGTH        5

/* NOTE(review): presumably IPv6 source + destination address bytes. */
#define VIRTIO_NET_IP6_ADDR_SIZE            32
#define VIRTIO_NET_MAX_IP6_PAYLOAD          VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Default RSC interval.
 * NOTE(review): unit is not visible in this part of the file - confirm.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL     300000
81
/*
 * Mask of every RSS hash type advertised in the device config space
 * (written to virtio_net_config.supported_hash_types).
 */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES     \
    (VIRTIO_NET_RSS_HASH_TYPE_IPv4   |      \
     VIRTIO_NET_RSS_HASH_TYPE_TCPv4  |      \
     VIRTIO_NET_RSS_HASH_TYPE_UDPv4  |      \
     VIRTIO_NET_RSS_HASH_TYPE_IPv6   |      \
     VIRTIO_NET_RSS_HASH_TYPE_TCPv6  |      \
     VIRTIO_NET_RSS_HASH_TYPE_UDPv6  |      \
     VIRTIO_NET_RSS_HASH_TYPE_IP_EX  |      \
     VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |      \
     VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91
92static const VirtIOFeature feature_sizes[] = {
93 {.flags = 1ULL << VIRTIO_NET_F_MAC,
94 .end = endof(struct virtio_net_config, mac)},
95 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
96 .end = endof(struct virtio_net_config, status)},
97 {.flags = 1ULL << VIRTIO_NET_F_MQ,
98 .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
99 {.flags = 1ULL << VIRTIO_NET_F_MTU,
100 .end = endof(struct virtio_net_config, mtu)},
101 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
102 .end = endof(struct virtio_net_config, duplex)},
103 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
104 .end = endof(struct virtio_net_config, supported_hash_types)},
105 {}
106};
107
108static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109{
110 VirtIONet *n = qemu_get_nic_opaque(nc);
111
112 return &n->vqs[nc->queue_index];
113}
114
/*
 * Convert an index by halving it (integer division, truncating toward
 * zero).
 * NOTE(review): presumably maps a virtqueue index to its queue-pair index,
 * assuming two virtqueues per pair - confirm with callers.
 */
static int vq2q(int queue_index)
{
    const int divisor = 2;

    return queue_index / divisor;
}
119
120
121
122
123
124static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125{
126 VirtIONet *n = VIRTIO_NET(vdev);
127 struct virtio_net_config netcfg;
128 NetClientState *nc = qemu_get_queue(n->nic);
129 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
130
131 int ret = 0;
132 memset(&netcfg, 0 , sizeof(struct virtio_net_config));
133 virtio_stw_p(vdev, &netcfg.status, n->status);
134 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
135 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
136 memcpy(netcfg.mac, n->mac, ETH_ALEN);
137 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
138 netcfg.duplex = n->net_conf.duplex;
139 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
140 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
141 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
142 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
143 virtio_stl_p(vdev, &netcfg.supported_hash_types,
144 VIRTIO_NET_RSS_SUPPORTED_HASHES);
145 memcpy(config, &netcfg, n->config_size);
146
147
148
149
150
151 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
152 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
153 n->config_size);
154 if (ret != -1) {
155
156
157
158
159
160
161
162 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
163 info_report("Zero hardware mac address detected. Ignoring.");
164 memcpy(netcfg.mac, n->mac, ETH_ALEN);
165 }
166 memcpy(config, &netcfg, n->config_size);
167 }
168 }
169}
170
171static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
172{
173 VirtIONet *n = VIRTIO_NET(vdev);
174 struct virtio_net_config netcfg = {};
175 NetClientState *nc = qemu_get_queue(n->nic);
176
177 memcpy(&netcfg, config, n->config_size);
178
179 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
180 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
181 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
182 memcpy(n->mac, netcfg.mac, ETH_ALEN);
183 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
184 }
185
186
187
188
189
190 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
191 vhost_net_set_config(get_vhost_net(nc->peer),
192 (uint8_t *)&netcfg, 0, n->config_size,
193 VHOST_SET_CONFIG_TYPE_MASTER);
194 }
195}
196
197static bool virtio_net_started(VirtIONet *n, uint8_t status)
198{
199 VirtIODevice *vdev = VIRTIO_DEVICE(n);
200 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
201 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
202}
203
204static void virtio_net_announce_notify(VirtIONet *net)
205{
206 VirtIODevice *vdev = VIRTIO_DEVICE(net);
207 trace_virtio_net_announce_notify();
208
209 net->status |= VIRTIO_NET_S_ANNOUNCE;
210 virtio_notify_config(vdev);
211}
212
213static void virtio_net_announce_timer(void *opaque)
214{
215 VirtIONet *n = opaque;
216 trace_virtio_net_announce_timer(n->announce_timer.round);
217
218 n->announce_timer.round--;
219 virtio_net_announce_notify(n);
220}
221
222static void virtio_net_announce(NetClientState *nc)
223{
224 VirtIONet *n = qemu_get_nic_opaque(nc);
225 VirtIODevice *vdev = VIRTIO_DEVICE(n);
226
227
228
229
230
231
232 if (n->announce_timer.round) {
233 return;
234 }
235
236 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
237 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
238 virtio_net_announce_notify(n);
239 }
240}
241
242static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
243{
244 VirtIODevice *vdev = VIRTIO_DEVICE(n);
245 NetClientState *nc = qemu_get_queue(n->nic);
246 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
247 int cvq = n->max_ncs - n->max_queue_pairs;
248
249 if (!get_vhost_net(nc->peer)) {
250 return;
251 }
252
253 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
254 !!n->vhost_started) {
255 return;
256 }
257 if (!n->vhost_started) {
258 int r, i;
259
260 if (n->needs_vnet_hdr_swap) {
261 error_report("backend does not support %s vnet headers; "
262 "falling back on userspace virtio",
263 virtio_is_big_endian(vdev) ? "BE" : "LE");
264 return;
265 }
266
267
268
269
270 for (i = 0; i < queue_pairs; i++) {
271 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
272
273
274 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
275 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
276 }
277
278 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
279 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
280 if (r < 0) {
281 error_report("%uBytes MTU not supported by the backend",
282 n->net_conf.mtu);
283
284 return;
285 }
286 }
287
288 n->vhost_started = 1;
289 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
290 if (r < 0) {
291 error_report("unable to start vhost net: %d: "
292 "falling back on userspace virtio", -r);
293 n->vhost_started = 0;
294 }
295 } else {
296 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
297 n->vhost_started = 0;
298 }
299}
300
301static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
302 NetClientState *peer,
303 bool enable)
304{
305 if (virtio_is_big_endian(vdev)) {
306 return qemu_set_vnet_be(peer, enable);
307 } else {
308 return qemu_set_vnet_le(peer, enable);
309 }
310}
311
312static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
313 int queue_pairs, bool enable)
314{
315 int i;
316
317 for (i = 0; i < queue_pairs; i++) {
318 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
319 enable) {
320 while (--i >= 0) {
321 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
322 }
323
324 return true;
325 }
326 }
327
328 return false;
329}
330
331static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
332{
333 VirtIODevice *vdev = VIRTIO_DEVICE(n);
334 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
335
336 if (virtio_net_started(n, status)) {
337
338
339
340
341
342 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
343 queue_pairs, true);
344 } else if (virtio_net_started(n, vdev->status)) {
345
346
347
348
349
350 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
351 }
352}
353
354static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
355{
356 unsigned int dropped = virtqueue_drop_all(vq);
357 if (dropped) {
358 virtio_notify(vdev, vq);
359 }
360}
361
362static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
363{
364 VirtIONet *n = VIRTIO_NET(vdev);
365 VirtIONetQueue *q;
366 int i;
367 uint8_t queue_status;
368
369 virtio_net_vnet_endian_status(n, status);
370 virtio_net_vhost_status(n, status);
371
372 for (i = 0; i < n->max_queue_pairs; i++) {
373 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
374 bool queue_started;
375 q = &n->vqs[i];
376
377 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
378 queue_status = 0;
379 } else {
380 queue_status = status;
381 }
382 queue_started =
383 virtio_net_started(n, queue_status) && !n->vhost_started;
384
385 if (queue_started) {
386 qemu_flush_queued_packets(ncs);
387 }
388
389 if (!q->tx_waiting) {
390 continue;
391 }
392
393 if (queue_started) {
394 if (q->tx_timer) {
395 timer_mod(q->tx_timer,
396 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
397 } else {
398 qemu_bh_schedule(q->tx_bh);
399 }
400 } else {
401 if (q->tx_timer) {
402 timer_del(q->tx_timer);
403 } else {
404 qemu_bh_cancel(q->tx_bh);
405 }
406 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
407 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
408 vdev->vm_running) {
409
410
411 q->tx_waiting = 0;
412 virtio_queue_set_notification(q->tx_vq, 1);
413 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
414 }
415 }
416 }
417}
418
419static void virtio_net_set_link_status(NetClientState *nc)
420{
421 VirtIONet *n = qemu_get_nic_opaque(nc);
422 VirtIODevice *vdev = VIRTIO_DEVICE(n);
423 uint16_t old_status = n->status;
424
425 if (nc->link_down)
426 n->status &= ~VIRTIO_NET_S_LINK_UP;
427 else
428 n->status |= VIRTIO_NET_S_LINK_UP;
429
430 if (n->status != old_status)
431 virtio_notify_config(vdev);
432
433 virtio_net_set_status(vdev, vdev->status);
434}
435
436static void rxfilter_notify(NetClientState *nc)
437{
438 VirtIONet *n = qemu_get_nic_opaque(nc);
439
440 if (nc->rxfilter_notify_enabled) {
441 char *path = object_get_canonical_path(OBJECT(n->qdev));
442 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
443 n->netclient_name, path);
444 g_free(path);
445
446
447 nc->rxfilter_notify_enabled = 0;
448 }
449}
450
451static intList *get_vlan_table(VirtIONet *n)
452{
453 intList *list;
454 int i, j;
455
456 list = NULL;
457 for (i = 0; i < MAX_VLAN >> 5; i++) {
458 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
459 if (n->vlans[i] & (1U << j)) {
460 QAPI_LIST_PREPEND(list, (i << 5) + j);
461 }
462 }
463 }
464
465 return list;
466}
467
468static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
469{
470 VirtIONet *n = qemu_get_nic_opaque(nc);
471 VirtIODevice *vdev = VIRTIO_DEVICE(n);
472 RxFilterInfo *info;
473 strList *str_list;
474 int i;
475
476 info = g_malloc0(sizeof(*info));
477 info->name = g_strdup(nc->name);
478 info->promiscuous = n->promisc;
479
480 if (n->nouni) {
481 info->unicast = RX_STATE_NONE;
482 } else if (n->alluni) {
483 info->unicast = RX_STATE_ALL;
484 } else {
485 info->unicast = RX_STATE_NORMAL;
486 }
487
488 if (n->nomulti) {
489 info->multicast = RX_STATE_NONE;
490 } else if (n->allmulti) {
491 info->multicast = RX_STATE_ALL;
492 } else {
493 info->multicast = RX_STATE_NORMAL;
494 }
495
496 info->broadcast_allowed = n->nobcast;
497 info->multicast_overflow = n->mac_table.multi_overflow;
498 info->unicast_overflow = n->mac_table.uni_overflow;
499
500 info->main_mac = qemu_mac_strdup_printf(n->mac);
501
502 str_list = NULL;
503 for (i = 0; i < n->mac_table.first_multi; i++) {
504 QAPI_LIST_PREPEND(str_list,
505 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
506 }
507 info->unicast_table = str_list;
508
509 str_list = NULL;
510 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
511 QAPI_LIST_PREPEND(str_list,
512 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
513 }
514 info->multicast_table = str_list;
515 info->vlan_table = get_vlan_table(n);
516
517 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
518 info->vlan = RX_STATE_ALL;
519 } else if (!info->vlan_table) {
520 info->vlan = RX_STATE_NONE;
521 } else {
522 info->vlan = RX_STATE_NORMAL;
523 }
524
525
526 nc->rxfilter_notify_enabled = 1;
527
528 return info;
529}
530
531static void virtio_net_reset(VirtIODevice *vdev)
532{
533 VirtIONet *n = VIRTIO_NET(vdev);
534 int i;
535
536
537 n->promisc = 1;
538 n->allmulti = 0;
539 n->alluni = 0;
540 n->nomulti = 0;
541 n->nouni = 0;
542 n->nobcast = 0;
543
544 n->curr_queue_pairs = 1;
545 timer_del(n->announce_timer.tm);
546 n->announce_timer.round = 0;
547 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
548
549
550 n->mac_table.in_use = 0;
551 n->mac_table.first_multi = 0;
552 n->mac_table.multi_overflow = 0;
553 n->mac_table.uni_overflow = 0;
554 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
555 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
556 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
557 memset(n->vlans, 0, MAX_VLAN >> 3);
558
559
560 for (i = 0; i < n->max_queue_pairs; i++) {
561 NetClientState *nc = qemu_get_subqueue(n->nic, i);
562
563 if (nc->peer) {
564 qemu_flush_or_purge_queued_packets(nc->peer, true);
565 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
566 }
567 }
568}
569
570static void peer_test_vnet_hdr(VirtIONet *n)
571{
572 NetClientState *nc = qemu_get_queue(n->nic);
573 if (!nc->peer) {
574 return;
575 }
576
577 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
578}
579
580static int peer_has_vnet_hdr(VirtIONet *n)
581{
582 return n->has_vnet_hdr;
583}
584
585static int peer_has_ufo(VirtIONet *n)
586{
587 if (!peer_has_vnet_hdr(n))
588 return 0;
589
590 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
591
592 return n->has_ufo;
593}
594
595static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
596 int version_1, int hash_report)
597{
598 int i;
599 NetClientState *nc;
600
601 n->mergeable_rx_bufs = mergeable_rx_bufs;
602
603 if (version_1) {
604 n->guest_hdr_len = hash_report ?
605 sizeof(struct virtio_net_hdr_v1_hash) :
606 sizeof(struct virtio_net_hdr_mrg_rxbuf);
607 n->rss_data.populate_hash = !!hash_report;
608 } else {
609 n->guest_hdr_len = n->mergeable_rx_bufs ?
610 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
611 sizeof(struct virtio_net_hdr);
612 }
613
614 for (i = 0; i < n->max_queue_pairs; i++) {
615 nc = qemu_get_subqueue(n->nic, i);
616
617 if (peer_has_vnet_hdr(n) &&
618 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
619 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
620 n->host_hdr_len = n->guest_hdr_len;
621 }
622 }
623}
624
625static int virtio_net_max_tx_queue_size(VirtIONet *n)
626{
627 NetClientState *peer = n->nic_conf.peers.ncs[0];
628
629
630
631
632 if (!peer) {
633 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
634 }
635
636 if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
637 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
638 }
639
640 return VIRTQUEUE_MAX_SIZE;
641}
642
643static int peer_attach(VirtIONet *n, int index)
644{
645 NetClientState *nc = qemu_get_subqueue(n->nic, index);
646
647 if (!nc->peer) {
648 return 0;
649 }
650
651 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
652 vhost_set_vring_enable(nc->peer, 1);
653 }
654
655 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
656 return 0;
657 }
658
659 if (n->max_queue_pairs == 1) {
660 return 0;
661 }
662
663 return tap_enable(nc->peer);
664}
665
666static int peer_detach(VirtIONet *n, int index)
667{
668 NetClientState *nc = qemu_get_subqueue(n->nic, index);
669
670 if (!nc->peer) {
671 return 0;
672 }
673
674 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
675 vhost_set_vring_enable(nc->peer, 0);
676 }
677
678 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
679 return 0;
680 }
681
682 return tap_disable(nc->peer);
683}
684
685static void virtio_net_set_queue_pairs(VirtIONet *n)
686{
687 int i;
688 int r;
689
690 if (n->nic->peer_deleted) {
691 return;
692 }
693
694 for (i = 0; i < n->max_queue_pairs; i++) {
695 if (i < n->curr_queue_pairs) {
696 r = peer_attach(n, i);
697 assert(!r);
698 } else {
699 r = peer_detach(n, i);
700 assert(!r);
701 }
702 }
703}
704
705static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
706
707static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
708 Error **errp)
709{
710 VirtIONet *n = VIRTIO_NET(vdev);
711 NetClientState *nc = qemu_get_queue(n->nic);
712
713
714 features |= n->host_features;
715
716 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
717
718 if (!peer_has_vnet_hdr(n)) {
719 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
720 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
721 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
722 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
723
724 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
725 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
726 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
727 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
728
729 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
730 }
731
732 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
733 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
734 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
735 }
736
737 if (!get_vhost_net(nc->peer)) {
738 return features;
739 }
740
741 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
742 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
743 }
744 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
745 vdev->backend_features = features;
746
747 if (n->mtu_bypass_backend &&
748 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
749 features |= (1ULL << VIRTIO_NET_F_MTU);
750 }
751
752 return features;
753}
754
755static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
756{
757 uint64_t features = 0;
758
759
760
761 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
762 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
763 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
764 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
765 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
766
767 return features;
768}
769
770static void virtio_net_apply_guest_offloads(VirtIONet *n)
771{
772 qemu_set_offload(qemu_get_queue(n->nic)->peer,
773 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
774 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
775 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
776 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
777 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
778}
779
780static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
781{
782 static const uint64_t guest_offloads_mask =
783 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
784 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
785 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
786 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
787 (1ULL << VIRTIO_NET_F_GUEST_UFO);
788
789 return guest_offloads_mask & features;
790}
791
792static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
793{
794 VirtIODevice *vdev = VIRTIO_DEVICE(n);
795 return virtio_net_guest_offloads_by_features(vdev->guest_features);
796}
797
798typedef struct {
799 VirtIONet *n;
800 DeviceState *dev;
801} FailoverDevice;
802
803
804
805
806
807
808
809
810static int failover_set_primary(DeviceState *dev, void *opaque)
811{
812 FailoverDevice *fdev = opaque;
813 PCIDevice *pci_dev = (PCIDevice *)
814 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
815
816 if (!pci_dev) {
817 return 0;
818 }
819
820 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
821 fdev->dev = dev;
822 return 1;
823 }
824
825 return 0;
826}
827
828
829
830
831
832
833
834static DeviceState *failover_find_primary_device(VirtIONet *n)
835{
836 FailoverDevice fdev = {
837 .n = n,
838 };
839
840 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
841 NULL, NULL, &fdev);
842 return fdev.dev;
843}
844
845static void failover_add_primary(VirtIONet *n, Error **errp)
846{
847 Error *err = NULL;
848 DeviceState *dev = failover_find_primary_device(n);
849
850 if (dev) {
851 return;
852 }
853
854 if (!n->primary_opts) {
855 error_setg(errp, "Primary device not found");
856 error_append_hint(errp, "Virtio-net failover will not work. Make "
857 "sure primary device has parameter"
858 " failover_pair_id=%s\n", n->netclient_name);
859 return;
860 }
861
862 dev = qdev_device_add_from_qdict(n->primary_opts,
863 n->primary_opts_from_json,
864 &err);
865 if (err) {
866 qobject_unref(n->primary_opts);
867 n->primary_opts = NULL;
868 } else {
869 object_unref(OBJECT(dev));
870 }
871 error_propagate(errp, err);
872}
873
874static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
875{
876 VirtIONet *n = VIRTIO_NET(vdev);
877 Error *err = NULL;
878 int i;
879
880 if (n->mtu_bypass_backend &&
881 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
882 features &= ~(1ULL << VIRTIO_NET_F_MTU);
883 }
884
885 virtio_net_set_multiqueue(n,
886 virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
887 virtio_has_feature(features, VIRTIO_NET_F_MQ));
888
889 virtio_net_set_mrg_rx_bufs(n,
890 virtio_has_feature(features,
891 VIRTIO_NET_F_MRG_RXBUF),
892 virtio_has_feature(features,
893 VIRTIO_F_VERSION_1),
894 virtio_has_feature(features,
895 VIRTIO_NET_F_HASH_REPORT));
896
897 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
898 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
899 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
900 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
901 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
902
903 if (n->has_vnet_hdr) {
904 n->curr_guest_offloads =
905 virtio_net_guest_offloads_by_features(features);
906 virtio_net_apply_guest_offloads(n);
907 }
908
909 for (i = 0; i < n->max_queue_pairs; i++) {
910 NetClientState *nc = qemu_get_subqueue(n->nic, i);
911
912 if (!get_vhost_net(nc->peer)) {
913 continue;
914 }
915 vhost_net_ack_features(get_vhost_net(nc->peer), features);
916 }
917
918 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
919 memset(n->vlans, 0, MAX_VLAN >> 3);
920 } else {
921 memset(n->vlans, 0xff, MAX_VLAN >> 3);
922 }
923
924 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
925 qapi_event_send_failover_negotiated(n->netclient_name);
926 qatomic_set(&n->failover_primary_hidden, false);
927 failover_add_primary(n, &err);
928 if (err) {
929 warn_report_err(err);
930 }
931 }
932}
933
934static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
935 struct iovec *iov, unsigned int iov_cnt)
936{
937 uint8_t on;
938 size_t s;
939 NetClientState *nc = qemu_get_queue(n->nic);
940
941 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
942 if (s != sizeof(on)) {
943 return VIRTIO_NET_ERR;
944 }
945
946 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
947 n->promisc = on;
948 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
949 n->allmulti = on;
950 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
951 n->alluni = on;
952 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
953 n->nomulti = on;
954 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
955 n->nouni = on;
956 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
957 n->nobcast = on;
958 } else {
959 return VIRTIO_NET_ERR;
960 }
961
962 rxfilter_notify(nc);
963
964 return VIRTIO_NET_OK;
965}
966
967static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
968 struct iovec *iov, unsigned int iov_cnt)
969{
970 VirtIODevice *vdev = VIRTIO_DEVICE(n);
971 uint64_t offloads;
972 size_t s;
973
974 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
975 return VIRTIO_NET_ERR;
976 }
977
978 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
979 if (s != sizeof(offloads)) {
980 return VIRTIO_NET_ERR;
981 }
982
983 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
984 uint64_t supported_offloads;
985
986 offloads = virtio_ldq_p(vdev, &offloads);
987
988 if (!n->has_vnet_hdr) {
989 return VIRTIO_NET_ERR;
990 }
991
992 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
993 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
994 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
995 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
996 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
997
998 supported_offloads = virtio_net_supported_guest_offloads(n);
999 if (offloads & ~supported_offloads) {
1000 return VIRTIO_NET_ERR;
1001 }
1002
1003 n->curr_guest_offloads = offloads;
1004 virtio_net_apply_guest_offloads(n);
1005
1006 return VIRTIO_NET_OK;
1007 } else {
1008 return VIRTIO_NET_ERR;
1009 }
1010}
1011
1012static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1013 struct iovec *iov, unsigned int iov_cnt)
1014{
1015 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1016 struct virtio_net_ctrl_mac mac_data;
1017 size_t s;
1018 NetClientState *nc = qemu_get_queue(n->nic);
1019
1020 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1021 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1022 return VIRTIO_NET_ERR;
1023 }
1024 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1025 assert(s == sizeof(n->mac));
1026 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1027 rxfilter_notify(nc);
1028
1029 return VIRTIO_NET_OK;
1030 }
1031
1032 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1033 return VIRTIO_NET_ERR;
1034 }
1035
1036 int in_use = 0;
1037 int first_multi = 0;
1038 uint8_t uni_overflow = 0;
1039 uint8_t multi_overflow = 0;
1040 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1041
1042 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1043 sizeof(mac_data.entries));
1044 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1045 if (s != sizeof(mac_data.entries)) {
1046 goto error;
1047 }
1048 iov_discard_front(&iov, &iov_cnt, s);
1049
1050 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1051 goto error;
1052 }
1053
1054 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1055 s = iov_to_buf(iov, iov_cnt, 0, macs,
1056 mac_data.entries * ETH_ALEN);
1057 if (s != mac_data.entries * ETH_ALEN) {
1058 goto error;
1059 }
1060 in_use += mac_data.entries;
1061 } else {
1062 uni_overflow = 1;
1063 }
1064
1065 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1066
1067 first_multi = in_use;
1068
1069 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1070 sizeof(mac_data.entries));
1071 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1072 if (s != sizeof(mac_data.entries)) {
1073 goto error;
1074 }
1075
1076 iov_discard_front(&iov, &iov_cnt, s);
1077
1078 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1079 goto error;
1080 }
1081
1082 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1083 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1084 mac_data.entries * ETH_ALEN);
1085 if (s != mac_data.entries * ETH_ALEN) {
1086 goto error;
1087 }
1088 in_use += mac_data.entries;
1089 } else {
1090 multi_overflow = 1;
1091 }
1092
1093 n->mac_table.in_use = in_use;
1094 n->mac_table.first_multi = first_multi;
1095 n->mac_table.uni_overflow = uni_overflow;
1096 n->mac_table.multi_overflow = multi_overflow;
1097 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1098 g_free(macs);
1099 rxfilter_notify(nc);
1100
1101 return VIRTIO_NET_OK;
1102
1103error:
1104 g_free(macs);
1105 return VIRTIO_NET_ERR;
1106}
1107
1108static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1109 struct iovec *iov, unsigned int iov_cnt)
1110{
1111 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1112 uint16_t vid;
1113 size_t s;
1114 NetClientState *nc = qemu_get_queue(n->nic);
1115
1116 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1117 vid = virtio_lduw_p(vdev, &vid);
1118 if (s != sizeof(vid)) {
1119 return VIRTIO_NET_ERR;
1120 }
1121
1122 if (vid >= MAX_VLAN)
1123 return VIRTIO_NET_ERR;
1124
1125 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1126 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1127 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1128 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1129 else
1130 return VIRTIO_NET_ERR;
1131
1132 rxfilter_notify(nc);
1133
1134 return VIRTIO_NET_OK;
1135}
1136
1137static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1138 struct iovec *iov, unsigned int iov_cnt)
1139{
1140 trace_virtio_net_handle_announce(n->announce_timer.round);
1141 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1142 n->status & VIRTIO_NET_S_ANNOUNCE) {
1143 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1144 if (n->announce_timer.round) {
1145 qemu_announce_timer_step(&n->announce_timer);
1146 }
1147 return VIRTIO_NET_OK;
1148 } else {
1149 return VIRTIO_NET_ERR;
1150 }
1151}
1152
1153static void virtio_net_detach_epbf_rss(VirtIONet *n);
1154
1155static void virtio_net_disable_rss(VirtIONet *n)
1156{
1157 if (n->rss_data.enabled) {
1158 trace_virtio_net_rss_disable();
1159 }
1160 n->rss_data.enabled = false;
1161
1162 virtio_net_detach_epbf_rss(n);
1163}
1164
1165static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1166{
1167 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1168 if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1169 return false;
1170 }
1171
1172 return nc->info->set_steering_ebpf(nc, prog_fd);
1173}
1174
1175static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1176 struct EBPFRSSConfig *config)
1177{
1178 config->redirect = data->redirect;
1179 config->populate_hash = data->populate_hash;
1180 config->hash_types = data->hash_types;
1181 config->indirections_len = data->indirections_len;
1182 config->default_queue = data->default_queue;
1183}
1184
1185static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1186{
1187 struct EBPFRSSConfig config = {};
1188
1189 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1190 return false;
1191 }
1192
1193 rss_data_to_rss_config(&n->rss_data, &config);
1194
1195 if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1196 n->rss_data.indirections_table, n->rss_data.key)) {
1197 return false;
1198 }
1199
1200 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1201 return false;
1202 }
1203
1204 return true;
1205}
1206
1207static void virtio_net_detach_epbf_rss(VirtIONet *n)
1208{
1209 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1210}
1211
1212static bool virtio_net_load_ebpf(VirtIONet *n)
1213{
1214 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1215
1216 return false;
1217 }
1218
1219 return ebpf_rss_load(&n->ebpf_rss);
1220}
1221
1222static void virtio_net_unload_ebpf(VirtIONet *n)
1223{
1224 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1225 ebpf_rss_unload(&n->ebpf_rss);
1226}
1227
1228static uint16_t virtio_net_handle_rss(VirtIONet *n,
1229 struct iovec *iov,
1230 unsigned int iov_cnt,
1231 bool do_rss)
1232{
1233 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1234 struct virtio_net_rss_config cfg;
1235 size_t s, offset = 0, size_get;
1236 uint16_t queue_pairs, i;
1237 struct {
1238 uint16_t us;
1239 uint8_t b;
1240 } QEMU_PACKED temp;
1241 const char *err_msg = "";
1242 uint32_t err_value = 0;
1243
1244 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1245 err_msg = "RSS is not negotiated";
1246 goto error;
1247 }
1248 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1249 err_msg = "Hash report is not negotiated";
1250 goto error;
1251 }
1252 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1253 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1254 if (s != size_get) {
1255 err_msg = "Short command buffer";
1256 err_value = (uint32_t)s;
1257 goto error;
1258 }
1259 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1260 n->rss_data.indirections_len =
1261 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1262 n->rss_data.indirections_len++;
1263 if (!do_rss) {
1264 n->rss_data.indirections_len = 1;
1265 }
1266 if (!is_power_of_2(n->rss_data.indirections_len)) {
1267 err_msg = "Invalid size of indirection table";
1268 err_value = n->rss_data.indirections_len;
1269 goto error;
1270 }
1271 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1272 err_msg = "Too large indirection table";
1273 err_value = n->rss_data.indirections_len;
1274 goto error;
1275 }
1276 n->rss_data.default_queue = do_rss ?
1277 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1278 if (n->rss_data.default_queue >= n->max_queue_pairs) {
1279 err_msg = "Invalid default queue";
1280 err_value = n->rss_data.default_queue;
1281 goto error;
1282 }
1283 offset += size_get;
1284 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1285 g_free(n->rss_data.indirections_table);
1286 n->rss_data.indirections_table = g_malloc(size_get);
1287 if (!n->rss_data.indirections_table) {
1288 err_msg = "Can't allocate indirections table";
1289 err_value = n->rss_data.indirections_len;
1290 goto error;
1291 }
1292 s = iov_to_buf(iov, iov_cnt, offset,
1293 n->rss_data.indirections_table, size_get);
1294 if (s != size_get) {
1295 err_msg = "Short indirection table buffer";
1296 err_value = (uint32_t)s;
1297 goto error;
1298 }
1299 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1300 uint16_t val = n->rss_data.indirections_table[i];
1301 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1302 }
1303 offset += size_get;
1304 size_get = sizeof(temp);
1305 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1306 if (s != size_get) {
1307 err_msg = "Can't get queue_pairs";
1308 err_value = (uint32_t)s;
1309 goto error;
1310 }
1311 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1312 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1313 err_msg = "Invalid number of queue_pairs";
1314 err_value = queue_pairs;
1315 goto error;
1316 }
1317 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1318 err_msg = "Invalid key size";
1319 err_value = temp.b;
1320 goto error;
1321 }
1322 if (!temp.b && n->rss_data.hash_types) {
1323 err_msg = "No key provided";
1324 err_value = 0;
1325 goto error;
1326 }
1327 if (!temp.b && !n->rss_data.hash_types) {
1328 virtio_net_disable_rss(n);
1329 return queue_pairs;
1330 }
1331 offset += size_get;
1332 size_get = temp.b;
1333 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1334 if (s != size_get) {
1335 err_msg = "Can get key buffer";
1336 err_value = (uint32_t)s;
1337 goto error;
1338 }
1339 n->rss_data.enabled = true;
1340
1341 if (!n->rss_data.populate_hash) {
1342 if (!virtio_net_attach_epbf_rss(n)) {
1343
1344 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1345 warn_report("Can't load eBPF RSS for vhost");
1346 goto error;
1347 }
1348
1349 warn_report("Can't load eBPF RSS - fallback to software RSS");
1350 n->rss_data.enabled_software_rss = true;
1351 }
1352 } else {
1353
1354
1355 virtio_net_detach_epbf_rss(n);
1356 n->rss_data.enabled_software_rss = true;
1357 }
1358
1359 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1360 n->rss_data.indirections_len,
1361 temp.b);
1362 return queue_pairs;
1363error:
1364 trace_virtio_net_rss_error(err_msg, err_value);
1365 virtio_net_disable_rss(n);
1366 return 0;
1367}
1368
1369static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1370 struct iovec *iov, unsigned int iov_cnt)
1371{
1372 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1373 uint16_t queue_pairs;
1374
1375 virtio_net_disable_rss(n);
1376 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1377 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1378 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1379 }
1380 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1381 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1382 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1383 struct virtio_net_ctrl_mq mq;
1384 size_t s;
1385 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1386 return VIRTIO_NET_ERR;
1387 }
1388 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1389 if (s != sizeof(mq)) {
1390 return VIRTIO_NET_ERR;
1391 }
1392 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1393
1394 } else {
1395 return VIRTIO_NET_ERR;
1396 }
1397
1398 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1399 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1400 queue_pairs > n->max_queue_pairs ||
1401 !n->multiqueue) {
1402 return VIRTIO_NET_ERR;
1403 }
1404
1405 n->curr_queue_pairs = queue_pairs;
1406
1407
1408 virtio_net_set_status(vdev, vdev->status);
1409 virtio_net_set_queue_pairs(n);
1410
1411 return VIRTIO_NET_OK;
1412}
1413
1414static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1415{
1416 VirtIONet *n = VIRTIO_NET(vdev);
1417 struct virtio_net_ctrl_hdr ctrl;
1418 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1419 VirtQueueElement *elem;
1420 size_t s;
1421 struct iovec *iov, *iov2;
1422 unsigned int iov_cnt;
1423
1424 for (;;) {
1425 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1426 if (!elem) {
1427 break;
1428 }
1429 if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1430 iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1431 virtio_error(vdev, "virtio-net ctrl missing headers");
1432 virtqueue_detach_element(vq, elem, 0);
1433 g_free(elem);
1434 break;
1435 }
1436
1437 iov_cnt = elem->out_num;
1438 iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1439 s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1440 iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1441 if (s != sizeof(ctrl)) {
1442 status = VIRTIO_NET_ERR;
1443 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1444 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1445 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1446 status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1447 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1448 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1449 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1450 status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1451 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1452 status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1453 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1454 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1455 }
1456
1457 s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1458 assert(s == sizeof(status));
1459
1460 virtqueue_push(vq, elem, sizeof(status));
1461 virtio_notify(vdev, vq);
1462 g_free(iov2);
1463 g_free(elem);
1464 }
1465}
1466
1467
1468
1469static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1470{
1471 VirtIONet *n = VIRTIO_NET(vdev);
1472 int queue_index = vq2q(virtio_get_queue_index(vq));
1473
1474 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1475}
1476
1477static bool virtio_net_can_receive(NetClientState *nc)
1478{
1479 VirtIONet *n = qemu_get_nic_opaque(nc);
1480 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1481 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1482
1483 if (!vdev->vm_running) {
1484 return false;
1485 }
1486
1487 if (nc->queue_index >= n->curr_queue_pairs) {
1488 return false;
1489 }
1490
1491 if (!virtio_queue_ready(q->rx_vq) ||
1492 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1493 return false;
1494 }
1495
1496 return true;
1497}
1498
1499static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1500{
1501 VirtIONet *n = q->n;
1502 if (virtio_queue_empty(q->rx_vq) ||
1503 (n->mergeable_rx_bufs &&
1504 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1505 virtio_queue_set_notification(q->rx_vq, 1);
1506
1507
1508
1509
1510
1511 if (virtio_queue_empty(q->rx_vq) ||
1512 (n->mergeable_rx_bufs &&
1513 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1514 return 0;
1515 }
1516 }
1517
1518 virtio_queue_set_notification(q->rx_vq, 0);
1519 return 1;
1520}
1521
1522static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1523{
1524 virtio_tswap16s(vdev, &hdr->hdr_len);
1525 virtio_tswap16s(vdev, &hdr->gso_size);
1526 virtio_tswap16s(vdev, &hdr->csum_start);
1527 virtio_tswap16s(vdev, &hdr->csum_offset);
1528}
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1545 uint8_t *buf, size_t size)
1546{
1547 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1548 (size > 27 && size < 1500) &&
1549 (buf[12] == 0x08 && buf[13] == 0x00) &&
1550 (buf[23] == 17) &&
1551 (buf[34] == 0 && buf[35] == 67)) {
1552 net_checksum_calculate(buf, size, CSUM_UDP);
1553 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1554 }
1555}
1556
1557static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1558 const void *buf, size_t size)
1559{
1560 if (n->has_vnet_hdr) {
1561
1562 void *wbuf = (void *)buf;
1563 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1564 size - n->host_hdr_len);
1565
1566 if (n->needs_vnet_hdr_swap) {
1567 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1568 }
1569 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1570 } else {
1571 struct virtio_net_hdr hdr = {
1572 .flags = 0,
1573 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1574 };
1575 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1576 }
1577}
1578
1579static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1580{
1581 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1582 static const uint8_t vlan[] = {0x81, 0x00};
1583 uint8_t *ptr = (uint8_t *)buf;
1584 int i;
1585
1586 if (n->promisc)
1587 return 1;
1588
1589 ptr += n->host_hdr_len;
1590
1591 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1592 int vid = lduw_be_p(ptr + 14) & 0xfff;
1593 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1594 return 0;
1595 }
1596
1597 if (ptr[0] & 1) {
1598 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1599 return !n->nobcast;
1600 } else if (n->nomulti) {
1601 return 0;
1602 } else if (n->allmulti || n->mac_table.multi_overflow) {
1603 return 1;
1604 }
1605
1606 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1607 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1608 return 1;
1609 }
1610 }
1611 } else {
1612 if (n->nouni) {
1613 return 0;
1614 } else if (n->alluni || n->mac_table.uni_overflow) {
1615 return 1;
1616 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1617 return 1;
1618 }
1619
1620 for (i = 0; i < n->mac_table.first_multi; i++) {
1621 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1622 return 1;
1623 }
1624 }
1625 }
1626
1627 return 0;
1628}
1629
1630static uint8_t virtio_net_get_hash_type(bool isip4,
1631 bool isip6,
1632 bool isudp,
1633 bool istcp,
1634 uint32_t types)
1635{
1636 if (isip4) {
1637 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1638 return NetPktRssIpV4Tcp;
1639 }
1640 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1641 return NetPktRssIpV4Udp;
1642 }
1643 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1644 return NetPktRssIpV4;
1645 }
1646 } else if (isip6) {
1647 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1648 VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1649
1650 if (istcp && (types & mask)) {
1651 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1652 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1653 }
1654 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1655 if (isudp && (types & mask)) {
1656 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1657 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1658 }
1659 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1660 if (types & mask) {
1661 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1662 NetPktRssIpV6Ex : NetPktRssIpV6;
1663 }
1664 }
1665 return 0xff;
1666}
1667
1668static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1669 uint32_t hash)
1670{
1671 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1672 hdr->hash_value = hash;
1673 hdr->hash_report = report;
1674}
1675
1676static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1677 size_t size)
1678{
1679 VirtIONet *n = qemu_get_nic_opaque(nc);
1680 unsigned int index = nc->queue_index, new_index = index;
1681 struct NetRxPkt *pkt = n->rx_pkt;
1682 uint8_t net_hash_type;
1683 uint32_t hash;
1684 bool isip4, isip6, isudp, istcp;
1685 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1686 VIRTIO_NET_HASH_REPORT_IPv4,
1687 VIRTIO_NET_HASH_REPORT_TCPv4,
1688 VIRTIO_NET_HASH_REPORT_TCPv6,
1689 VIRTIO_NET_HASH_REPORT_IPv6,
1690 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1691 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1692 VIRTIO_NET_HASH_REPORT_UDPv4,
1693 VIRTIO_NET_HASH_REPORT_UDPv6,
1694 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1695 };
1696
1697 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1698 size - n->host_hdr_len);
1699 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1700 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1701 istcp = isudp = false;
1702 }
1703 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1704 istcp = isudp = false;
1705 }
1706 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1707 n->rss_data.hash_types);
1708 if (net_hash_type > NetPktRssIpV6UdpEx) {
1709 if (n->rss_data.populate_hash) {
1710 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1711 }
1712 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1713 }
1714
1715 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1716
1717 if (n->rss_data.populate_hash) {
1718 virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1719 }
1720
1721 if (n->rss_data.redirect) {
1722 new_index = hash & (n->rss_data.indirections_len - 1);
1723 new_index = n->rss_data.indirections_table[new_index];
1724 }
1725
1726 return (index == new_index) ? -1 : new_index;
1727}
1728
1729static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1730 size_t size, bool no_rss)
1731{
1732 VirtIONet *n = qemu_get_nic_opaque(nc);
1733 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1734 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1735 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1736 size_t lens[VIRTQUEUE_MAX_SIZE];
1737 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1738 struct virtio_net_hdr_mrg_rxbuf mhdr;
1739 unsigned mhdr_cnt = 0;
1740 size_t offset, i, guest_offset, j;
1741 ssize_t err;
1742
1743 if (!virtio_net_can_receive(nc)) {
1744 return -1;
1745 }
1746
1747 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1748 int index = virtio_net_process_rss(nc, buf, size);
1749 if (index >= 0) {
1750 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1751 return virtio_net_receive_rcu(nc2, buf, size, true);
1752 }
1753 }
1754
1755
1756 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1757 return 0;
1758 }
1759
1760 if (!receive_filter(n, buf, size))
1761 return size;
1762
1763 offset = i = 0;
1764
1765 while (offset < size) {
1766 VirtQueueElement *elem;
1767 int len, total;
1768 const struct iovec *sg;
1769
1770 total = 0;
1771
1772 if (i == VIRTQUEUE_MAX_SIZE) {
1773 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1774 err = size;
1775 goto err;
1776 }
1777
1778 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1779 if (!elem) {
1780 if (i) {
1781 virtio_error(vdev, "virtio-net unexpected empty queue: "
1782 "i %zd mergeable %d offset %zd, size %zd, "
1783 "guest hdr len %zd, host hdr len %zd "
1784 "guest features 0x%" PRIx64,
1785 i, n->mergeable_rx_bufs, offset, size,
1786 n->guest_hdr_len, n->host_hdr_len,
1787 vdev->guest_features);
1788 }
1789 err = -1;
1790 goto err;
1791 }
1792
1793 if (elem->in_num < 1) {
1794 virtio_error(vdev,
1795 "virtio-net receive queue contains no in buffers");
1796 virtqueue_detach_element(q->rx_vq, elem, 0);
1797 g_free(elem);
1798 err = -1;
1799 goto err;
1800 }
1801
1802 sg = elem->in_sg;
1803 if (i == 0) {
1804 assert(offset == 0);
1805 if (n->mergeable_rx_bufs) {
1806 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1807 sg, elem->in_num,
1808 offsetof(typeof(mhdr), num_buffers),
1809 sizeof(mhdr.num_buffers));
1810 }
1811
1812 receive_header(n, sg, elem->in_num, buf, size);
1813 if (n->rss_data.populate_hash) {
1814 offset = sizeof(mhdr);
1815 iov_from_buf(sg, elem->in_num, offset,
1816 buf + offset, n->host_hdr_len - sizeof(mhdr));
1817 }
1818 offset = n->host_hdr_len;
1819 total += n->guest_hdr_len;
1820 guest_offset = n->guest_hdr_len;
1821 } else {
1822 guest_offset = 0;
1823 }
1824
1825
1826 len = iov_from_buf(sg, elem->in_num, guest_offset,
1827 buf + offset, size - offset);
1828 total += len;
1829 offset += len;
1830
1831
1832
1833 if (!n->mergeable_rx_bufs && offset < size) {
1834 virtqueue_unpop(q->rx_vq, elem, total);
1835 g_free(elem);
1836 err = size;
1837 goto err;
1838 }
1839
1840 elems[i] = elem;
1841 lens[i] = total;
1842 i++;
1843 }
1844
1845 if (mhdr_cnt) {
1846 virtio_stw_p(vdev, &mhdr.num_buffers, i);
1847 iov_from_buf(mhdr_sg, mhdr_cnt,
1848 0,
1849 &mhdr.num_buffers, sizeof mhdr.num_buffers);
1850 }
1851
1852 for (j = 0; j < i; j++) {
1853
1854 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1855 g_free(elems[j]);
1856 }
1857
1858 virtqueue_flush(q->rx_vq, i);
1859 virtio_notify(vdev, q->rx_vq);
1860
1861 return size;
1862
1863err:
1864 for (j = 0; j < i; j++) {
1865 g_free(elems[j]);
1866 }
1867
1868 return err;
1869}
1870
1871static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1872 size_t size)
1873{
1874 RCU_READ_LOCK_GUARD();
1875
1876 return virtio_net_receive_rcu(nc, buf, size, false);
1877}
1878
1879static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1880 const uint8_t *buf,
1881 VirtioNetRscUnit *unit)
1882{
1883 uint16_t ip_hdrlen;
1884 struct ip_header *ip;
1885
1886 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1887 + sizeof(struct eth_header));
1888 unit->ip = (void *)ip;
1889 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1890 unit->ip_plen = &ip->ip_len;
1891 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1892 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1893 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1894}
1895
1896static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1897 const uint8_t *buf,
1898 VirtioNetRscUnit *unit)
1899{
1900 struct ip6_header *ip6;
1901
1902 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1903 + sizeof(struct eth_header));
1904 unit->ip = ip6;
1905 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1906 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1907 + sizeof(struct ip6_header));
1908 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1909
1910
1911
1912 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1913}
1914
1915static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1916 VirtioNetRscSeg *seg)
1917{
1918 int ret;
1919 struct virtio_net_hdr_v1 *h;
1920
1921 h = (struct virtio_net_hdr_v1 *)seg->buf;
1922 h->flags = 0;
1923 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1924
1925 if (seg->is_coalesced) {
1926 h->rsc.segments = seg->packets;
1927 h->rsc.dup_acks = seg->dup_ack;
1928 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1929 if (chain->proto == ETH_P_IP) {
1930 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1931 } else {
1932 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1933 }
1934 }
1935
1936 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1937 QTAILQ_REMOVE(&chain->buffers, seg, next);
1938 g_free(seg->buf);
1939 g_free(seg);
1940
1941 return ret;
1942}
1943
1944static void virtio_net_rsc_purge(void *opq)
1945{
1946 VirtioNetRscSeg *seg, *rn;
1947 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1948
1949 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1950 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1951 chain->stat.purge_failed++;
1952 continue;
1953 }
1954 }
1955
1956 chain->stat.timer++;
1957 if (!QTAILQ_EMPTY(&chain->buffers)) {
1958 timer_mod(chain->drain_timer,
1959 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1960 }
1961}
1962
1963static void virtio_net_rsc_cleanup(VirtIONet *n)
1964{
1965 VirtioNetRscChain *chain, *rn_chain;
1966 VirtioNetRscSeg *seg, *rn_seg;
1967
1968 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1969 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1970 QTAILQ_REMOVE(&chain->buffers, seg, next);
1971 g_free(seg->buf);
1972 g_free(seg);
1973 }
1974
1975 timer_free(chain->drain_timer);
1976 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1977 g_free(chain);
1978 }
1979}
1980
1981static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1982 NetClientState *nc,
1983 const uint8_t *buf, size_t size)
1984{
1985 uint16_t hdr_len;
1986 VirtioNetRscSeg *seg;
1987
1988 hdr_len = chain->n->guest_hdr_len;
1989 seg = g_malloc(sizeof(VirtioNetRscSeg));
1990 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1991 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1992 memcpy(seg->buf, buf, size);
1993 seg->size = size;
1994 seg->packets = 1;
1995 seg->dup_ack = 0;
1996 seg->is_coalesced = 0;
1997 seg->nc = nc;
1998
1999 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2000 chain->stat.cache++;
2001
2002 switch (chain->proto) {
2003 case ETH_P_IP:
2004 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2005 break;
2006 case ETH_P_IPV6:
2007 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2008 break;
2009 default:
2010 g_assert_not_reached();
2011 }
2012}
2013
2014static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2015 VirtioNetRscSeg *seg,
2016 const uint8_t *buf,
2017 struct tcp_header *n_tcp,
2018 struct tcp_header *o_tcp)
2019{
2020 uint32_t nack, oack;
2021 uint16_t nwin, owin;
2022
2023 nack = htonl(n_tcp->th_ack);
2024 nwin = htons(n_tcp->th_win);
2025 oack = htonl(o_tcp->th_ack);
2026 owin = htons(o_tcp->th_win);
2027
2028 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2029 chain->stat.ack_out_of_win++;
2030 return RSC_FINAL;
2031 } else if (nack == oack) {
2032
2033 if (nwin == owin) {
2034
2035 chain->stat.dup_ack++;
2036 return RSC_FINAL;
2037 } else {
2038
2039 o_tcp->th_win = n_tcp->th_win;
2040 chain->stat.win_update++;
2041 return RSC_COALESCE;
2042 }
2043 } else {
2044
2045 chain->stat.pure_ack++;
2046 return RSC_FINAL;
2047 }
2048}
2049
2050static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2051 VirtioNetRscSeg *seg,
2052 const uint8_t *buf,
2053 VirtioNetRscUnit *n_unit)
2054{
2055 void *data;
2056 uint16_t o_ip_len;
2057 uint32_t nseq, oseq;
2058 VirtioNetRscUnit *o_unit;
2059
2060 o_unit = &seg->unit;
2061 o_ip_len = htons(*o_unit->ip_plen);
2062 nseq = htonl(n_unit->tcp->th_seq);
2063 oseq = htonl(o_unit->tcp->th_seq);
2064
2065
2066 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2067 chain->stat.data_out_of_win++;
2068 return RSC_FINAL;
2069 }
2070
2071 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2072 if (nseq == oseq) {
2073 if ((o_unit->payload == 0) && n_unit->payload) {
2074
2075 chain->stat.data_after_pure_ack++;
2076 goto coalesce;
2077 } else {
2078 return virtio_net_rsc_handle_ack(chain, seg, buf,
2079 n_unit->tcp, o_unit->tcp);
2080 }
2081 } else if ((nseq - oseq) != o_unit->payload) {
2082
2083 chain->stat.data_out_of_order++;
2084 return RSC_FINAL;
2085 } else {
2086coalesce:
2087 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2088 chain->stat.over_size++;
2089 return RSC_FINAL;
2090 }
2091
2092
2093
2094 o_unit->payload += n_unit->payload;
2095
2096
2097 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2098
2099
2100
2101
2102 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2103
2104 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2105 o_unit->tcp->th_win = n_unit->tcp->th_win;
2106
2107 memmove(seg->buf + seg->size, data, n_unit->payload);
2108 seg->size += n_unit->payload;
2109 seg->packets++;
2110 chain->stat.coalesced++;
2111 return RSC_COALESCE;
2112 }
2113}
2114
2115static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2116 VirtioNetRscSeg *seg,
2117 const uint8_t *buf, size_t size,
2118 VirtioNetRscUnit *unit)
2119{
2120 struct ip_header *ip1, *ip2;
2121
2122 ip1 = (struct ip_header *)(unit->ip);
2123 ip2 = (struct ip_header *)(seg->unit.ip);
2124 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2125 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2126 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2127 chain->stat.no_match++;
2128 return RSC_NO_MATCH;
2129 }
2130
2131 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2132}
2133
2134static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2135 VirtioNetRscSeg *seg,
2136 const uint8_t *buf, size_t size,
2137 VirtioNetRscUnit *unit)
2138{
2139 struct ip6_header *ip1, *ip2;
2140
2141 ip1 = (struct ip6_header *)(unit->ip);
2142 ip2 = (struct ip6_header *)(seg->unit.ip);
2143 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2144 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2145 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2146 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2147 chain->stat.no_match++;
2148 return RSC_NO_MATCH;
2149 }
2150
2151 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2152}
2153
2154
2155
2156static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2157 struct tcp_header *tcp)
2158{
2159 uint16_t tcp_hdr;
2160 uint16_t tcp_flag;
2161
2162 tcp_flag = htons(tcp->th_offset_flags);
2163 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2164 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2165 if (tcp_flag & TH_SYN) {
2166 chain->stat.tcp_syn++;
2167 return RSC_BYPASS;
2168 }
2169
2170 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2171 chain->stat.tcp_ctrl_drain++;
2172 return RSC_FINAL;
2173 }
2174
2175 if (tcp_hdr > sizeof(struct tcp_header)) {
2176 chain->stat.tcp_all_opt++;
2177 return RSC_FINAL;
2178 }
2179
2180 return RSC_CANDIDATE;
2181}
2182
2183static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2184 NetClientState *nc,
2185 const uint8_t *buf, size_t size,
2186 VirtioNetRscUnit *unit)
2187{
2188 int ret;
2189 VirtioNetRscSeg *seg, *nseg;
2190
2191 if (QTAILQ_EMPTY(&chain->buffers)) {
2192 chain->stat.empty_cache++;
2193 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2194 timer_mod(chain->drain_timer,
2195 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2196 return size;
2197 }
2198
2199 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2200 if (chain->proto == ETH_P_IP) {
2201 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2202 } else {
2203 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2204 }
2205
2206 if (ret == RSC_FINAL) {
2207 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2208
2209 chain->stat.final_failed++;
2210 return 0;
2211 }
2212
2213
2214 return virtio_net_do_receive(nc, buf, size);
2215 } else if (ret == RSC_NO_MATCH) {
2216 continue;
2217 } else {
2218
2219 seg->is_coalesced = 1;
2220 return size;
2221 }
2222 }
2223
2224 chain->stat.no_match_cache++;
2225 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2226 return size;
2227}
2228
2229
2230static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2231 NetClientState *nc,
2232 const uint8_t *buf, size_t size,
2233 uint16_t ip_start, uint16_t ip_size,
2234 uint16_t tcp_port)
2235{
2236 VirtioNetRscSeg *seg, *nseg;
2237 uint32_t ppair1, ppair2;
2238
2239 ppair1 = *(uint32_t *)(buf + tcp_port);
2240 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2241 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2242 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2243 || (ppair1 != ppair2)) {
2244 continue;
2245 }
2246 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2247 chain->stat.drain_failed++;
2248 }
2249
2250 break;
2251 }
2252
2253 return virtio_net_do_receive(nc, buf, size);
2254}
2255
2256static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2257 struct ip_header *ip,
2258 const uint8_t *buf, size_t size)
2259{
2260 uint16_t ip_len;
2261
2262
2263 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2264 chain->stat.ip_option++;
2265 return RSC_BYPASS;
2266 }
2267
2268
2269 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2270 chain->stat.ip_option++;
2271 return RSC_BYPASS;
2272 }
2273
2274 if (ip->ip_p != IPPROTO_TCP) {
2275 chain->stat.bypass_not_tcp++;
2276 return RSC_BYPASS;
2277 }
2278
2279
2280 if (!(htons(ip->ip_off) & IP_DF)) {
2281 chain->stat.ip_frag++;
2282 return RSC_BYPASS;
2283 }
2284
2285
2286 if (IPTOS_ECN(ip->ip_tos)) {
2287 chain->stat.ip_ecn++;
2288 return RSC_BYPASS;
2289 }
2290
2291 ip_len = htons(ip->ip_len);
2292 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2293 || ip_len > (size - chain->n->guest_hdr_len -
2294 sizeof(struct eth_header))) {
2295 chain->stat.ip_hacked++;
2296 return RSC_BYPASS;
2297 }
2298
2299 return RSC_CANDIDATE;
2300}
2301
2302static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2303 NetClientState *nc,
2304 const uint8_t *buf, size_t size)
2305{
2306 int32_t ret;
2307 uint16_t hdr_len;
2308 VirtioNetRscUnit unit;
2309
2310 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2311
2312 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2313 + sizeof(struct tcp_header))) {
2314 chain->stat.bypass_not_tcp++;
2315 return virtio_net_do_receive(nc, buf, size);
2316 }
2317
2318 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2319 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2320 != RSC_CANDIDATE) {
2321 return virtio_net_do_receive(nc, buf, size);
2322 }
2323
2324 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2325 if (ret == RSC_BYPASS) {
2326 return virtio_net_do_receive(nc, buf, size);
2327 } else if (ret == RSC_FINAL) {
2328 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2329 ((hdr_len + sizeof(struct eth_header)) + 12),
2330 VIRTIO_NET_IP4_ADDR_SIZE,
2331 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2332 }
2333
2334 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2335}
2336
2337static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2338 struct ip6_header *ip6,
2339 const uint8_t *buf, size_t size)
2340{
2341 uint16_t ip_len;
2342
2343 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2344 != IP_HEADER_VERSION_6) {
2345 return RSC_BYPASS;
2346 }
2347
2348
2349 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2350 chain->stat.bypass_not_tcp++;
2351 return RSC_BYPASS;
2352 }
2353
2354 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2355 if (ip_len < sizeof(struct tcp_header) ||
2356 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2357 - sizeof(struct ip6_header))) {
2358 chain->stat.ip_hacked++;
2359 return RSC_BYPASS;
2360 }
2361
2362
2363 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2364 chain->stat.ip_ecn++;
2365 return RSC_BYPASS;
2366 }
2367
2368 return RSC_CANDIDATE;
2369}
2370
2371static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2372 const uint8_t *buf, size_t size)
2373{
2374 int32_t ret;
2375 uint16_t hdr_len;
2376 VirtioNetRscChain *chain;
2377 VirtioNetRscUnit unit;
2378
2379 chain = (VirtioNetRscChain *)opq;
2380 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2381
2382 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2383 + sizeof(tcp_header))) {
2384 return virtio_net_do_receive(nc, buf, size);
2385 }
2386
2387 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2388 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2389 unit.ip, buf, size)) {
2390 return virtio_net_do_receive(nc, buf, size);
2391 }
2392
2393 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2394 if (ret == RSC_BYPASS) {
2395 return virtio_net_do_receive(nc, buf, size);
2396 } else if (ret == RSC_FINAL) {
2397 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2398 ((hdr_len + sizeof(struct eth_header)) + 8),
2399 VIRTIO_NET_IP6_ADDR_SIZE,
2400 hdr_len + sizeof(struct eth_header)
2401 + sizeof(struct ip6_header));
2402 }
2403
2404 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2405}
2406
2407static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2408 NetClientState *nc,
2409 uint16_t proto)
2410{
2411 VirtioNetRscChain *chain;
2412
2413 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2414 return NULL;
2415 }
2416
2417 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2418 if (chain->proto == proto) {
2419 return chain;
2420 }
2421 }
2422
2423 chain = g_malloc(sizeof(*chain));
2424 chain->n = n;
2425 chain->proto = proto;
2426 if (proto == (uint16_t)ETH_P_IP) {
2427 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2428 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2429 } else {
2430 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2431 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2432 }
2433 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2434 virtio_net_rsc_purge, chain);
2435 memset(&chain->stat, 0, sizeof(chain->stat));
2436
2437 QTAILQ_INIT(&chain->buffers);
2438 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2439
2440 return chain;
2441}
2442
2443static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2444 const uint8_t *buf,
2445 size_t size)
2446{
2447 uint16_t proto;
2448 VirtioNetRscChain *chain;
2449 struct eth_header *eth;
2450 VirtIONet *n;
2451
2452 n = qemu_get_nic_opaque(nc);
2453 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2454 return virtio_net_do_receive(nc, buf, size);
2455 }
2456
2457 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2458 proto = htons(eth->h_proto);
2459
2460 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2461 if (chain) {
2462 chain->stat.received++;
2463 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2464 return virtio_net_rsc_receive4(chain, nc, buf, size);
2465 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2466 return virtio_net_rsc_receive6(chain, nc, buf, size);
2467 }
2468 }
2469 return virtio_net_do_receive(nc, buf, size);
2470}
2471
2472static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2473 size_t size)
2474{
2475 VirtIONet *n = qemu_get_nic_opaque(nc);
2476 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2477 return virtio_net_rsc_receive(nc, buf, size);
2478 } else {
2479 return virtio_net_do_receive(nc, buf, size);
2480 }
2481}
2482
2483static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2484
2485static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2486{
2487 VirtIONet *n = qemu_get_nic_opaque(nc);
2488 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2489 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2490
2491 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2492 virtio_notify(vdev, q->tx_vq);
2493
2494 g_free(q->async_tx.elem);
2495 q->async_tx.elem = NULL;
2496
2497 virtio_queue_set_notification(q->tx_vq, 1);
2498 virtio_net_flush_tx(q);
2499}
2500
2501
2502static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2503{
2504 VirtIONet *n = q->n;
2505 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2506 VirtQueueElement *elem;
2507 int32_t num_packets = 0;
2508 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2509 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2510 return num_packets;
2511 }
2512
2513 if (q->async_tx.elem) {
2514 virtio_queue_set_notification(q->tx_vq, 0);
2515 return num_packets;
2516 }
2517
2518 for (;;) {
2519 ssize_t ret;
2520 unsigned int out_num;
2521 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2522 struct virtio_net_hdr_mrg_rxbuf mhdr;
2523
2524 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2525 if (!elem) {
2526 break;
2527 }
2528
2529 out_num = elem->out_num;
2530 out_sg = elem->out_sg;
2531 if (out_num < 1) {
2532 virtio_error(vdev, "virtio-net header not in first element");
2533 virtqueue_detach_element(q->tx_vq, elem, 0);
2534 g_free(elem);
2535 return -EINVAL;
2536 }
2537
2538 if (n->has_vnet_hdr) {
2539 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2540 n->guest_hdr_len) {
2541 virtio_error(vdev, "virtio-net header incorrect");
2542 virtqueue_detach_element(q->tx_vq, elem, 0);
2543 g_free(elem);
2544 return -EINVAL;
2545 }
2546 if (n->needs_vnet_hdr_swap) {
2547 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2548 sg2[0].iov_base = &mhdr;
2549 sg2[0].iov_len = n->guest_hdr_len;
2550 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2551 out_sg, out_num,
2552 n->guest_hdr_len, -1);
2553 if (out_num == VIRTQUEUE_MAX_SIZE) {
2554 goto drop;
2555 }
2556 out_num += 1;
2557 out_sg = sg2;
2558 }
2559 }
2560
2561
2562
2563
2564
2565 assert(n->host_hdr_len <= n->guest_hdr_len);
2566 if (n->host_hdr_len != n->guest_hdr_len) {
2567 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2568 out_sg, out_num,
2569 0, n->host_hdr_len);
2570 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2571 out_sg, out_num,
2572 n->guest_hdr_len, -1);
2573 out_num = sg_num;
2574 out_sg = sg;
2575 }
2576
2577 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2578 out_sg, out_num, virtio_net_tx_complete);
2579 if (ret == 0) {
2580 virtio_queue_set_notification(q->tx_vq, 0);
2581 q->async_tx.elem = elem;
2582 return -EBUSY;
2583 }
2584
2585drop:
2586 virtqueue_push(q->tx_vq, elem, 0);
2587 virtio_notify(vdev, q->tx_vq);
2588 g_free(elem);
2589
2590 if (++num_packets >= n->tx_burst) {
2591 break;
2592 }
2593 }
2594 return num_packets;
2595}
2596
2597static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2598{
2599 VirtIONet *n = VIRTIO_NET(vdev);
2600 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2601
2602 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2603 virtio_net_drop_tx_queue_data(vdev, vq);
2604 return;
2605 }
2606
2607
2608 if (!vdev->vm_running) {
2609 q->tx_waiting = 1;
2610 return;
2611 }
2612
2613 if (q->tx_waiting) {
2614 virtio_queue_set_notification(vq, 1);
2615 timer_del(q->tx_timer);
2616 q->tx_waiting = 0;
2617 if (virtio_net_flush_tx(q) == -EINVAL) {
2618 return;
2619 }
2620 } else {
2621 timer_mod(q->tx_timer,
2622 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2623 q->tx_waiting = 1;
2624 virtio_queue_set_notification(vq, 0);
2625 }
2626}
2627
2628static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2629{
2630 VirtIONet *n = VIRTIO_NET(vdev);
2631 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2632
2633 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2634 virtio_net_drop_tx_queue_data(vdev, vq);
2635 return;
2636 }
2637
2638 if (unlikely(q->tx_waiting)) {
2639 return;
2640 }
2641 q->tx_waiting = 1;
2642
2643 if (!vdev->vm_running) {
2644 return;
2645 }
2646 virtio_queue_set_notification(vq, 0);
2647 qemu_bh_schedule(q->tx_bh);
2648}
2649
2650static void virtio_net_tx_timer(void *opaque)
2651{
2652 VirtIONetQueue *q = opaque;
2653 VirtIONet *n = q->n;
2654 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2655
2656 if (!vdev->vm_running) {
2657
2658 assert(q->tx_waiting);
2659 return;
2660 }
2661
2662 q->tx_waiting = 0;
2663
2664
2665 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2666 return;
2667 }
2668
2669 virtio_queue_set_notification(q->tx_vq, 1);
2670 virtio_net_flush_tx(q);
2671}
2672
2673static void virtio_net_tx_bh(void *opaque)
2674{
2675 VirtIONetQueue *q = opaque;
2676 VirtIONet *n = q->n;
2677 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2678 int32_t ret;
2679
2680
2681 if (!vdev->vm_running) {
2682
2683 assert(q->tx_waiting);
2684 return;
2685 }
2686
2687 q->tx_waiting = 0;
2688
2689
2690 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2691 return;
2692 }
2693
2694 ret = virtio_net_flush_tx(q);
2695 if (ret == -EBUSY || ret == -EINVAL) {
2696 return;
2697
2698 }
2699
2700
2701
2702 if (ret >= n->tx_burst) {
2703 qemu_bh_schedule(q->tx_bh);
2704 q->tx_waiting = 1;
2705 return;
2706 }
2707
2708
2709
2710
2711 virtio_queue_set_notification(q->tx_vq, 1);
2712 ret = virtio_net_flush_tx(q);
2713 if (ret == -EINVAL) {
2714 return;
2715 } else if (ret > 0) {
2716 virtio_queue_set_notification(q->tx_vq, 0);
2717 qemu_bh_schedule(q->tx_bh);
2718 q->tx_waiting = 1;
2719 }
2720}
2721
2722static void virtio_net_add_queue(VirtIONet *n, int index)
2723{
2724 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2725
2726 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2727 virtio_net_handle_rx);
2728
2729 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2730 n->vqs[index].tx_vq =
2731 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2732 virtio_net_handle_tx_timer);
2733 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2734 virtio_net_tx_timer,
2735 &n->vqs[index]);
2736 } else {
2737 n->vqs[index].tx_vq =
2738 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2739 virtio_net_handle_tx_bh);
2740 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2741 }
2742
2743 n->vqs[index].tx_waiting = 0;
2744 n->vqs[index].n = n;
2745}
2746
2747static void virtio_net_del_queue(VirtIONet *n, int index)
2748{
2749 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2750 VirtIONetQueue *q = &n->vqs[index];
2751 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2752
2753 qemu_purge_queued_packets(nc);
2754
2755 virtio_del_queue(vdev, index * 2);
2756 if (q->tx_timer) {
2757 timer_free(q->tx_timer);
2758 q->tx_timer = NULL;
2759 } else {
2760 qemu_bh_delete(q->tx_bh);
2761 q->tx_bh = NULL;
2762 }
2763 q->tx_waiting = 0;
2764 virtio_del_queue(vdev, index * 2 + 1);
2765}
2766
2767static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2768{
2769 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2770 int old_num_queues = virtio_get_num_queues(vdev);
2771 int new_num_queues = new_max_queue_pairs * 2 + 1;
2772 int i;
2773
2774 assert(old_num_queues >= 3);
2775 assert(old_num_queues % 2 == 1);
2776
2777 if (old_num_queues == new_num_queues) {
2778 return;
2779 }
2780
2781
2782
2783
2784
2785
2786 virtio_del_queue(vdev, old_num_queues - 1);
2787
2788 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2789
2790 virtio_net_del_queue(n, i / 2);
2791 }
2792
2793 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2794
2795 virtio_net_add_queue(n, i / 2);
2796 }
2797
2798
2799 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2800}
2801
2802static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2803{
2804 int max = multiqueue ? n->max_queue_pairs : 1;
2805
2806 n->multiqueue = multiqueue;
2807 virtio_net_change_num_queue_pairs(n, max);
2808
2809 virtio_net_set_queue_pairs(n);
2810}
2811
2812static int virtio_net_post_load_device(void *opaque, int version_id)
2813{
2814 VirtIONet *n = opaque;
2815 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2816 int i, link_down;
2817
2818 trace_virtio_net_post_load_device();
2819 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2820 virtio_vdev_has_feature(vdev,
2821 VIRTIO_F_VERSION_1),
2822 virtio_vdev_has_feature(vdev,
2823 VIRTIO_NET_F_HASH_REPORT));
2824
2825
2826 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2827 n->mac_table.in_use = 0;
2828 }
2829
2830 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2831 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2832 }
2833
2834
2835
2836
2837
2838
2839
2840 n->saved_guest_offloads = n->curr_guest_offloads;
2841
2842 virtio_net_set_queue_pairs(n);
2843
2844
2845 for (i = 0; i < n->mac_table.in_use; i++) {
2846 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2847 break;
2848 }
2849 }
2850 n->mac_table.first_multi = i;
2851
2852
2853
2854 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2855 for (i = 0; i < n->max_queue_pairs; i++) {
2856 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2857 }
2858
2859 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2860 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2861 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2862 QEMU_CLOCK_VIRTUAL,
2863 virtio_net_announce_timer, n);
2864 if (n->announce_timer.round) {
2865 timer_mod(n->announce_timer.tm,
2866 qemu_clock_get_ms(n->announce_timer.type));
2867 } else {
2868 qemu_announce_timer_del(&n->announce_timer, false);
2869 }
2870 }
2871
2872 if (n->rss_data.enabled) {
2873 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2874 if (!n->rss_data.populate_hash) {
2875 if (!virtio_net_attach_epbf_rss(n)) {
2876 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2877 warn_report("Can't post-load eBPF RSS for vhost");
2878 } else {
2879 warn_report("Can't post-load eBPF RSS - "
2880 "fallback to software RSS");
2881 n->rss_data.enabled_software_rss = true;
2882 }
2883 }
2884 }
2885
2886 trace_virtio_net_rss_enable(n->rss_data.hash_types,
2887 n->rss_data.indirections_len,
2888 sizeof(n->rss_data.key));
2889 } else {
2890 trace_virtio_net_rss_disable();
2891 }
2892 return 0;
2893}
2894
2895static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2896{
2897 VirtIONet *n = VIRTIO_NET(vdev);
2898
2899
2900
2901
2902
2903 n->curr_guest_offloads = n->saved_guest_offloads;
2904 if (peer_has_vnet_hdr(n)) {
2905 virtio_net_apply_guest_offloads(n);
2906 }
2907
2908 return 0;
2909}
2910
2911
/* VMState description covering the tx_waiting flag of one VirtIONetQueue. */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name      = "virtio-net-queue-tx_waiting",
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2919
2920static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2921{
2922 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2923}
2924
2925static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2926{
2927 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2928 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2929}
2930
2931static bool mac_table_fits(void *opaque, int version_id)
2932{
2933 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2934}
2935
/* VMState field test: the exact negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }
    return true;
}
2940
2941
2942
2943
/*
 * Temporary structure used with VMSTATE_WITH_TMP in the device's VMState
 * description.  The pre_save/pre_load/post_load hooks in this file fill or
 * check these fields against the parent device.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;             /* the device being saved or loaded */
    VirtIONetQueue *vqs_1;              /* parent->vqs + 1 */
    uint16_t        curr_queue_pairs_1; /* curr_queue_pairs - 1, or 0 */
    uint8_t         has_ufo;            /* copy of parent->has_ufo */
    uint32_t        has_vnet_hdr;       /* copy of parent->has_vnet_hdr */
};
2951
2952
2953
2954
2955
2956
2957
2958static int virtio_net_tx_waiting_pre_save(void *opaque)
2959{
2960 struct VirtIONetMigTmp *tmp = opaque;
2961
2962 tmp->vqs_1 = tmp->parent->vqs + 1;
2963 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2964 if (tmp->parent->curr_queue_pairs == 0) {
2965 tmp->curr_queue_pairs_1 = 0;
2966 }
2967
2968 return 0;
2969}
2970
2971static int virtio_net_tx_waiting_pre_load(void *opaque)
2972{
2973 struct VirtIONetMigTmp *tmp = opaque;
2974
2975
2976 virtio_net_tx_waiting_pre_save(opaque);
2977
2978 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
2979 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
2980 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
2981
2982 return -EINVAL;
2983 }
2984
2985 return 0;
2986}
2987
/*
 * VMState description for the tx_waiting flags of the queues starting at
 * vqs[1].  It operates on a VirtIONetMigTmp whose vqs_1 and
 * curr_queue_pairs_1 fields are prepared by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3000
3001
3002
3003
3004static int virtio_net_ufo_post_load(void *opaque, int version_id)
3005{
3006 struct VirtIONetMigTmp *tmp = opaque;
3007
3008 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3009 error_report("virtio-net: saved image requires TUN_F_UFO support");
3010 return -EINVAL;
3011 }
3012
3013 return 0;
3014}
3015
3016static int virtio_net_ufo_pre_save(void *opaque)
3017{
3018 struct VirtIONetMigTmp *tmp = opaque;
3019
3020 tmp->has_ufo = tmp->parent->has_ufo;
3021
3022 return 0;
3023}
3024
/*
 * VMState description for the has_ufo flag, carried through a
 * VirtIONetMigTmp.  The post_load hook rejects the stream when the flag is
 * set but the peer lacks UFO support.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3034
3035
3036
3037
3038static int virtio_net_vnet_post_load(void *opaque, int version_id)
3039{
3040 struct VirtIONetMigTmp *tmp = opaque;
3041
3042 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3043 error_report("virtio-net: saved image requires vnet_hdr=on");
3044 return -EINVAL;
3045 }
3046
3047 return 0;
3048}
3049
3050static int virtio_net_vnet_pre_save(void *opaque)
3051{
3052 struct VirtIONetMigTmp *tmp = opaque;
3053
3054 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3055
3056 return 0;
3057}
3058
/*
 * VMState description for the has_vnet_hdr flag, carried through a
 * VirtIONetMigTmp.  The post_load hook rejects the stream when the flag is
 * set but the peer has no vnet header.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3068
3069static bool virtio_net_rss_needed(void *opaque)
3070{
3071 return VIRTIO_NET(opaque)->rss_data.enabled;
3072}
3073
/*
 * Optional subsection carrying the RSS configuration.  It is used only
 * when virtio_net_rss_needed() returns true, i.e. when rss_data.enabled is
 * set.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        /* Table length is taken from rss_data.indirections_len above. */
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3094
/*
 * VMState description of the virtio-net device state.
 *
 * NOTE(review): the order of the entries in .fields presumably defines the
 * layout of the migration stream - do not reorder without confirming.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of vqs[0]; the remaining queues follow further down. */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /*
         * Guarded pair: the first entry is used when mac_table_fits() is
         * true, the second when mac_table_doesnt_fit() is true.  In the
         * latter case virtio_net_post_load_device() resets in_use to 0.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* The VLAN table is handled as a raw buffer of MAX_VLAN >> 3 bytes. */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The queue pair entries are used only when max_queue_pairs > 1. */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        /* tx_waiting of the queues starting at vqs[1]. */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3150
/*
 * Net client callbacks for the virtio-net NIC; passed to qemu_new_nic() in
 * virtio_net_device_realize().
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3160
3161static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3162{
3163 VirtIONet *n = VIRTIO_NET(vdev);
3164 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3165 assert(n->vhost_started);
3166 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3167}
3168
3169static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3170 bool mask)
3171{
3172 VirtIONet *n = VIRTIO_NET(vdev);
3173 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3174 assert(n->vhost_started);
3175 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3176 vdev, idx, mask);
3177}
3178
3179static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3180{
3181 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3182
3183 n->config_size = virtio_feature_get_config_size(feature_sizes,
3184 host_features);
3185}
3186
3187void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3188 const char *type)
3189{
3190
3191
3192
3193 assert(type != NULL);
3194
3195 g_free(n->netclient_name);
3196 g_free(n->netclient_type);
3197 n->netclient_name = g_strdup(name);
3198 n->netclient_type = g_strdup(type);
3199}
3200
3201static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3202{
3203 HotplugHandler *hotplug_ctrl;
3204 PCIDevice *pci_dev;
3205 Error *err = NULL;
3206
3207 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3208 if (hotplug_ctrl) {
3209 pci_dev = PCI_DEVICE(dev);
3210 pci_dev->partially_hotplugged = true;
3211 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3212 if (err) {
3213 error_report_err(err);
3214 return false;
3215 }
3216 } else {
3217 return false;
3218 }
3219 return true;
3220}
3221
3222static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3223 Error **errp)
3224{
3225 Error *err = NULL;
3226 HotplugHandler *hotplug_ctrl;
3227 PCIDevice *pdev = PCI_DEVICE(dev);
3228 BusState *primary_bus;
3229
3230 if (!pdev->partially_hotplugged) {
3231 return true;
3232 }
3233 primary_bus = dev->parent_bus;
3234 if (!primary_bus) {
3235 error_setg(errp, "virtio_net: couldn't find primary bus");
3236 return false;
3237 }
3238 qdev_set_parent_bus(dev, primary_bus, &error_abort);
3239 qatomic_set(&n->failover_primary_hidden, false);
3240 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3241 if (hotplug_ctrl) {
3242 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3243 if (err) {
3244 goto out;
3245 }
3246 hotplug_handler_plug(hotplug_ctrl, dev, &err);
3247 }
3248 pdev->partially_hotplugged = false;
3249
3250out:
3251 error_propagate(errp, err);
3252 return !err;
3253}
3254
3255static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3256{
3257 bool should_be_hidden;
3258 Error *err = NULL;
3259 DeviceState *dev = failover_find_primary_device(n);
3260
3261 if (!dev) {
3262 return;
3263 }
3264
3265 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3266
3267 if (migration_in_setup(s) && !should_be_hidden) {
3268 if (failover_unplug_primary(n, dev)) {
3269 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3270 qapi_event_send_unplug_primary(dev->id);
3271 qatomic_set(&n->failover_primary_hidden, true);
3272 } else {
3273 warn_report("couldn't unplug primary device");
3274 }
3275 } else if (migration_has_failed(s)) {
3276
3277 if (!failover_replug_primary(n, dev, &err)) {
3278 if (err) {
3279 error_report_err(err);
3280 }
3281 }
3282 }
3283}
3284
3285static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3286{
3287 MigrationState *s = data;
3288 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3289 virtio_net_handle_migration_primary(n, s);
3290}
3291
3292static bool failover_hide_primary_device(DeviceListener *listener,
3293 const QDict *device_opts,
3294 bool from_json,
3295 Error **errp)
3296{
3297 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3298 const char *standby_id;
3299
3300 if (!device_opts) {
3301 return false;
3302 }
3303
3304 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3305 return false;
3306 }
3307
3308 if (!qdict_haskey(device_opts, "id")) {
3309 error_setg(errp, "Device with failover_pair_id needs to have id");
3310 return false;
3311 }
3312
3313 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3314 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3315 return false;
3316 }
3317
3318
3319
3320
3321
3322
3323
3324 if (n->primary_opts) {
3325 const char *old, *new;
3326
3327 old = qdict_get_str(n->primary_opts, "id");
3328 new = qdict_get_str(device_opts, "id");
3329 if (strcmp(old, new) != 0) {
3330 error_setg(errp, "Cannot attach more than one primary device to "
3331 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3332 return false;
3333 }
3334 } else {
3335 n->primary_opts = qdict_clone_shallow(device_opts);
3336 n->primary_opts_from_json = from_json;
3337 }
3338
3339
3340 return qatomic_read(&n->failover_primary_hidden);
3341}
3342
3343static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3344{
3345 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3346 VirtIONet *n = VIRTIO_NET(dev);
3347 NetClientState *nc;
3348 int i;
3349
3350 if (n->net_conf.mtu) {
3351 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3352 }
3353
3354 if (n->net_conf.duplex_str) {
3355 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3356 n->net_conf.duplex = DUPLEX_HALF;
3357 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3358 n->net_conf.duplex = DUPLEX_FULL;
3359 } else {
3360 error_setg(errp, "'duplex' must be 'half' or 'full'");
3361 return;
3362 }
3363 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3364 } else {
3365 n->net_conf.duplex = DUPLEX_UNKNOWN;
3366 }
3367
3368 if (n->net_conf.speed < SPEED_UNKNOWN) {
3369 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3370 return;
3371 }
3372 if (n->net_conf.speed >= 0) {
3373 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3374 }
3375
3376 if (n->failover) {
3377 n->primary_listener.hide_device = failover_hide_primary_device;
3378 qatomic_set(&n->failover_primary_hidden, true);
3379 device_listener_register(&n->primary_listener);
3380 n->migration_state.notify = virtio_net_migration_state_notifier;
3381 add_migration_state_change_notifier(&n->migration_state);
3382 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3383 }
3384
3385 virtio_net_set_config_size(n, n->host_features);
3386 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3387
3388
3389
3390
3391
3392
3393 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3394 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3395 !is_power_of_2(n->net_conf.rx_queue_size)) {
3396 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3397 "must be a power of 2 between %d and %d.",
3398 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3399 VIRTQUEUE_MAX_SIZE);
3400 virtio_cleanup(vdev);
3401 return;
3402 }
3403
3404 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3405 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3406 !is_power_of_2(n->net_conf.tx_queue_size)) {
3407 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3408 "must be a power of 2 between %d and %d",
3409 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3410 VIRTQUEUE_MAX_SIZE);
3411 virtio_cleanup(vdev);
3412 return;
3413 }
3414
3415 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3416
3417
3418
3419
3420
3421 if (n->nic_conf.peers.queues) {
3422 for (i = 0; i < n->max_ncs; i++) {
3423 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3424 ++n->max_queue_pairs;
3425 }
3426 }
3427 }
3428 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3429
3430 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3431 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3432 "must be a positive integer less than %d.",
3433 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3434 virtio_cleanup(vdev);
3435 return;
3436 }
3437 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs);
3438 n->curr_queue_pairs = 1;
3439 n->tx_timeout = n->net_conf.txtimer;
3440
3441 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3442 && strcmp(n->net_conf.tx, "bh")) {
3443 warn_report("virtio-net: "
3444 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3445 n->net_conf.tx);
3446 error_printf("Defaulting to \"bh\"");
3447 }
3448
3449 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3450 n->net_conf.tx_queue_size);
3451
3452 for (i = 0; i < n->max_queue_pairs; i++) {
3453 virtio_net_add_queue(n, i);
3454 }
3455
3456 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3457 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3458 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3459 n->status = VIRTIO_NET_S_LINK_UP;
3460 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3461 QEMU_CLOCK_VIRTUAL,
3462 virtio_net_announce_timer, n);
3463 n->announce_timer.round = 0;
3464
3465 if (n->netclient_type) {
3466
3467
3468
3469 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3470 n->netclient_type, n->netclient_name, n);
3471 } else {
3472 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3473 object_get_typename(OBJECT(dev)), dev->id, n);
3474 }
3475
3476 for (i = 0; i < n->max_queue_pairs; i++) {
3477 n->nic->ncs[i].do_not_pad = true;
3478 }
3479
3480 peer_test_vnet_hdr(n);
3481 if (peer_has_vnet_hdr(n)) {
3482 for (i = 0; i < n->max_queue_pairs; i++) {
3483 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3484 }
3485 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3486 } else {
3487 n->host_hdr_len = 0;
3488 }
3489
3490 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3491
3492 n->vqs[0].tx_waiting = 0;
3493 n->tx_burst = n->net_conf.txburst;
3494 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3495 n->promisc = 1;
3496
3497 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3498
3499 n->vlans = g_malloc0(MAX_VLAN >> 3);
3500
3501 nc = qemu_get_queue(n->nic);
3502 nc->rxfilter_notify_enabled = 1;
3503
3504 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3505 struct virtio_net_config netcfg = {};
3506 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3507 vhost_net_set_config(get_vhost_net(nc->peer),
3508 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3509 }
3510 QTAILQ_INIT(&n->rsc_chains);
3511 n->qdev = dev;
3512
3513 net_rx_pkt_init(&n->rx_pkt, false);
3514
3515 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3516 virtio_net_load_ebpf(n);
3517 }
3518}
3519
3520static void virtio_net_device_unrealize(DeviceState *dev)
3521{
3522 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3523 VirtIONet *n = VIRTIO_NET(dev);
3524 int i, max_queue_pairs;
3525
3526 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3527 virtio_net_unload_ebpf(n);
3528 }
3529
3530
3531 virtio_net_set_status(vdev, 0);
3532
3533 g_free(n->netclient_name);
3534 n->netclient_name = NULL;
3535 g_free(n->netclient_type);
3536 n->netclient_type = NULL;
3537
3538 g_free(n->mac_table.macs);
3539 g_free(n->vlans);
3540
3541 if (n->failover) {
3542 qobject_unref(n->primary_opts);
3543 device_listener_unregister(&n->primary_listener);
3544 remove_migration_state_change_notifier(&n->migration_state);
3545 } else {
3546 assert(n->primary_opts == NULL);
3547 }
3548
3549 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3550 for (i = 0; i < max_queue_pairs; i++) {
3551 virtio_net_del_queue(n, i);
3552 }
3553
3554 virtio_del_queue(vdev, max_queue_pairs * 2);
3555 qemu_announce_timer_del(&n->announce_timer, false);
3556 g_free(n->vqs);
3557 qemu_del_nic(n->nic);
3558 virtio_net_rsc_cleanup(n);
3559 g_free(n->rss_data.indirections_table);
3560 net_rx_pkt_uninit(n->rx_pkt);
3561 virtio_cleanup(vdev);
3562}
3563
3564static void virtio_net_instance_init(Object *obj)
3565{
3566 VirtIONet *n = VIRTIO_NET(obj);
3567
3568
3569
3570
3571
3572 n->config_size = sizeof(struct virtio_net_config);
3573 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3574 "bootindex", "/ethernet-phy@0",
3575 DEVICE(n));
3576
3577 ebpf_rss_init(&n->ebpf_rss);
3578}
3579
3580static int virtio_net_pre_save(void *opaque)
3581{
3582 VirtIONet *n = opaque;
3583
3584
3585
3586 assert(!n->vhost_started);
3587
3588 return 0;
3589}
3590
3591static bool primary_unplug_pending(void *opaque)
3592{
3593 DeviceState *dev = opaque;
3594 DeviceState *primary;
3595 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3596 VirtIONet *n = VIRTIO_NET(vdev);
3597
3598 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3599 return false;
3600 }
3601 primary = failover_find_primary_device(n);
3602 return primary ? primary->pending_deleted_event : false;
3603}
3604
3605static bool dev_unplug_pending(void *opaque)
3606{
3607 DeviceState *dev = opaque;
3608 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3609
3610 return vdc->primary_unplug_pending(dev);
3611}
3612
3613static const VMStateDescription vmstate_virtio_net = {
3614 .name = "virtio-net",
3615 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3616 .version_id = VIRTIO_NET_VM_VERSION,
3617 .fields = (VMStateField[]) {
3618 VMSTATE_VIRTIO_DEVICE,
3619 VMSTATE_END_OF_LIST()
3620 },
3621 .pre_save = virtio_net_pre_save,
3622 .dev_unplug_pending = dev_unplug_pending,
3623};
3624
3625static Property virtio_net_properties[] = {
3626 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3627 VIRTIO_NET_F_CSUM, true),
3628 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3629 VIRTIO_NET_F_GUEST_CSUM, true),
3630 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3631 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3632 VIRTIO_NET_F_GUEST_TSO4, true),
3633 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3634 VIRTIO_NET_F_GUEST_TSO6, true),
3635 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3636 VIRTIO_NET_F_GUEST_ECN, true),
3637 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3638 VIRTIO_NET_F_GUEST_UFO, true),
3639 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3640 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3641 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3642 VIRTIO_NET_F_HOST_TSO4, true),
3643 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3644 VIRTIO_NET_F_HOST_TSO6, true),
3645 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3646 VIRTIO_NET_F_HOST_ECN, true),
3647 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3648 VIRTIO_NET_F_HOST_UFO, true),
3649 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3650 VIRTIO_NET_F_MRG_RXBUF, true),
3651 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3652 VIRTIO_NET_F_STATUS, true),
3653 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3654 VIRTIO_NET_F_CTRL_VQ, true),
3655 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3656 VIRTIO_NET_F_CTRL_RX, true),
3657 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3658 VIRTIO_NET_F_CTRL_VLAN, true),
3659 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3660 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3661 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3662 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3663 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3664 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3665 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3666 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3667 VIRTIO_NET_F_RSS, false),
3668 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3669 VIRTIO_NET_F_HASH_REPORT, false),
3670 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3671 VIRTIO_NET_F_RSC_EXT, false),
3672 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3673 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3674 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3675 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3676 TX_TIMER_INTERVAL),
3677 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3678 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3679 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3680 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3681 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3682 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3683 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3684 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3685 true),
3686 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3687 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3688 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3689 DEFINE_PROP_END_OF_LIST(),
3690};
3691
3692static void virtio_net_class_init(ObjectClass *klass, void *data)
3693{
3694 DeviceClass *dc = DEVICE_CLASS(klass);
3695 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3696
3697 device_class_set_props(dc, virtio_net_properties);
3698 dc->vmsd = &vmstate_virtio_net;
3699 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3700 vdc->realize = virtio_net_device_realize;
3701 vdc->unrealize = virtio_net_device_unrealize;
3702 vdc->get_config = virtio_net_get_config;
3703 vdc->set_config = virtio_net_set_config;
3704 vdc->get_features = virtio_net_get_features;
3705 vdc->set_features = virtio_net_set_features;
3706 vdc->bad_features = virtio_net_bad_features;
3707 vdc->reset = virtio_net_reset;
3708 vdc->set_status = virtio_net_set_status;
3709 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3710 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3711 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3712 vdc->post_load = virtio_net_post_load_virtio;
3713 vdc->vmsd = &vmstate_virtio_net_device;
3714 vdc->primary_unplug_pending = primary_unplug_pending;
3715}
3716
3717static const TypeInfo virtio_net_info = {
3718 .name = TYPE_VIRTIO_NET,
3719 .parent = TYPE_VIRTIO_DEVICE,
3720 .instance_size = sizeof(VirtIONet),
3721 .instance_init = virtio_net_instance_init,
3722 .class_init = virtio_net_class_init,
3723};
3724
3725static void virtio_register_types(void)
3726{
3727 type_register_static(&virtio_net_info);
3728}
3729
3730type_init(virtio_register_types)
3731