1
2
3
4
5
6
7#include "gve.h"
8#include "gve_adminq.h"
9#include "gve_utils.h"
10#include <linux/ip.h>
11#include <linux/tcp.h>
12#include <linux/vmalloc.h>
13#include <linux/skbuff.h>
14
15static inline void gve_tx_put_doorbell(struct gve_priv *priv,
16 struct gve_queue_resources *q_resources,
17 u32 val)
18{
19 iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
20}
21
22
23
24
25
26
27
28
29
30static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
31{
32 fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
33 PAGE_KERNEL);
34 if (unlikely(!fifo->base)) {
35 netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
36 fifo->qpl->id);
37 return -ENOMEM;
38 }
39
40 fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
41 atomic_set(&fifo->available, fifo->size);
42 fifo->head = 0;
43 return 0;
44}
45
46static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
47{
48 WARN(atomic_read(&fifo->available) != fifo->size,
49 "Releasing non-empty fifo");
50
51 vunmap(fifo->base);
52}
53
54static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
55 size_t bytes)
56{
57 return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
58}
59
60static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
61{
62 return (atomic_read(&fifo->available) <= bytes) ? false : true;
63}
64
65
66
67
68
69
70
71
72
73
74
75static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
76 struct gve_tx_iovec iov[2])
77{
78 size_t overflow, padding;
79 u32 aligned_head;
80 int nfrags = 0;
81
82 if (!bytes)
83 return 0;
84
85
86
87
88
89
90
91 WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
92 "Reached %s when there's not enough space in the fifo", __func__);
93
94 nfrags++;
95
96 iov[0].iov_offset = fifo->head;
97 iov[0].iov_len = bytes;
98 fifo->head += bytes;
99
100 if (fifo->head > fifo->size) {
101
102
103
104 nfrags++;
105 overflow = fifo->head - fifo->size;
106 iov[0].iov_len -= overflow;
107 iov[1].iov_offset = 0;
108 iov[1].iov_len = overflow;
109
110 fifo->head = overflow;
111 }
112
113
114 aligned_head = L1_CACHE_ALIGN(fifo->head);
115 padding = aligned_head - fifo->head;
116 iov[nfrags - 1].iov_padding = padding;
117 atomic_sub(bytes + padding, &fifo->available);
118 fifo->head = aligned_head;
119
120 if (fifo->head == fifo->size)
121 fifo->head = 0;
122
123 return nfrags;
124}
125
126
127
128
129
130static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
131{
132 atomic_add(bytes, &fifo->available);
133}
134
135static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
136 u32 to_do, bool try_to_wake);
137
138static void gve_tx_free_ring(struct gve_priv *priv, int idx)
139{
140 struct gve_tx_ring *tx = &priv->tx[idx];
141 struct device *hdev = &priv->pdev->dev;
142 size_t bytes;
143 u32 slots;
144
145 gve_tx_remove_from_block(priv, idx);
146 slots = tx->mask + 1;
147 gve_clean_tx_done(priv, tx, tx->req, false);
148 netdev_tx_reset_queue(tx->netdev_txq);
149
150 dma_free_coherent(hdev, sizeof(*tx->q_resources),
151 tx->q_resources, tx->q_resources_bus);
152 tx->q_resources = NULL;
153
154 if (!tx->raw_addressing) {
155 gve_tx_fifo_release(priv, &tx->tx_fifo);
156 gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
157 tx->tx_fifo.qpl = NULL;
158 }
159
160 bytes = sizeof(*tx->desc) * slots;
161 dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
162 tx->desc = NULL;
163
164 vfree(tx->info);
165 tx->info = NULL;
166
167 netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
168}
169
170static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
171{
172 struct gve_tx_ring *tx = &priv->tx[idx];
173 struct device *hdev = &priv->pdev->dev;
174 u32 slots = priv->tx_desc_cnt;
175 size_t bytes;
176
177
178 memset(tx, 0, sizeof(*tx));
179 tx->q_num = idx;
180
181 tx->mask = slots - 1;
182
183
184 tx->info = vzalloc(sizeof(*tx->info) * slots);
185 if (!tx->info)
186 return -ENOMEM;
187
188
189 bytes = sizeof(*tx->desc) * slots;
190 tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
191 if (!tx->desc)
192 goto abort_with_info;
193
194 tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
195 tx->dev = &priv->pdev->dev;
196 if (!tx->raw_addressing) {
197 tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
198 if (!tx->tx_fifo.qpl)
199 goto abort_with_desc;
200
201 if (gve_tx_fifo_init(priv, &tx->tx_fifo))
202 goto abort_with_qpl;
203 }
204
205 tx->q_resources =
206 dma_alloc_coherent(hdev,
207 sizeof(*tx->q_resources),
208 &tx->q_resources_bus,
209 GFP_KERNEL);
210 if (!tx->q_resources)
211 goto abort_with_fifo;
212
213 netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
214 (unsigned long)tx->bus);
215 tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
216 gve_tx_add_to_block(priv, idx);
217
218 return 0;
219
220abort_with_fifo:
221 if (!tx->raw_addressing)
222 gve_tx_fifo_release(priv, &tx->tx_fifo);
223abort_with_qpl:
224 if (!tx->raw_addressing)
225 gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
226abort_with_desc:
227 dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
228 tx->desc = NULL;
229abort_with_info:
230 vfree(tx->info);
231 tx->info = NULL;
232 return -ENOMEM;
233}
234
235int gve_tx_alloc_rings(struct gve_priv *priv)
236{
237 int err = 0;
238 int i;
239
240 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
241 err = gve_tx_alloc_ring(priv, i);
242 if (err) {
243 netif_err(priv, drv, priv->dev,
244 "Failed to alloc tx ring=%d: err=%d\n",
245 i, err);
246 break;
247 }
248 }
249
250 if (err) {
251 int j;
252
253 for (j = 0; j < i; j++)
254 gve_tx_free_ring(priv, j);
255 }
256 return err;
257}
258
259void gve_tx_free_rings_gqi(struct gve_priv *priv)
260{
261 int i;
262
263 for (i = 0; i < priv->tx_cfg.num_queues; i++)
264 gve_tx_free_ring(priv, i);
265}
266
267
268
269
270
271
272
273
274static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
275{
276 return tx->mask + 1 - (tx->req - tx->done);
277}
278
279static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
280 struct sk_buff *skb)
281{
282 int pad_bytes, align_hdr_pad;
283 int bytes;
284 int hlen;
285
286 hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
287 tcp_hdrlen(skb) : skb_headlen(skb);
288
289 pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
290 hlen);
291
292 align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
293 bytes = align_hdr_pad + pad_bytes + skb->len;
294
295 return bytes;
296}
297
298
299
300
301
302#define MAX_TX_DESC_NEEDED (MAX_SKB_FRAGS + 3)
303static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
304{
305 if (info->skb) {
306 dma_unmap_single(dev, dma_unmap_addr(&info->buf, dma),
307 dma_unmap_len(&info->buf, len),
308 DMA_TO_DEVICE);
309 dma_unmap_len_set(&info->buf, len, 0);
310 } else {
311 dma_unmap_page(dev, dma_unmap_addr(&info->buf, dma),
312 dma_unmap_len(&info->buf, len),
313 DMA_TO_DEVICE);
314 dma_unmap_len_set(&info->buf, len, 0);
315 }
316}
317
318
319
320
321static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
322{
323 bool can_alloc = true;
324
325 if (!tx->raw_addressing)
326 can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);
327
328 return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
329}
330
331
332static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
333{
334 int bytes_required = 0;
335
336 if (!tx->raw_addressing)
337 bytes_required = gve_skb_fifo_bytes_required(tx, skb);
338
339 if (likely(gve_can_tx(tx, bytes_required)))
340 return 0;
341
342
343 tx->stop_queue++;
344 netif_tx_stop_queue(tx->netdev_txq);
345 smp_mb();
346
347
348
349
350
351
352
353
354
355
356
357
358
359 if (likely(!gve_can_tx(tx, bytes_required)))
360 return -EBUSY;
361
362 netif_tx_start_queue(tx->netdev_txq);
363 tx->wake_queue++;
364 return 0;
365}
366
367static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
368 struct sk_buff *skb, bool is_gso,
369 int l4_hdr_offset, u32 desc_cnt,
370 u16 hlen, u64 addr)
371{
372
373 if (is_gso) {
374 pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
375 pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
376 pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
377 } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
378 pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
379 pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
380 pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
381 } else {
382 pkt_desc->pkt.type_flags = GVE_TXD_STD;
383 pkt_desc->pkt.l4_csum_offset = 0;
384 pkt_desc->pkt.l4_hdr_offset = 0;
385 }
386 pkt_desc->pkt.desc_cnt = desc_cnt;
387 pkt_desc->pkt.len = cpu_to_be16(skb->len);
388 pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
389 pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
390}
391
392static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
393 struct sk_buff *skb, bool is_gso,
394 u16 len, u64 addr)
395{
396 seg_desc->seg.type_flags = GVE_TXD_SEG;
397 if (is_gso) {
398 if (skb_is_gso_v6(skb))
399 seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
400 seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
401 seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
402 }
403 seg_desc->seg.seg_len = cpu_to_be16(len);
404 seg_desc->seg.seg_addr = cpu_to_be64(addr);
405}
406
407static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
408 u64 iov_offset, u64 iov_len)
409{
410 u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
411 u64 first_page = iov_offset / PAGE_SIZE;
412 u64 page;
413
414 for (page = first_page; page <= last_page; page++)
415 dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE);
416}
417
418static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb)
419{
420 int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
421 union gve_tx_desc *pkt_desc, *seg_desc;
422 struct gve_tx_buffer_state *info;
423 bool is_gso = skb_is_gso(skb);
424 u32 idx = tx->req & tx->mask;
425 int payload_iov = 2;
426 int copy_offset;
427 u32 next_idx;
428 int i;
429
430 info = &tx->info[idx];
431 pkt_desc = &tx->desc[idx];
432
433 l4_hdr_offset = skb_checksum_start_offset(skb);
434
435
436
437
438
439 hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
440 skb_headlen(skb);
441
442 info->skb = skb;
443
444
445
446 pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
447 hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
448 &info->iov[0]);
449 WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
450 payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
451 &info->iov[payload_iov]);
452
453 gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
454 1 + payload_nfrags, hlen,
455 info->iov[hdr_nfrags - 1].iov_offset);
456
457 skb_copy_bits(skb, 0,
458 tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
459 hlen);
460 gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
461 info->iov[hdr_nfrags - 1].iov_offset,
462 info->iov[hdr_nfrags - 1].iov_len);
463 copy_offset = hlen;
464
465 for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
466 next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
467 seg_desc = &tx->desc[next_idx];
468
469 gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
470 info->iov[i].iov_len,
471 info->iov[i].iov_offset);
472
473 skb_copy_bits(skb, copy_offset,
474 tx->tx_fifo.base + info->iov[i].iov_offset,
475 info->iov[i].iov_len);
476 gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
477 info->iov[i].iov_offset,
478 info->iov[i].iov_len);
479 copy_offset += info->iov[i].iov_len;
480 }
481
482 return 1 + payload_nfrags;
483}
484
485static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
486 struct sk_buff *skb)
487{
488 const struct skb_shared_info *shinfo = skb_shinfo(skb);
489 int hlen, payload_nfrags, l4_hdr_offset;
490 union gve_tx_desc *pkt_desc, *seg_desc;
491 struct gve_tx_buffer_state *info;
492 bool is_gso = skb_is_gso(skb);
493 u32 idx = tx->req & tx->mask;
494 struct gve_tx_dma_buf *buf;
495 u64 addr;
496 u32 len;
497 int i;
498
499 info = &tx->info[idx];
500 pkt_desc = &tx->desc[idx];
501
502 l4_hdr_offset = skb_checksum_start_offset(skb);
503
504
505
506
507
508 hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb);
509 len = skb_headlen(skb);
510
511 info->skb = skb;
512
513 addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
514 if (unlikely(dma_mapping_error(tx->dev, addr))) {
515 tx->dma_mapping_error++;
516 goto drop;
517 }
518 buf = &info->buf;
519 dma_unmap_len_set(buf, len, len);
520 dma_unmap_addr_set(buf, dma, addr);
521
522 payload_nfrags = shinfo->nr_frags;
523 if (hlen < len) {
524
525
526
527 payload_nfrags++;
528 gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
529 1 + payload_nfrags, hlen, addr);
530
531 len -= hlen;
532 addr += hlen;
533 idx = (tx->req + 1) & tx->mask;
534 seg_desc = &tx->desc[idx];
535 gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
536 } else {
537 gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
538 1 + payload_nfrags, hlen, addr);
539 }
540
541 for (i = 0; i < shinfo->nr_frags; i++) {
542 const skb_frag_t *frag = &shinfo->frags[i];
543
544 idx = (idx + 1) & tx->mask;
545 seg_desc = &tx->desc[idx];
546 len = skb_frag_size(frag);
547 addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
548 if (unlikely(dma_mapping_error(tx->dev, addr))) {
549 tx->dma_mapping_error++;
550 goto unmap_drop;
551 }
552 buf = &tx->info[idx].buf;
553 tx->info[idx].skb = NULL;
554 dma_unmap_len_set(buf, len, len);
555 dma_unmap_addr_set(buf, dma, addr);
556
557 gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
558 }
559
560 return 1 + payload_nfrags;
561
562unmap_drop:
563 i += (payload_nfrags == shinfo->nr_frags ? 1 : 2);
564 while (i--) {
565 idx--;
566 gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
567 }
568drop:
569 tx->dropped_pkt++;
570 return 0;
571}
572
573netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
574{
575 struct gve_priv *priv = netdev_priv(dev);
576 struct gve_tx_ring *tx;
577 int nsegs;
578
579 WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
580 "skb queue index out of range");
581 tx = &priv->tx[skb_get_queue_mapping(skb)];
582 if (unlikely(gve_maybe_stop_tx(tx, skb))) {
583
584
585
586
587
588 gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
589 return NETDEV_TX_BUSY;
590 }
591 if (tx->raw_addressing)
592 nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
593 else
594 nsegs = gve_tx_add_skb_copy(priv, tx, skb);
595
596
597 if (nsegs) {
598 netdev_tx_sent_queue(tx->netdev_txq, skb->len);
599 skb_tx_timestamp(skb);
600 tx->req += nsegs;
601 } else {
602 dev_kfree_skb_any(skb);
603 }
604
605 if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
606 return NETDEV_TX_OK;
607
608
609
610
611 gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
612 return NETDEV_TX_OK;
613}
614
615#define GVE_TX_START_THRESH PAGE_SIZE
616
617static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
618 u32 to_do, bool try_to_wake)
619{
620 struct gve_tx_buffer_state *info;
621 u64 pkts = 0, bytes = 0;
622 size_t space_freed = 0;
623 struct sk_buff *skb;
624 int i, j;
625 u32 idx;
626
627 for (j = 0; j < to_do; j++) {
628 idx = tx->done & tx->mask;
629 netif_info(priv, tx_done, priv->dev,
630 "[%d] %s: idx=%d (req=%u done=%u)\n",
631 tx->q_num, __func__, idx, tx->req, tx->done);
632 info = &tx->info[idx];
633 skb = info->skb;
634
635
636 if (tx->raw_addressing)
637 gve_tx_unmap_buf(tx->dev, info);
638 tx->done++;
639
640 if (skb) {
641 info->skb = NULL;
642 bytes += skb->len;
643 pkts++;
644 dev_consume_skb_any(skb);
645 if (tx->raw_addressing)
646 continue;
647
648 for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
649 space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
650 info->iov[i].iov_len = 0;
651 info->iov[i].iov_padding = 0;
652 }
653 }
654 }
655
656 if (!tx->raw_addressing)
657 gve_tx_free_fifo(&tx->tx_fifo, space_freed);
658 u64_stats_update_begin(&tx->statss);
659 tx->bytes_done += bytes;
660 tx->pkt_done += pkts;
661 u64_stats_update_end(&tx->statss);
662 netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);
663
664
665#ifndef CONFIG_BQL
666
667 smp_mb();
668#endif
669 if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
670 likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
671 tx->wake_queue++;
672 netif_tx_wake_queue(tx->netdev_txq);
673 }
674
675 return pkts;
676}
677
678__be32 gve_tx_load_event_counter(struct gve_priv *priv,
679 struct gve_tx_ring *tx)
680{
681 u32 counter_index = be32_to_cpu((tx->q_resources->counter_index));
682
683 return READ_ONCE(priv->counter_array[counter_index]);
684}
685
686bool gve_tx_poll(struct gve_notify_block *block, int budget)
687{
688 struct gve_priv *priv = block->priv;
689 struct gve_tx_ring *tx = block->tx;
690 bool repoll = false;
691 u32 nic_done;
692 u32 to_do;
693
694
695 if (budget == 0)
696 budget = INT_MAX;
697
698
699 tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
700 nic_done = be32_to_cpu(tx->last_nic_done);
701 if (budget > 0) {
702
703
704
705 to_do = min_t(u32, (nic_done - tx->done), budget);
706 gve_clean_tx_done(priv, tx, to_do, true);
707 }
708
709 repoll |= (nic_done != tx->done);
710 return repoll;
711}
712