1
2
3
4
5
6
7#include <linux/cpumask.h>
8#include <linux/etherdevice.h>
9#include <linux/interrupt.h>
10#include <linux/module.h>
11#include <linux/pci.h>
12#include <linux/sched.h>
13#include <linux/timer.h>
14#include <linux/workqueue.h>
15#include <net/sch_generic.h>
16#include "gve.h"
17#include "gve_adminq.h"
18#include "gve_register.h"
19
20#define GVE_DEFAULT_RX_COPYBREAK (256)
21
22#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
23#define GVE_VERSION "1.0.0"
24#define GVE_VERSION_PREFIX "GVE-"
25
26const char gve_version_str[] = GVE_VERSION;
27static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
28
29static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
30{
31 struct gve_priv *priv = netdev_priv(dev);
32 unsigned int start;
33 int ring;
34
35 if (priv->rx) {
36 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
37 do {
38 start =
39 u64_stats_fetch_begin(&priv->rx[ring].statss);
40 s->rx_packets += priv->rx[ring].rpackets;
41 s->rx_bytes += priv->rx[ring].rbytes;
42 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
43 start));
44 }
45 }
46 if (priv->tx) {
47 for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
48 do {
49 start =
50 u64_stats_fetch_begin(&priv->tx[ring].statss);
51 s->tx_packets += priv->tx[ring].pkt_done;
52 s->tx_bytes += priv->tx[ring].bytes_done;
53 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
54 start));
55 }
56 }
57}
58
59static int gve_alloc_counter_array(struct gve_priv *priv)
60{
61 priv->counter_array =
62 dma_alloc_coherent(&priv->pdev->dev,
63 priv->num_event_counters *
64 sizeof(*priv->counter_array),
65 &priv->counter_array_bus, GFP_KERNEL);
66 if (!priv->counter_array)
67 return -ENOMEM;
68
69 return 0;
70}
71
72static void gve_free_counter_array(struct gve_priv *priv)
73{
74 dma_free_coherent(&priv->pdev->dev,
75 priv->num_event_counters *
76 sizeof(*priv->counter_array),
77 priv->counter_array, priv->counter_array_bus);
78 priv->counter_array = NULL;
79}
80
81
82static void gve_stats_report_task(struct work_struct *work)
83{
84 struct gve_priv *priv = container_of(work, struct gve_priv,
85 stats_report_task);
86 if (gve_get_do_report_stats(priv)) {
87 gve_handle_report_stats(priv);
88 gve_clear_do_report_stats(priv);
89 }
90}
91
92static void gve_stats_report_schedule(struct gve_priv *priv)
93{
94 if (!gve_get_probe_in_progress(priv) &&
95 !gve_get_reset_in_progress(priv)) {
96 gve_set_do_report_stats(priv);
97 queue_work(priv->gve_wq, &priv->stats_report_task);
98 }
99}
100
101static void gve_stats_report_timer(struct timer_list *t)
102{
103 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
104
105 mod_timer(&priv->stats_report_timer,
106 round_jiffies(jiffies +
107 msecs_to_jiffies(priv->stats_report_timer_period)));
108 gve_stats_report_schedule(priv);
109}
110
/* Allocate the DMA-coherent stats report buffer (driver and NIC entries for
 * every configured queue) and set up its periodic trigger timer.
 * Returns 0 on success or -ENOMEM.
 */
static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       priv->tx_cfg.num_queues;
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	/* struct_size() accounts for the flexible stats[] array. */
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     tx_stats_num + rx_stats_num);
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;

	/* Timer is armed later (gve_open); here it is only initialized. */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}
131
132static void gve_free_stats_report(struct gve_priv *priv)
133{
134 del_timer_sync(&priv->stats_report_timer);
135 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
136 priv->stats_report, priv->stats_report_bus);
137 priv->stats_report = NULL;
138}
139
140static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
141{
142 struct gve_priv *priv = arg;
143
144 queue_work(priv->gve_wq, &priv->service_task);
145 return IRQ_HANDLED;
146}
147
148static irqreturn_t gve_intr(int irq, void *arg)
149{
150 struct gve_notify_block *block = arg;
151 struct gve_priv *priv = block->priv;
152
153 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
154 napi_schedule_irqoff(&block->napi);
155 return IRQ_HANDLED;
156}
157
/* NAPI poll handler for a notify block that may carry TX and/or RX work.
 * Returns @budget while work remains (keeps NAPI scheduled), else 0.
 */
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx)
		reschedule |= gve_tx_poll(block, budget);
	if (block->rx)
		reschedule |= gve_rx_poll(block, budget);

	if (reschedule)
		return budget;

	napi_complete(napi);
	/* Ack and unmask the interrupt on this block's doorbell. */
	irq_doorbell = gve_irq_doorbell(priv, block);
	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

	/* Re-check for work that may have arrived between the last poll and
	 * the unmask above; such work would otherwise raise no interrupt.
	 * dma_rmb() orders the doorbell write before re-reading descriptors.
	 */
	dma_rmb();
	if (block->tx)
		reschedule |= gve_tx_poll(block, -1);
	if (block->rx)
		reschedule |= gve_rx_poll(block, -1);
	/* If rescheduled successfully, mask the irq again until next poll. */
	if (reschedule && napi_reschedule(napi))
		iowrite32be(GVE_IRQ_MASK, irq_doorbell);

	return 0;
}
193
194static int gve_alloc_notify_blocks(struct gve_priv *priv)
195{
196 int num_vecs_requested = priv->num_ntfy_blks + 1;
197 char *name = priv->dev->name;
198 unsigned int active_cpus;
199 int vecs_enabled;
200 int i, j;
201 int err;
202
203 priv->msix_vectors = kvzalloc(num_vecs_requested *
204 sizeof(*priv->msix_vectors), GFP_KERNEL);
205 if (!priv->msix_vectors)
206 return -ENOMEM;
207 for (i = 0; i < num_vecs_requested; i++)
208 priv->msix_vectors[i].entry = i;
209 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
210 GVE_MIN_MSIX, num_vecs_requested);
211 if (vecs_enabled < 0) {
212 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
213 GVE_MIN_MSIX, vecs_enabled);
214 err = vecs_enabled;
215 goto abort_with_msix_vectors;
216 }
217 if (vecs_enabled != num_vecs_requested) {
218 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
219 int vecs_per_type = new_num_ntfy_blks / 2;
220 int vecs_left = new_num_ntfy_blks % 2;
221
222 priv->num_ntfy_blks = new_num_ntfy_blks;
223 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
224 vecs_per_type);
225 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
226 vecs_per_type + vecs_left);
227 dev_err(&priv->pdev->dev,
228 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
229 vecs_enabled, priv->tx_cfg.max_queues,
230 priv->rx_cfg.max_queues);
231 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
232 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
233 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
234 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
235 }
236
237 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
238
239
240 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
241 name);
242 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
243 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
244 if (err) {
245 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
246 goto abort_with_msix_enabled;
247 }
248 priv->ntfy_blocks =
249 dma_alloc_coherent(&priv->pdev->dev,
250 priv->num_ntfy_blks *
251 sizeof(*priv->ntfy_blocks),
252 &priv->ntfy_block_bus, GFP_KERNEL);
253 if (!priv->ntfy_blocks) {
254 err = -ENOMEM;
255 goto abort_with_mgmt_vector;
256 }
257
258 for (i = 0; i < priv->num_ntfy_blks; i++) {
259 struct gve_notify_block *block = &priv->ntfy_blocks[i];
260 int msix_idx = i;
261
262 snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
263 name, i);
264 block->priv = priv;
265 err = request_irq(priv->msix_vectors[msix_idx].vector,
266 gve_intr, 0, block->name, block);
267 if (err) {
268 dev_err(&priv->pdev->dev,
269 "Failed to receive msix vector %d\n", i);
270 goto abort_with_some_ntfy_blocks;
271 }
272 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
273 get_cpu_mask(i % active_cpus));
274 }
275 return 0;
276abort_with_some_ntfy_blocks:
277 for (j = 0; j < i; j++) {
278 struct gve_notify_block *block = &priv->ntfy_blocks[j];
279 int msix_idx = j;
280
281 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
282 NULL);
283 free_irq(priv->msix_vectors[msix_idx].vector, block);
284 }
285 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
286 sizeof(*priv->ntfy_blocks),
287 priv->ntfy_blocks, priv->ntfy_block_bus);
288 priv->ntfy_blocks = NULL;
289abort_with_mgmt_vector:
290 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
291abort_with_msix_enabled:
292 pci_disable_msix(priv->pdev);
293abort_with_msix_vectors:
294 kvfree(priv->msix_vectors);
295 priv->msix_vectors = NULL;
296 return err;
297}
298
299static void gve_free_notify_blocks(struct gve_priv *priv)
300{
301 int i;
302
303
304 for (i = 0; i < priv->num_ntfy_blks; i++) {
305 struct gve_notify_block *block = &priv->ntfy_blocks[i];
306 int msix_idx = i;
307
308 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
309 NULL);
310 free_irq(priv->msix_vectors[msix_idx].vector, block);
311 }
312 dma_free_coherent(&priv->pdev->dev,
313 priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
314 priv->ntfy_blocks, priv->ntfy_block_bus);
315 priv->ntfy_blocks = NULL;
316 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
317 pci_disable_msix(priv->pdev);
318 kvfree(priv->msix_vectors);
319 priv->msix_vectors = NULL;
320}
321
/* Allocate host-side device resources (counters, notify blocks, stats
 * report) and hand them to the device via the admin queue.
 * Returns 0 on success or a negative errno, unwinding on failure.
 */
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->ntfy_block_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		/* Admin queue failure is unrecoverable here; report -ENXIO. */
		err = -ENXIO;
		goto abort_with_stats_report;
	}
	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	/* A stats-report attach failure is logged but not fatal. */
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);
	return err;
}
362
363static void gve_trigger_reset(struct gve_priv *priv);
364
/* Detach device resources via the admin queue (if they were configured),
 * then free the host-side allocations. An admin queue failure triggers a
 * reset instead of leaving the device with stale DMA addresses.
 */
static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* Zero length/address detaches the stats report. */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}
391
392static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
393{
394 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
395
396 netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
397 NAPI_POLL_WEIGHT);
398}
399
400static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
401{
402 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
403
404 netif_napi_del(&block->napi);
405}
406
407static int gve_register_qpls(struct gve_priv *priv)
408{
409 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
410 int err;
411 int i;
412
413 for (i = 0; i < num_qpls; i++) {
414 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
415 if (err) {
416 netif_err(priv, drv, priv->dev,
417 "failed to register queue page list %d\n",
418 priv->qpls[i].id);
419
420
421
422 return err;
423 }
424 }
425 return 0;
426}
427
428static int gve_unregister_qpls(struct gve_priv *priv)
429{
430 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
431 int err;
432 int i;
433
434 for (i = 0; i < num_qpls; i++) {
435 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
436
437 if (err) {
438 netif_err(priv, drv, priv->dev,
439 "Failed to unregister queue page list %d\n",
440 priv->qpls[i].id);
441 return err;
442 }
443 }
444 return 0;
445}
446
/* Ask the device to create all configured TX and RX queues, then ring each
 * RX doorbell to hand the prefilled buffers to the NIC.
 * Returns 0 or the first admin queue error (caller unwinds via reset).
 */
static int gve_create_rings(struct gve_priv *priv)
{
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  priv->tx_cfg.num_queues);
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  priv->tx_cfg.num_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	/* Rx data ring was presumably prefilled with buffers when the rings
	 * were allocated; the doorbell write advertises those slots to the
	 * NIC. NOTE(review): confirm against gve_rx_alloc_rings().
	 */
	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_write_doorbell(priv, &priv->rx[i]);

	return 0;
}
486
/* Allocate the host-side TX and RX ring arrays, their ring resources, and
 * the NAPI context for every queue. Returns 0 or a negative errno, freeing
 * everything allocated so far on failure.
 */
static int gve_alloc_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int err;
	int i;

	/* Setup tx rings */
	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;
	err = gve_tx_alloc_rings(priv);
	if (err)
		goto free_tx;

	/* Setup rx rings */
	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}
	/* NOTE(review): on gve_rx_alloc_rings() failure only the rx array is
	 * freed here — assumes that helper cleans up its partial rings.
	 */
	err = gve_rx_alloc_rings(priv);
	if (err)
		goto free_rx;

	/* Register per-queue stats seqlocks and NAPI contexts. */
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		u64_stats_init(&priv->tx[i].statss);
		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		u64_stats_init(&priv->rx[i].statss);
		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv);
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}
536
537static int gve_destroy_rings(struct gve_priv *priv)
538{
539 int err;
540
541 err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
542 if (err) {
543 netif_err(priv, drv, priv->dev,
544 "failed to destroy tx queues\n");
545
546 return err;
547 }
548 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
549 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
550 if (err) {
551 netif_err(priv, drv, priv->dev,
552 "failed to destroy rx queues\n");
553
554 return err;
555 }
556 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
557 return 0;
558}
559
560static void gve_free_rings(struct gve_priv *priv)
561{
562 int ntfy_idx;
563 int i;
564
565 if (priv->tx) {
566 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
567 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
568 gve_remove_napi(priv, ntfy_idx);
569 }
570 gve_tx_free_rings(priv);
571 kvfree(priv->tx);
572 priv->tx = NULL;
573 }
574 if (priv->rx) {
575 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
576 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
577 gve_remove_napi(priv, ntfy_idx);
578 }
579 gve_rx_free_rings(priv);
580 kvfree(priv->rx);
581 priv->rx = NULL;
582 }
583}
584
585int gve_alloc_page(struct gve_priv *priv, struct device *dev,
586 struct page **page, dma_addr_t *dma,
587 enum dma_data_direction dir)
588{
589 *page = alloc_page(GFP_KERNEL);
590 if (!*page) {
591 priv->page_alloc_fail++;
592 return -ENOMEM;
593 }
594 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
595 if (dma_mapping_error(dev, *dma)) {
596 priv->dma_mapping_error++;
597 put_page(*page);
598 return -ENOMEM;
599 }
600 return 0;
601}
602
/* Allocate and DMA-map @pages pages for queue page list @id, respecting the
 * device's registered-page budget. qpl->num_entries counts only pages that
 * were successfully allocated, so partial failures can be unwound by
 * gve_free_queue_page_list(). Returns 0, -EINVAL (over budget) or -ENOMEM.
 *
 * NOTE(review): on a mid-function failure this returns without freeing what
 * it already allocated — the caller (gve_alloc_qpls) frees qpl index i too.
 */
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
				   GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id));
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}
643
644void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
645 enum dma_data_direction dir)
646{
647 if (!dma_mapping_error(dev, dma))
648 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
649 if (page)
650 put_page(page);
651}
652
/* Free all pages of queue page list @id and its bookkeeping arrays,
 * tolerating the partially-allocated states gve_alloc_queue_page_list()
 * can leave behind. Adjusts the registered-page budget by the number of
 * pages that were actually allocated (num_entries).
 */
static void gve_free_queue_page_list(struct gve_priv *priv,
				     int id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
free_pages:
	kvfree(qpl->pages);
	priv->num_registered_pages -= qpl->num_entries;
}
673
/* Allocate all TX and RX queue page lists plus the QPL-id bitmap.
 * A no-op when raw (DMA) addressing is in use. Returns 0 or a negative
 * errno, freeing every QPL allocated so far (including the partially
 * allocated one at index i) on failure.
 */
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i, j;
	int err;

	/* Raw addressing means no QPLs */
	if (priv->raw_addressing)
		return 0;

	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	/* TX QPLs occupy the low ids, RX QPLs the remainder. */
	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}
	for (; i < num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_data_slot_cnt);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	/* <= i: index i may hold a partially allocated QPL. */
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	return err;
}
718
719static void gve_free_qpls(struct gve_priv *priv)
720{
721 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
722 int i;
723
724
725 if (priv->raw_addressing)
726 return;
727
728 kvfree(priv->qpl_cfg.qpl_id_map);
729
730 for (i = 0; i < num_qpls; i++)
731 gve_free_queue_page_list(priv, i);
732
733 kvfree(priv->qpls);
734}
735
736
737
738
739
740void gve_schedule_reset(struct gve_priv *priv)
741{
742 gve_set_do_reset(priv);
743 queue_work(priv->gve_wq, &priv->service_task);
744}
745
746static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
747static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
748static void gve_turndown(struct gve_priv *priv);
749static void gve_turnup(struct gve_priv *priv);
750
/* ndo_open: allocate QPLs and rings, register them with the device, create
 * the device queues and bring traffic up. Early (host-only) failures are
 * unwound locally; failures after the device has seen resources require a
 * reset to get back to a clean state.
 */
static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;
	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	/* From here on the device holds references; failures go to reset. */
	err = gve_register_qpls(priv);
	if (err)
		goto reset;
	err = gve_create_rings(priv);
	if (err)
		goto reset;
	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}
807
/* ndo_stop: quiesce traffic, destroy the device queues and unregister the
 * QPLs, then free all host-side resources. Admin queue failures fall back
 * to a full reset.
 */
static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}
841
/* Apply a new TX/RX queue configuration. If the interface is up it is
 * bounced (close + open) around the change; otherwise the config simply
 * takes effect at the next open. On a failed re-open all queues are
 * disabled. Returns 0 or a negative errno.
 */
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
879
/* Quiesce the data path: drop carrier, disable every NAPI context and stop
 * the TX queues. Idempotent — returns early if NAPI is already disabled.
 */
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}
910
/* Re-enable the data path: start all TX queues, enable every NAPI context
 * and unmask (write 0 to) each block's interrupt doorbell.
 */
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}

	gve_set_napi_enabled(priv);
}
936
937static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
938{
939 struct gve_priv *priv = netdev_priv(dev);
940
941 gve_schedule_reset(priv);
942 priv->tx_timeo_cnt++;
943}
944
/* net_device operations table registered at probe time. */
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		=	gve_tx,
	.ndo_open		=	gve_open,
	.ndo_stop		=	gve_close,
	.ndo_get_stats64	=	gve_get_stats,
	.ndo_tx_timeout		=	gve_tx_timeout,
};
952
953static void gve_handle_status(struct gve_priv *priv, u32 status)
954{
955 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
956 dev_info(&priv->pdev->dev, "Device requested reset.\n");
957 gve_set_do_reset(priv);
958 }
959 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
960 priv->stats_report_trigger_cnt++;
961 gve_set_do_report_stats(priv);
962 }
963}
964
/* Perform a pending reset under the rtnl lock, unless probe is running. */
static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}
980
981void gve_handle_report_stats(struct gve_priv *priv)
982{
983 int idx, stats_idx = 0, tx_bytes;
984 unsigned int start = 0;
985 struct stats *stats = priv->stats_report->stats;
986
987 if (!gve_get_report_stats(priv))
988 return;
989
990 be64_add_cpu(&priv->stats_report->written_count, 1);
991
992 if (priv->tx) {
993 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
994 do {
995 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
996 tx_bytes = priv->tx[idx].bytes_done;
997 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
998 stats[stats_idx++] = (struct stats) {
999 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1000 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1001 .queue_id = cpu_to_be32(idx),
1002 };
1003 stats[stats_idx++] = (struct stats) {
1004 .stat_name = cpu_to_be32(TX_STOP_CNT),
1005 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1006 .queue_id = cpu_to_be32(idx),
1007 };
1008 stats[stats_idx++] = (struct stats) {
1009 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1010 .value = cpu_to_be64(priv->tx[idx].req),
1011 .queue_id = cpu_to_be32(idx),
1012 };
1013 stats[stats_idx++] = (struct stats) {
1014 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1015 .value = cpu_to_be64(tx_bytes),
1016 .queue_id = cpu_to_be32(idx),
1017 };
1018 stats[stats_idx++] = (struct stats) {
1019 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1020 .value = cpu_to_be64(priv->tx[idx].done),
1021 .queue_id = cpu_to_be32(idx),
1022 };
1023 }
1024 }
1025
1026 if (priv->rx) {
1027 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1028 stats[stats_idx++] = (struct stats) {
1029 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1030 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1031 .queue_id = cpu_to_be32(idx),
1032 };
1033 stats[stats_idx++] = (struct stats) {
1034 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1035 .value = cpu_to_be64(priv->rx[0].fill_cnt),
1036 .queue_id = cpu_to_be32(idx),
1037 };
1038 }
1039 }
1040}
1041
1042static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1043{
1044 if (!gve_get_napi_enabled(priv))
1045 return;
1046
1047 if (link_status == netif_carrier_ok(priv->dev))
1048 return;
1049
1050 if (link_status) {
1051 netdev_info(priv->dev, "Device link is up.\n");
1052 netif_carrier_on(priv->dev);
1053 } else {
1054 netdev_info(priv->dev, "Device link is down.\n");
1055 netif_carrier_off(priv->dev);
1056 }
1057}
1058
1059
1060static void gve_service_task(struct work_struct *work)
1061{
1062 struct gve_priv *priv = container_of(work, struct gve_priv,
1063 service_task);
1064 u32 status = ioread32be(&priv->reg_bar0->device_status);
1065
1066 gve_handle_status(priv, status);
1067
1068 gve_handle_reset(priv);
1069 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1070}
1071
/* Initialize driver-private state: set up the admin queue, query the device
 * description (unless @skip_describe_device, used on reset recovery), size
 * the MSI-X/notify-block and queue configuration, and set up device
 * resources. Returns 0 or a negative errno.
 */
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->raw_addressing = false;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	/* MTU is bounded by the page size since buffers are page-backed. */
	if (priv->dev->max_mtu > PAGE_SIZE) {
		priv->dev->max_mtu = PAGE_SIZE;
		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
		if (err) {
			dev_err(&priv->pdev->dev, "Could not set mtu");
			goto err;
		}
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	/* Notify blocks are split evenly between tx and rx. */
	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}
1153
1154static void gve_teardown_priv_resources(struct gve_priv *priv)
1155{
1156 gve_teardown_device_resources(priv);
1157 gve_adminq_free(&priv->pdev->dev, priv);
1158}
1159
/* Trigger a device reset by releasing the admin queue. */
static void gve_trigger_reset(struct gve_priv *priv)
{
	gve_adminq_release(priv);
}
1165
1166static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1167{
1168 gve_trigger_reset(priv);
1169
1170 if (was_up)
1171 gve_close(priv->dev);
1172 gve_teardown_priv_resources(priv);
1173}
1174
1175static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1176{
1177 int err;
1178
1179 err = gve_init_priv(priv, true);
1180 if (err)
1181 goto err;
1182 if (was_up) {
1183 err = gve_open(priv->dev);
1184 if (err)
1185 goto err;
1186 }
1187 return 0;
1188err:
1189 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1190 gve_turndown(priv);
1191 return err;
1192}
1193
/* Perform a full device reset. When @attempt_teardown the interface is
 * closed cleanly first; otherwise (or if the clean close fails) the data
 * path is forcibly torn down. Counters are reset and recovery is attempted
 * in either case. Caller holds rtnl. Returns the recovery result.
 */
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}
1229
1230static void gve_write_version(u8 __iomem *driver_version_register)
1231{
1232 const char *c = gve_version_prefix;
1233
1234 while (*c) {
1235 writeb(*c, driver_version_register);
1236 c++;
1237 }
1238
1239 c = gve_version_str;
1240 while (*c) {
1241 writeb(*c, driver_version_register);
1242 c++;
1243 }
1244 writeb('\n', driver_version_register);
1245}
1246
1247static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1248{
1249 int max_tx_queues, max_rx_queues;
1250 struct net_device *dev;
1251 __be32 __iomem *db_bar;
1252 struct gve_registers __iomem *reg_bar;
1253 struct gve_priv *priv;
1254 int err;
1255
1256 err = pci_enable_device(pdev);
1257 if (err)
1258 return -ENXIO;
1259
1260 err = pci_request_regions(pdev, "gvnic-cfg");
1261 if (err)
1262 goto abort_with_enabled;
1263
1264 pci_set_master(pdev);
1265
1266 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1267 if (err) {
1268 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1269 goto abort_with_pci_region;
1270 }
1271
1272 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1273 if (err) {
1274 dev_err(&pdev->dev,
1275 "Failed to set consistent dma mask: err=%d\n", err);
1276 goto abort_with_pci_region;
1277 }
1278
1279 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1280 if (!reg_bar) {
1281 dev_err(&pdev->dev, "Failed to map pci bar!\n");
1282 err = -ENOMEM;
1283 goto abort_with_pci_region;
1284 }
1285
1286 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1287 if (!db_bar) {
1288 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1289 err = -ENOMEM;
1290 goto abort_with_reg_bar;
1291 }
1292
1293 gve_write_version(®_bar->driver_version);
1294
1295 max_rx_queues = ioread32be(®_bar->max_tx_queues);
1296 max_tx_queues = ioread32be(®_bar->max_rx_queues);
1297
1298 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1299 if (!dev) {
1300 dev_err(&pdev->dev, "could not allocate netdev\n");
1301 goto abort_with_db_bar;
1302 }
1303 SET_NETDEV_DEV(dev, &pdev->dev);
1304 pci_set_drvdata(pdev, dev);
1305 dev->ethtool_ops = &gve_ethtool_ops;
1306 dev->netdev_ops = &gve_netdev_ops;
1307
1308 dev->hw_features = NETIF_F_HIGHDMA;
1309 dev->hw_features |= NETIF_F_SG;
1310 dev->hw_features |= NETIF_F_HW_CSUM;
1311 dev->hw_features |= NETIF_F_TSO;
1312 dev->hw_features |= NETIF_F_TSO6;
1313 dev->hw_features |= NETIF_F_TSO_ECN;
1314 dev->hw_features |= NETIF_F_RXCSUM;
1315 dev->hw_features |= NETIF_F_RXHASH;
1316 dev->features = dev->hw_features;
1317 dev->watchdog_timeo = 5 * HZ;
1318 dev->min_mtu = ETH_MIN_MTU;
1319 netif_carrier_off(dev);
1320
1321 priv = netdev_priv(dev);
1322 priv->dev = dev;
1323 priv->pdev = pdev;
1324 priv->msg_enable = DEFAULT_MSG_LEVEL;
1325 priv->reg_bar0 = reg_bar;
1326 priv->db_bar2 = db_bar;
1327 priv->service_task_flags = 0x0;
1328 priv->state_flags = 0x0;
1329 priv->ethtool_flags = 0x0;
1330
1331 gve_set_probe_in_progress(priv);
1332 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1333 if (!priv->gve_wq) {
1334 dev_err(&pdev->dev, "Could not allocate workqueue");
1335 err = -ENOMEM;
1336 goto abort_with_netdev;
1337 }
1338 INIT_WORK(&priv->service_task, gve_service_task);
1339 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1340 priv->tx_cfg.max_queues = max_tx_queues;
1341 priv->rx_cfg.max_queues = max_rx_queues;
1342
1343 err = gve_init_priv(priv, false);
1344 if (err)
1345 goto abort_with_wq;
1346
1347 err = register_netdev(dev);
1348 if (err)
1349 goto abort_with_wq;
1350
1351 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1352 gve_clear_probe_in_progress(priv);
1353 queue_work(priv->gve_wq, &priv->service_task);
1354 return 0;
1355
1356abort_with_wq:
1357 destroy_workqueue(priv->gve_wq);
1358
1359abort_with_netdev:
1360 free_netdev(dev);
1361
1362abort_with_db_bar:
1363 pci_iounmap(pdev, db_bar);
1364
1365abort_with_reg_bar:
1366 pci_iounmap(pdev, reg_bar);
1367
1368abort_with_pci_region:
1369 pci_release_regions(pdev);
1370
1371abort_with_enabled:
1372 pci_disable_device(pdev);
1373 return -ENXIO;
1374}
1375
/* PCI remove: unwind gve_probe() in strict reverse order. The BAR
 * pointers are saved before free_netdev() because priv lives inside
 * the netdev allocation and is gone once it is freed.
 */
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
1392
/* PCI IDs this driver binds to (Google gVNIC virtual NIC). */
static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ } /* end-of-table sentinel */
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);
1412