/*
 * Xen virtual network device frontend (xen-netfront): the guest-side
 * driver that exchanges packets with a backend domain over shared rings
 * and grant references.
 *
 * Distributed under the GNU General Public License (see MODULE_LICENSE
 * at the end of this file).
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <net/tcp.h>
#include <linux/udp.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>

#include <asm/xen/page.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/platform_pci.h>
#include <xen/grant_table.h>

#include <xen/interface/io/netif.h>
#include <xen/interface/memory.h>
#include <xen/interface/grant_table.h>

/* Module parameters */
#define MAX_QUEUES_DEFAULT 8
static unsigned int xennet_max_queues;
module_param_named(max_queues, xennet_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");

static const struct ethtool_ops xennet_ethtool_ops;

struct netfront_cb {
	int pull_to;
};

#define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))

#define RX_COPY_THRESHOLD 256

#define GRANT_INVALID_REF	0

#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)

/* Minimum number of filled rx slots before the ring is pushed to the backend */
#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)

/* Queue name is interface name with "-qNNN" appended */
#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)

/* IRQ name is queue name with "-tx" or "-rx" appended */
#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)

struct netfront_stats {
	u64			packets;
	u64			bytes;
	struct u64_stats_sync	syncp;
};

struct netfront_info;

struct netfront_queue {
	unsigned int id; /* Queue ID, 0-based */
	char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
	struct netfront_info *info;

	struct napi_struct napi;

	/* Split event channels support: tx_* == rx_* when a single
	 * shared event channel/interrupt is used.
	 */
	unsigned int tx_evtchn, rx_evtchn;
	unsigned int tx_irq, rx_irq;
	/* Only used when split event channels support is enabled */
	char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
	char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */

	spinlock_t tx_lock;
	struct xen_netif_tx_front_ring tx;
	int tx_ring_ref;

	/*
	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
	 * are linked from tx_skb_freelist through the skb_entry.link
	 * field.  Freelist indices are always less than PAGE_OFFSET,
	 * whereas skb pointers are always greater or equal, which is
	 * how the two cases are told apart (see skb_entry_is_link()).
	 */
	union skb_entry {
		struct sk_buff *skb;
		unsigned long link;
	} tx_skbs[NET_TX_RING_SIZE];
	grant_ref_t gref_tx_head;
	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
	struct page *grant_tx_page[NET_TX_RING_SIZE];
	unsigned tx_skb_freelist;

	spinlock_t rx_lock ____cacheline_aligned_in_smp;
	struct xen_netif_rx_front_ring rx;
	int rx_ring_ref;

	struct timer_list rx_refill_timer;

	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
	grant_ref_t gref_rx_head;
	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
};

struct netfront_info {
	struct list_head list;
	struct net_device *netdev;

	struct xenbus_device *xbdev;

	/* Multi-queue support */
	struct netfront_queue *queues;

	/* Statistics */
	struct netfront_stats __percpu *rx_stats;
	struct netfront_stats __percpu *tx_stats;

	atomic_t rx_gso_checksum_fixup;
};

struct netfront_rx_info {
	struct xen_netif_rx_response rx;
	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};

static void skb_entry_set_link(union skb_entry *list, unsigned short id)
{
	list->link = id;
}

static int skb_entry_is_link(const union skb_entry *list)
{
	BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
	return (unsigned long)list->skb < PAGE_OFFSET;
}

/*
 * Helpers for allocating and freeing slots in tx_skbs[] via the
 * embedded freelist.
 */

static void add_id_to_freelist(unsigned *head, union skb_entry *list,
			       unsigned short id)
{
	skb_entry_set_link(&list[id], *head);
	*head = id;
}

static unsigned short get_id_from_freelist(unsigned *head,
					   union skb_entry *list)
{
	unsigned int id = *head;
	*head = list[id].link;
	return id;
}

static int xennet_rxidx(RING_IDX idx)
{
	return idx & (NET_RX_RING_SIZE - 1);
}

static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
					 RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	struct sk_buff *skb = queue->rx_skbs[i];
	queue->rx_skbs[i] = NULL;
	return skb;
}

static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
				     RING_IDX ri)
{
	int i = xennet_rxidx(ri);
	grant_ref_t ref = queue->grant_rx_ref[i];
	queue->grant_rx_ref[i] = GRANT_INVALID_REF;
	return ref;
}

#ifdef CONFIG_SYSFS
static int xennet_sysfs_addif(struct net_device *netdev);
static void xennet_sysfs_delif(struct net_device *netdev);
#else /* !CONFIG_SYSFS */
#define xennet_sysfs_addif(dev) (0)
#define xennet_sysfs_delif(dev) do { } while (0)
#endif

static bool xennet_can_sg(struct net_device *dev)
{
	return dev->features & NETIF_F_SG;
}

static void rx_refill_timeout(unsigned long data)
{
	struct netfront_queue *queue = (struct netfront_queue *)data;
	napi_schedule(&queue->napi);
}

static int netfront_tx_slot_available(struct netfront_queue *queue)
{
	return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
		(NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
}

static void xennet_maybe_wake_tx(struct netfront_queue *queue)
{
	struct net_device *dev = queue->info->netdev;
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);

	if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
	    netfront_tx_slot_available(queue) &&
	    likely(netif_running(dev)))
		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
}

static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
{
	struct sk_buff *skb;
	struct page *page;

	skb = __netdev_alloc_skb(queue->info->netdev,
				 RX_COPY_THRESHOLD + NET_IP_ALIGN,
				 GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page) {
		kfree_skb(skb);
		return NULL;
	}
	skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);

	/* Align ip header to a 16 bytes boundary */
	skb_reserve(skb, NET_IP_ALIGN);
	skb->dev = queue->info->netdev;

	return skb;
}

static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
{
	RING_IDX req_prod = queue->rx.req_prod_pvt;
	int notify;
	int err = 0;

	if (unlikely(!netif_carrier_ok(queue->info->netdev)))
		return;

	for (req_prod = queue->rx.req_prod_pvt;
	     req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
	     req_prod++) {
		struct sk_buff *skb;
		unsigned short id;
		grant_ref_t ref;
		unsigned long pfn;
		struct xen_netif_rx_request *req;

		skb = xennet_alloc_one_rx_buffer(queue);
		if (!skb) {
			err = -ENOMEM;
			break;
		}

		id = xennet_rxidx(req_prod);

		BUG_ON(queue->rx_skbs[id]);
		queue->rx_skbs[id] = skb;

		ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
		WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
		queue->grant_rx_ref[id] = ref;

		pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));

		req = RING_GET_REQUEST(&queue->rx, req_prod);
		gnttab_grant_foreign_access_ref(ref,
						queue->info->xbdev->otherend_id,
						pfn_to_mfn(pfn),
						0);

		req->id = id;
		req->gref = ref;
	}

	queue->rx.req_prod_pvt = req_prod;

	/* Not enough requests? Try again later. */
	if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
	    unlikely(err)) {
		mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
		return;
	}

	wmb();		/* barrier so backend sees requests */

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
	if (notify)
		notify_remote_via_irq(queue->rx_irq);
}

static int xennet_open(struct net_device *dev)
{
	struct netfront_info *np = netdev_priv(dev);
	unsigned int num_queues = dev->real_num_tx_queues;
	unsigned int i = 0;
	struct netfront_queue *queue = NULL;

	for (i = 0; i < num_queues; ++i) {
		queue = &np->queues[i];
		napi_enable(&queue->napi);

		spin_lock_bh(&queue->rx_lock);
		if (netif_carrier_ok(dev)) {
			xennet_alloc_rx_buffers(queue);
			queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
			if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
				napi_schedule(&queue->napi);
		}
		spin_unlock_bh(&queue->rx_lock);
	}

	netif_tx_start_all_queues(dev);

	return 0;
}

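/*
 * Reclaim tx ring slots for which the backend has produced a response:
 * end foreign access on each grant, return the grant reference and the
 * slot id to their free lists, free the skb, and finally wake the tx
 * queue if it had been stopped for lack of slots.
 */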
377static void xennet_tx_buf_gc(struct netfront_queue *queue)
378{
379 RING_IDX cons, prod;
380 unsigned short id;
381 struct sk_buff *skb;
382 bool more_to_do;
383
384 BUG_ON(!netif_carrier_ok(queue->info->netdev));
385
386 do {
387 prod = queue->tx.sring->rsp_prod;
		rmb(); /* Ensure we see responses up to 'prod'. */
389
390 for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
391 struct xen_netif_tx_response *txrsp;
392
393 txrsp = RING_GET_RESPONSE(&queue->tx, cons);
394 if (txrsp->status == XEN_NETIF_RSP_NULL)
395 continue;
396
397 id = txrsp->id;
398 skb = queue->tx_skbs[id].skb;
399 if (unlikely(gnttab_query_foreign_access(
400 queue->grant_tx_ref[id]) != 0)) {
401 pr_alert("%s: warning -- grant still in use by backend domain\n",
402 __func__);
403 BUG();
404 }
405 gnttab_end_foreign_access_ref(
406 queue->grant_tx_ref[id], GNTMAP_readonly);
407 gnttab_release_grant_reference(
408 &queue->gref_tx_head, queue->grant_tx_ref[id]);
409 queue->grant_tx_ref[id] = GRANT_INVALID_REF;
410 queue->grant_tx_page[id] = NULL;
411 add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
412 dev_kfree_skb_irq(skb);
413 }
414
415 queue->tx.rsp_cons = prod;
416
417 RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
418 } while (more_to_do);
419
420 xennet_maybe_wake_tx(queue);
421}
422
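/*
 * Fill in a single tx request: take a free slot id and a grant reference,
 * grant the backend read-only access to the page, and record the skb,
 * page and grant so they can be released when the response arrives.
 */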
423static struct xen_netif_tx_request *xennet_make_one_txreq(
424 struct netfront_queue *queue, struct sk_buff *skb,
425 struct page *page, unsigned int offset, unsigned int len)
426{
427 unsigned int id;
428 struct xen_netif_tx_request *tx;
429 grant_ref_t ref;
430
431 len = min_t(unsigned int, PAGE_SIZE - offset, len);
432
433 id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
434 tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
435 ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
436 WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
437
438 gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
439 page_to_mfn(page), GNTMAP_readonly);
440
441 queue->tx_skbs[id].skb = skb;
442 queue->grant_tx_page[id] = page;
443 queue->grant_tx_ref[id] = ref;
444
445 tx->id = id;
446 tx->gref = ref;
447 tx->offset = offset;
448 tx->size = len;
449 tx->flags = 0;
450
451 return tx;
452}
453
454static struct xen_netif_tx_request *xennet_make_txreqs(
455 struct netfront_queue *queue, struct xen_netif_tx_request *tx,
456 struct sk_buff *skb, struct page *page,
457 unsigned int offset, unsigned int len)
458{
	/* Skip unused frames from start of page */
460 page += offset >> PAGE_SHIFT;
461 offset &= ~PAGE_MASK;
462
463 while (len) {
464 tx->flags |= XEN_NETTXF_more_data;
465 tx = xennet_make_one_txreq(queue, skb_get(skb),
466 page, offset, len);
467 page++;
468 offset = 0;
469 len -= tx->size;
470 }
471
472 return tx;
473}

/*
 * Count how many ring slots are required to send this skb. Each frag
 * might be a compound page.
 */
479static int xennet_count_skb_slots(struct sk_buff *skb)
480{
481 int i, frags = skb_shinfo(skb)->nr_frags;
482 int pages;
483
484 pages = PFN_UP(offset_in_page(skb->data) + skb_headlen(skb));
485
486 for (i = 0; i < frags; i++) {
487 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
488 unsigned long size = skb_frag_size(frag);
489 unsigned long offset = frag->page_offset;
490
491
492 offset &= ~PAGE_MASK;
493
494 pages += PFN_UP(offset + size);
495 }
496
497 return pages;
498}
499
500static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
501 void *accel_priv, select_queue_fallback_t fallback)
502{
503 unsigned int num_queues = dev->real_num_tx_queues;
504 u32 hash;
505 u16 queue_idx;
506
507
508 if (num_queues == 1) {
509 queue_idx = 0;
510 } else {
511 hash = skb_get_hash(skb);
512 queue_idx = hash % num_queues;
513 }
514
515 return queue_idx;
516}
517
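/*
 * Transmit path.  The linear area and each frag are turned into tx
 * requests backed by grant references; oversized skbs are linearized
 * first, and a GSO extra-info slot is queued when gso_size is set.
 * After pushing the requests the backend is notified if required, and
 * the queue is stopped when too few free slots remain.
 */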
518static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
519{
520 struct netfront_info *np = netdev_priv(dev);
521 struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
522 struct xen_netif_tx_request *tx, *first_tx;
523 unsigned int i;
524 int notify;
525 int slots;
526 struct page *page;
527 unsigned int offset;
528 unsigned int len;
529 unsigned long flags;
530 struct netfront_queue *queue = NULL;
531 unsigned int num_queues = dev->real_num_tx_queues;
532 u16 queue_index;
533 struct sk_buff *nskb;
534
535
536 if (num_queues < 1)
537 goto drop;
538
539 queue_index = skb_get_queue_mapping(skb);
540 queue = &np->queues[queue_index];
541
	/* If skb->len is too big for wire format, drop skb and alert
	 * user about misconfiguration.
	 */
545 if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
546 net_alert_ratelimited(
547 "xennet: skb->len = %u, too big for wire format\n",
548 skb->len);
549 goto drop;
550 }
551
552 slots = xennet_count_skb_slots(skb);
553 if (unlikely(slots > MAX_SKB_FRAGS + 1)) {
554 net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
555 slots, skb->len);
556 if (skb_linearize(skb))
557 goto drop;
558 }
559
560 page = virt_to_page(skb->data);
561 offset = offset_in_page(skb->data);

	/* The first req should be at least ETH_HLEN size, or the packet
	 * will be dropped by netback.
	 */
566 if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
567 nskb = skb_copy(skb, GFP_ATOMIC);
568 if (!nskb)
569 goto drop;
570 dev_kfree_skb_any(skb);
571 skb = nskb;
572 page = virt_to_page(skb->data);
573 offset = offset_in_page(skb->data);
574 }
575
576 len = skb_headlen(skb);
577
578 spin_lock_irqsave(&queue->tx_lock, flags);
579
580 if (unlikely(!netif_carrier_ok(dev) ||
581 (slots > 1 && !xennet_can_sg(dev)) ||
582 netif_needs_gso(skb, netif_skb_features(skb)))) {
583 spin_unlock_irqrestore(&queue->tx_lock, flags);
584 goto drop;
585 }
586
587
588 first_tx = tx = xennet_make_one_txreq(queue, skb,
589 page, offset, len);
590 page++;
591 offset = 0;
592 len -= tx->size;
593
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		/* local packet? */
		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		/* remote but checksummed. */
		tx->flags |= XEN_NETTXF_data_validated;
600
601
602 if (skb_shinfo(skb)->gso_size) {
603 struct xen_netif_extra_info *gso;
604
605 gso = (struct xen_netif_extra_info *)
606 RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
607
608 tx->flags |= XEN_NETTXF_extra_info;
609
610 gso->u.gso.size = skb_shinfo(skb)->gso_size;
611 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
612 gso->u.gso.pad = 0;
613 gso->u.gso.features = 0;
614
615 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
616 gso->flags = 0;
617 }
618
619
620 tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
621
622
623 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
624 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
625 tx = xennet_make_txreqs(queue, tx, skb,
626 skb_frag_page(frag), frag->page_offset,
627 skb_frag_size(frag));
628 }
629
630
631 first_tx->size = skb->len;
632
633 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
634 if (notify)
635 notify_remote_via_irq(queue->tx_irq);
636
637 u64_stats_update_begin(&tx_stats->syncp);
638 tx_stats->bytes += skb->len;
639 tx_stats->packets++;
640 u64_stats_update_end(&tx_stats->syncp);
641
642
643 xennet_tx_buf_gc(queue);
644
645 if (!netfront_tx_slot_available(queue))
646 netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
647
648 spin_unlock_irqrestore(&queue->tx_lock, flags);
649
650 return NETDEV_TX_OK;
651
652 drop:
653 dev->stats.tx_dropped++;
654 dev_kfree_skb_any(skb);
655 return NETDEV_TX_OK;
656}
657
658static int xennet_close(struct net_device *dev)
659{
660 struct netfront_info *np = netdev_priv(dev);
661 unsigned int num_queues = dev->real_num_tx_queues;
662 unsigned int i;
663 struct netfront_queue *queue;
664 netif_tx_stop_all_queues(np->netdev);
665 for (i = 0; i < num_queues; ++i) {
666 queue = &np->queues[i];
667 napi_disable(&queue->napi);
668 }
669 return 0;
670}
671
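/*
 * Re-post an rx buffer (skb + grant ref) that could not be consumed, by
 * placing it in the slot named by the next unused request index.
 */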
672static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
673 grant_ref_t ref)
674{
675 int new = xennet_rxidx(queue->rx.req_prod_pvt);
676
677 BUG_ON(queue->rx_skbs[new]);
678 queue->rx_skbs[new] = skb;
679 queue->grant_rx_ref[new] = ref;
680 RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
681 RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
682 queue->rx.req_prod_pvt++;
683}
684
685static int xennet_get_extras(struct netfront_queue *queue,
686 struct xen_netif_extra_info *extras,
687 RING_IDX rp)
688
689{
690 struct xen_netif_extra_info *extra;
691 struct device *dev = &queue->info->netdev->dev;
692 RING_IDX cons = queue->rx.rsp_cons;
693 int err = 0;
694
695 do {
696 struct sk_buff *skb;
697 grant_ref_t ref;
698
699 if (unlikely(cons + 1 == rp)) {
700 if (net_ratelimit())
701 dev_warn(dev, "Missing extra info\n");
702 err = -EBADR;
703 break;
704 }
705
706 extra = (struct xen_netif_extra_info *)
707 RING_GET_RESPONSE(&queue->rx, ++cons);
708
709 if (unlikely(!extra->type ||
710 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
711 if (net_ratelimit())
712 dev_warn(dev, "Invalid extra type: %d\n",
713 extra->type);
714 err = -EINVAL;
715 } else {
716 memcpy(&extras[extra->type - 1], extra,
717 sizeof(*extra));
718 }
719
720 skb = xennet_get_rx_skb(queue, cons);
721 ref = xennet_get_rx_ref(queue, cons);
722 xennet_move_rx_slot(queue, skb, ref);
723 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
724
725 queue->rx.rsp_cons = cons;
726 return err;
727}
728
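/*
 * Consume the responses that make up one incoming packet, ending grant
 * access on each fragment and queueing the backing skbs on 'list'.
 * Malformed responses cause their buffers to be re-posted and an error
 * to be returned so the whole packet can be dropped.
 */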
729static int xennet_get_responses(struct netfront_queue *queue,
730 struct netfront_rx_info *rinfo, RING_IDX rp,
731 struct sk_buff_head *list)
732{
733 struct xen_netif_rx_response *rx = &rinfo->rx;
734 struct xen_netif_extra_info *extras = rinfo->extras;
735 struct device *dev = &queue->info->netdev->dev;
736 RING_IDX cons = queue->rx.rsp_cons;
737 struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
738 grant_ref_t ref = xennet_get_rx_ref(queue, cons);
739 int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
740 int slots = 1;
741 int err = 0;
742 unsigned long ret;
743
744 if (rx->flags & XEN_NETRXF_extra_info) {
745 err = xennet_get_extras(queue, extras, rp);
746 cons = queue->rx.rsp_cons;
747 }
748
749 for (;;) {
750 if (unlikely(rx->status < 0 ||
751 rx->offset + rx->status > PAGE_SIZE)) {
752 if (net_ratelimit())
753 dev_warn(dev, "rx->offset: %x, size: %u\n",
754 rx->offset, rx->status);
755 xennet_move_rx_slot(queue, skb, ref);
756 err = -EINVAL;
757 goto next;
758 }

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
765 if (ref == GRANT_INVALID_REF) {
766 if (net_ratelimit())
767 dev_warn(dev, "Bad rx response id %d.\n",
768 rx->id);
769 err = -EINVAL;
770 goto next;
771 }
772
773 ret = gnttab_end_foreign_access_ref(ref, 0);
774 BUG_ON(!ret);
775
776 gnttab_release_grant_reference(&queue->gref_rx_head, ref);
777
778 __skb_queue_tail(list, skb);
779
780next:
781 if (!(rx->flags & XEN_NETRXF_more_data))
782 break;
783
784 if (cons + slots == rp) {
785 if (net_ratelimit())
786 dev_warn(dev, "Need more slots\n");
787 err = -ENOENT;
788 break;
789 }
790
791 rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
792 skb = xennet_get_rx_skb(queue, cons + slots);
793 ref = xennet_get_rx_ref(queue, cons + slots);
794 slots++;
795 }
796
797 if (unlikely(slots > max)) {
798 if (net_ratelimit())
799 dev_warn(dev, "Too many slots\n");
800 err = -E2BIG;
801 }
802
803 if (unlikely(err))
804 queue->rx.rsp_cons = cons + slots;
805
806 return err;
807}
808
809static int xennet_set_skb_gso(struct sk_buff *skb,
810 struct xen_netif_extra_info *gso)
811{
812 if (!gso->u.gso.size) {
813 if (net_ratelimit())
814 pr_warn("GSO size must not be zero\n");
815 return -EINVAL;
816 }

	/* Currently only TCPv4 segmentation offload is supported. */
819 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
820 if (net_ratelimit())
821 pr_warn("Bad GSO type %d\n", gso->u.gso.type);
822 return -EINVAL;
823 }
824
825 skb_shinfo(skb)->gso_size = gso->u.gso.size;
826 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs recomputed. */
829 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
830 skb_shinfo(skb)->gso_segs = 0;
831
832 return 0;
833}
834
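/*
 * Attach the extra buffers collected by xennet_get_responses() to 'skb'
 * as page frags, pulling some data into the linear area first if the
 * frag slots would otherwise overflow.  Returns the updated consumer
 * index.
 */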
835static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
836 struct sk_buff *skb,
837 struct sk_buff_head *list)
838{
839 struct skb_shared_info *shinfo = skb_shinfo(skb);
840 RING_IDX cons = queue->rx.rsp_cons;
841 struct sk_buff *nskb;
842
843 while ((nskb = __skb_dequeue(list))) {
844 struct xen_netif_rx_response *rx =
845 RING_GET_RESPONSE(&queue->rx, ++cons);
846 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
847
848 if (shinfo->nr_frags == MAX_SKB_FRAGS) {
849 unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
850
851 BUG_ON(pull_to <= skb_headlen(skb));
852 __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
853 }
854 BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
855
856 skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
857 rx->offset, rx->status, PAGE_SIZE);
858
859 skb_shinfo(nskb)->nr_frags = 0;
860 kfree_skb(nskb);
861 }
862
863 return cons;
864}
865
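/*
 * Fix up checksum state for received packets: force CHECKSUM_PARTIAL on
 * GSO skbs that arrived without it, and set up csum_start/csum_offset
 * for the TCP or UDP checksum field, recomputing the pseudo-header sum
 * when needed.
 */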
866static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
867{
868 struct iphdr *iph;
869 int err = -EPROTO;
870 int recalculate_partial_csum = 0;

	/*
	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
878 if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
879 struct netfront_info *np = netdev_priv(dev);
880 atomic_inc(&np->rx_gso_checksum_fixup);
881 skb->ip_summed = CHECKSUM_PARTIAL;
882 recalculate_partial_csum = 1;
883 }
884
885
886 if (skb->ip_summed != CHECKSUM_PARTIAL)
887 return 0;
888
889 if (skb->protocol != htons(ETH_P_IP))
890 goto out;
891
892 iph = (void *)skb->data;
893
894 switch (iph->protocol) {
895 case IPPROTO_TCP:
896 if (!skb_partial_csum_set(skb, 4 * iph->ihl,
897 offsetof(struct tcphdr, check)))
898 goto out;
899
900 if (recalculate_partial_csum) {
901 struct tcphdr *tcph = tcp_hdr(skb);
902 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
903 skb->len - iph->ihl*4,
904 IPPROTO_TCP, 0);
905 }
906 break;
907 case IPPROTO_UDP:
908 if (!skb_partial_csum_set(skb, 4 * iph->ihl,
909 offsetof(struct udphdr, check)))
910 goto out;
911
912 if (recalculate_partial_csum) {
913 struct udphdr *udph = udp_hdr(skb);
914 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
915 skb->len - iph->ihl*4,
916 IPPROTO_UDP, 0);
917 }
918 break;
919 default:
920 if (net_ratelimit())
921 pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
922 iph->protocol);
923 goto out;
924 }
925
926 err = 0;
927
928out:
929 return err;
930}
931
932static int handle_incoming_queue(struct netfront_queue *queue,
933 struct sk_buff_head *rxq)
934{
935 struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
936 int packets_dropped = 0;
937 struct sk_buff *skb;
938
939 while ((skb = __skb_dequeue(rxq)) != NULL) {
940 int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
941
942 if (pull_to > skb_headlen(skb))
943 __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
944
945
946 skb->protocol = eth_type_trans(skb, queue->info->netdev);
947
948 if (checksum_setup(queue->info->netdev, skb)) {
949 kfree_skb(skb);
950 packets_dropped++;
951 queue->info->netdev->stats.rx_errors++;
952 continue;
953 }
954
955 u64_stats_update_begin(&rx_stats->syncp);
956 rx_stats->packets++;
957 rx_stats->bytes += skb->len;
958 u64_stats_update_end(&rx_stats->syncp);
959
960
961 napi_gro_receive(&queue->napi, skb);
962 }
963
964 return packets_dropped;
965}
966
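/*
 * NAPI poll handler: drain up to 'budget' responses from the rx ring,
 * rebuild each packet (frags, GSO info, checksum flags) and pass it up
 * via napi_gro_receive(), then refill the ring.  When fewer than
 * 'budget' packets were processed, NAPI is completed and the ring event
 * index is re-armed.
 */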
967static int xennet_poll(struct napi_struct *napi, int budget)
968{
969 struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
970 struct net_device *dev = queue->info->netdev;
971 struct sk_buff *skb;
972 struct netfront_rx_info rinfo;
973 struct xen_netif_rx_response *rx = &rinfo.rx;
974 struct xen_netif_extra_info *extras = rinfo.extras;
975 RING_IDX i, rp;
976 int work_done;
977 struct sk_buff_head rxq;
978 struct sk_buff_head errq;
979 struct sk_buff_head tmpq;
980 int err;
981
982 spin_lock(&queue->rx_lock);
983
984 skb_queue_head_init(&rxq);
985 skb_queue_head_init(&errq);
986 skb_queue_head_init(&tmpq);
987
988 rp = queue->rx.sring->rsp_prod;
989 rmb();
990
991 i = queue->rx.rsp_cons;
992 work_done = 0;
993 while ((i != rp) && (work_done < budget)) {
994 memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
995 memset(extras, 0, sizeof(rinfo.extras));
996
997 err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
998
999 if (unlikely(err)) {
1000err:
1001 while ((skb = __skb_dequeue(&tmpq)))
1002 __skb_queue_tail(&errq, skb);
1003 dev->stats.rx_errors++;
1004 i = queue->rx.rsp_cons;
1005 continue;
1006 }
1007
1008 skb = __skb_dequeue(&tmpq);
1009
1010 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1011 struct xen_netif_extra_info *gso;
1012 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1013
1014 if (unlikely(xennet_set_skb_gso(skb, gso))) {
1015 __skb_queue_head(&tmpq, skb);
1016 queue->rx.rsp_cons += skb_queue_len(&tmpq);
1017 goto err;
1018 }
1019 }
1020
1021 NETFRONT_SKB_CB(skb)->pull_to = rx->status;
1022 if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1023 NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1024
1025 skb_shinfo(skb)->frags[0].page_offset = rx->offset;
1026 skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1027 skb->data_len = rx->status;
1028 skb->len += rx->status;
1029
1030 i = xennet_fill_frags(queue, skb, &tmpq);
1031
1032 if (rx->flags & XEN_NETRXF_csum_blank)
1033 skb->ip_summed = CHECKSUM_PARTIAL;
1034 else if (rx->flags & XEN_NETRXF_data_validated)
1035 skb->ip_summed = CHECKSUM_UNNECESSARY;
1036
1037 __skb_queue_tail(&rxq, skb);
1038
1039 queue->rx.rsp_cons = ++i;
1040 work_done++;
1041 }
1042
1043 __skb_queue_purge(&errq);
1044
1045 work_done -= handle_incoming_queue(queue, &rxq);
1046
1047 xennet_alloc_rx_buffers(queue);
1048
1049 if (work_done < budget) {
1050 int more_to_do = 0;
1051
1052 napi_complete(napi);
1053
1054 RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1055 if (more_to_do)
1056 napi_schedule(napi);
1057 }
1058
1059 spin_unlock(&queue->rx_lock);
1060
1061 return work_done;
1062}
1063
1064static int xennet_change_mtu(struct net_device *dev, int mtu)
1065{
1066 int max = xennet_can_sg(dev) ?
1067 XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN;
1068
1069 if (mtu > max)
1070 return -EINVAL;
1071 dev->mtu = mtu;
1072 return 0;
1073}
1074
1075static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1076 struct rtnl_link_stats64 *tot)
1077{
1078 struct netfront_info *np = netdev_priv(dev);
1079 int cpu;
1080
1081 for_each_possible_cpu(cpu) {
1082 struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1083 struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1084 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1085 unsigned int start;
1086
1087 do {
1088 start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1089 tx_packets = tx_stats->packets;
1090 tx_bytes = tx_stats->bytes;
1091 } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1092
1093 do {
1094 start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1095 rx_packets = rx_stats->packets;
1096 rx_bytes = rx_stats->bytes;
1097 } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1098
1099 tot->rx_packets += rx_packets;
1100 tot->tx_packets += tx_packets;
1101 tot->rx_bytes += rx_bytes;
1102 tot->tx_bytes += tx_bytes;
1103 }
1104
1105 tot->rx_errors = dev->stats.rx_errors;
1106 tot->tx_dropped = dev->stats.tx_dropped;
1107
1108 return tot;
1109}
1110
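/*
 * Release every outstanding tx buffer when disconnecting from the
 * backend: revoke each grant and free the associated skbs; slots that
 * are freelist links are skipped.
 */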
1111static void xennet_release_tx_bufs(struct netfront_queue *queue)
1112{
1113 struct sk_buff *skb;
1114 int i;
1115
1116 for (i = 0; i < NET_TX_RING_SIZE; i++) {
		/* Skip over entries which are actually freelist references */
1118 if (skb_entry_is_link(&queue->tx_skbs[i]))
1119 continue;
1120
1121 skb = queue->tx_skbs[i].skb;
1122 get_page(queue->grant_tx_page[i]);
1123 gnttab_end_foreign_access(queue->grant_tx_ref[i],
1124 GNTMAP_readonly,
1125 (unsigned long)page_address(queue->grant_tx_page[i]));
1126 queue->grant_tx_page[i] = NULL;
1127 queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1128 add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1129 dev_kfree_skb_irq(skb);
1130 }
1131}
1132
1133static void xennet_release_rx_bufs(struct netfront_queue *queue)
1134{
1135 int id, ref;
1136
1137 spin_lock_bh(&queue->rx_lock);
1138
1139 for (id = 0; id < NET_RX_RING_SIZE; id++) {
1140 struct sk_buff *skb;
1141 struct page *page;
1142
1143 skb = queue->rx_skbs[id];
1144 if (!skb)
1145 continue;
1146
1147 ref = queue->grant_rx_ref[id];
1148 if (ref == GRANT_INVALID_REF)
1149 continue;
1150
1151 page = skb_frag_page(&skb_shinfo(skb)->frags[0]);

		/* gnttab_end_foreign_access() needs a page ref until
		 * foreign access is ended (which may be deferred).
		 */
1156 get_page(page);
1157 gnttab_end_foreign_access(ref, 0,
1158 (unsigned long)page_address(page));
1159 queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1160
1161 kfree_skb(skb);
1162 }
1163
1164 spin_unlock_bh(&queue->rx_lock);
1165}
1166
1167static netdev_features_t xennet_fix_features(struct net_device *dev,
1168 netdev_features_t features)
1169{
1170 struct netfront_info *np = netdev_priv(dev);
1171 int val;
1172
1173 if (features & NETIF_F_SG) {
1174 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1175 "%d", &val) < 0)
1176 val = 0;
1177
1178 if (!val)
1179 features &= ~NETIF_F_SG;
1180 }
1181
1182 if (features & NETIF_F_TSO) {
1183 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1184 "feature-gso-tcpv4", "%d", &val) < 0)
1185 val = 0;
1186
1187 if (!val)
1188 features &= ~NETIF_F_TSO;
1189 }
1190
1191 return features;
1192}
1193
1194static int xennet_set_features(struct net_device *dev,
1195 netdev_features_t features)
1196{
1197 if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
		netdev_info(dev, "Reducing MTU because no SG offload\n");
1199 dev->mtu = ETH_DATA_LEN;
1200 }
1201
1202 return 0;
1203}
1204
1205static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1206{
1207 struct netfront_queue *queue = dev_id;
1208 unsigned long flags;
1209
1210 spin_lock_irqsave(&queue->tx_lock, flags);
1211 xennet_tx_buf_gc(queue);
1212 spin_unlock_irqrestore(&queue->tx_lock, flags);
1213
1214 return IRQ_HANDLED;
1215}
1216
1217static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1218{
1219 struct netfront_queue *queue = dev_id;
1220 struct net_device *dev = queue->info->netdev;
1221
1222 if (likely(netif_carrier_ok(dev) &&
1223 RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1224 napi_schedule(&queue->napi);
1225
1226 return IRQ_HANDLED;
1227}
1228
1229static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1230{
1231 xennet_tx_interrupt(irq, dev_id);
1232 xennet_rx_interrupt(irq, dev_id);
1233 return IRQ_HANDLED;
1234}
1235
1236#ifdef CONFIG_NET_POLL_CONTROLLER
1237static void xennet_poll_controller(struct net_device *dev)
1238{
	/* Poll each queue */
1240 struct netfront_info *info = netdev_priv(dev);
1241 unsigned int num_queues = dev->real_num_tx_queues;
1242 unsigned int i;
1243 for (i = 0; i < num_queues; ++i)
1244 xennet_interrupt(0, &info->queues[i]);
1245}
1246#endif
1247
1248static const struct net_device_ops xennet_netdev_ops = {
1249 .ndo_open = xennet_open,
1250 .ndo_stop = xennet_close,
1251 .ndo_start_xmit = xennet_start_xmit,
1252 .ndo_change_mtu = xennet_change_mtu,
1253 .ndo_get_stats64 = xennet_get_stats64,
1254 .ndo_set_mac_address = eth_mac_addr,
1255 .ndo_validate_addr = eth_validate_addr,
1256 .ndo_fix_features = xennet_fix_features,
1257 .ndo_set_features = xennet_set_features,
1258 .ndo_select_queue = xennet_select_queue,
1259#ifdef CONFIG_NET_POLL_CONTROLLER
1260 .ndo_poll_controller = xennet_poll_controller,
1261#endif
1262};
1263
1264static void xennet_free_netdev(struct net_device *netdev)
1265{
1266 struct netfront_info *np = netdev_priv(netdev);
1267
1268 free_percpu(np->rx_stats);
1269 free_percpu(np->tx_stats);
1270 free_netdev(netdev);
1271}
1272
1273static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1274{
1275 int err;
1276 struct net_device *netdev;
1277 struct netfront_info *np;
1278
1279 netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1280 if (!netdev)
1281 return ERR_PTR(-ENOMEM);
1282
1283 np = netdev_priv(netdev);
1284 np->xbdev = dev;
1285
1286 np->queues = NULL;
1287
1288 err = -ENOMEM;
1289 np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1290 if (np->rx_stats == NULL)
1291 goto exit;
1292 np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1293 if (np->tx_stats == NULL)
1294 goto exit;
1295
1296 netdev->netdev_ops = &xennet_netdev_ops;
1297
1298 netdev->features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1299 NETIF_F_GSO_ROBUST;
1300 netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;

	/*
	 * Enable the hw_features by default; xennet_fix_features() will
	 * clear SG and TSO again if the backend turns out not to
	 * support them.
	 */
1308 netdev->features |= netdev->hw_features;
1309
1310 SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
1311 SET_NETDEV_DEV(netdev, &dev->dev);
1312
1313 netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
1314
1315 np->netdev = netdev;
1316
1317 netif_carrier_off(netdev);
1318
1319 return netdev;
1320
1321 exit:
1322 xennet_free_netdev(netdev);
1323 return ERR_PTR(err);
1324}

/**
 * Entry point to this code when a new device is created.  Allocate the
 * basic structures and register the network device; the rings and event
 * channels are set up later, when the backend is ready.
 */
1331static int netfront_probe(struct xenbus_device *dev,
1332 const struct xenbus_device_id *id)
1333{
1334 int err;
1335 struct net_device *netdev;
1336 struct netfront_info *info;
1337
1338 netdev = xennet_create_dev(dev);
1339 if (IS_ERR(netdev)) {
1340 err = PTR_ERR(netdev);
1341 xenbus_dev_fatal(dev, err, "creating netdev");
1342 return err;
1343 }
1344
1345 info = netdev_priv(netdev);
1346 dev_set_drvdata(&dev->dev, info);
1347
1348 err = register_netdev(info->netdev);
1349 if (err) {
1350 pr_warn("%s: register_netdev err=%d\n", __func__, err);
1351 goto fail;
1352 }
1353
1354 err = xennet_sysfs_addif(info->netdev);
1355 if (err) {
1356 unregister_netdev(info->netdev);
1357 pr_warn("%s: add sysfs failed err=%d\n", __func__, err);
1358 goto fail;
1359 }
1360
1361 return 0;
1362
1363 fail:
1364 xennet_free_netdev(netdev);
1365 dev_set_drvdata(&dev->dev, NULL);
1366 return err;
1367}
1368
1369static void xennet_end_access(int ref, void *page)
1370{
	/* This frees the page as a side-effect */
1372 if (ref != GRANT_INVALID_REF)
1373 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1374}
1375
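/*
 * Tear down the connection to the backend: stop the refill timers,
 * unbind the interrupts, release all outstanding tx/rx buffers and
 * grant references, and revoke access to the shared rings.
 */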
1376static void xennet_disconnect_backend(struct netfront_info *info)
1377{
1378 unsigned int i = 0;
1379 unsigned int num_queues = info->netdev->real_num_tx_queues;
1380
1381 netif_carrier_off(info->netdev);
1382
1383 for (i = 0; i < num_queues && info->queues; ++i) {
1384 struct netfront_queue *queue = &info->queues[i];
1385
1386 del_timer_sync(&queue->rx_refill_timer);
1387
1388 if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1389 unbind_from_irqhandler(queue->tx_irq, queue);
1390 if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1391 unbind_from_irqhandler(queue->tx_irq, queue);
1392 unbind_from_irqhandler(queue->rx_irq, queue);
1393 }
1394 queue->tx_evtchn = queue->rx_evtchn = 0;
1395 queue->tx_irq = queue->rx_irq = 0;
1396
1397 if (netif_running(info->netdev))
1398 napi_synchronize(&queue->napi);
1399
1400 xennet_release_tx_bufs(queue);
1401 xennet_release_rx_bufs(queue);
1402 gnttab_free_grant_references(queue->gref_tx_head);
1403 gnttab_free_grant_references(queue->gref_rx_head);

		/* End access and free the pages */
1406 xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1407 xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1408
1409 queue->tx_ring_ref = GRANT_INVALID_REF;
1410 queue->rx_ring_ref = GRANT_INVALID_REF;
1411 queue->tx.sring = NULL;
1412 queue->rx.sring = NULL;
1413 }
1414}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our netif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
1422static int netfront_resume(struct xenbus_device *dev)
1423{
1424 struct netfront_info *info = dev_get_drvdata(&dev->dev);
1425
1426 dev_dbg(&dev->dev, "%s\n", dev->nodename);
1427
1428 xennet_disconnect_backend(info);
1429 return 0;
1430}
1431
1432static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1433{
1434 char *s, *e, *macstr;
1435 int i;
1436
1437 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1438 if (IS_ERR(macstr))
1439 return PTR_ERR(macstr);
1440
1441 for (i = 0; i < ETH_ALEN; i++) {
1442 mac[i] = simple_strtoul(s, &e, 16);
1443 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1444 kfree(macstr);
1445 return -ENOENT;
1446 }
1447 s = e+1;
1448 }
1449
1450 kfree(macstr);
1451 return 0;
1452}
1453
1454static int setup_netfront_single(struct netfront_queue *queue)
1455{
1456 int err;
1457
1458 err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1459 if (err < 0)
1460 goto fail;
1461
1462 err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1463 xennet_interrupt,
1464 0, queue->info->netdev->name, queue);
1465 if (err < 0)
1466 goto bind_fail;
1467 queue->rx_evtchn = queue->tx_evtchn;
1468 queue->rx_irq = queue->tx_irq = err;
1469
1470 return 0;
1471
1472bind_fail:
1473 xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1474 queue->tx_evtchn = 0;
1475fail:
1476 return err;
1477}
1478
1479static int setup_netfront_split(struct netfront_queue *queue)
1480{
1481 int err;
1482
1483 err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1484 if (err < 0)
1485 goto fail;
1486 err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1487 if (err < 0)
1488 goto alloc_rx_evtchn_fail;
1489
1490 snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1491 "%s-tx", queue->name);
1492 err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1493 xennet_tx_interrupt,
1494 0, queue->tx_irq_name, queue);
1495 if (err < 0)
1496 goto bind_tx_fail;
1497 queue->tx_irq = err;
1498
1499 snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1500 "%s-rx", queue->name);
1501 err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1502 xennet_rx_interrupt,
1503 0, queue->rx_irq_name, queue);
1504 if (err < 0)
1505 goto bind_rx_fail;
1506 queue->rx_irq = err;
1507
1508 return 0;
1509
1510bind_rx_fail:
1511 unbind_from_irqhandler(queue->tx_irq, queue);
1512 queue->tx_irq = 0;
1513bind_tx_fail:
1514 xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1515 queue->rx_evtchn = 0;
1516alloc_rx_evtchn_fail:
1517 xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1518 queue->tx_evtchn = 0;
1519fail:
1520 return err;
1521}
1522
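/*
 * Allocate and grant the shared tx/rx rings for one queue and bind the
 * event channel(s), using split tx/rx channels when the backend supports
 * them and falling back to a single shared channel otherwise.
 */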
1523static int setup_netfront(struct xenbus_device *dev,
1524 struct netfront_queue *queue, unsigned int feature_split_evtchn)
1525{
1526 struct xen_netif_tx_sring *txs;
1527 struct xen_netif_rx_sring *rxs;
1528 int err;
1529
1530 queue->tx_ring_ref = GRANT_INVALID_REF;
1531 queue->rx_ring_ref = GRANT_INVALID_REF;
1532 queue->rx.sring = NULL;
1533 queue->tx.sring = NULL;
1534
1535 txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1536 if (!txs) {
1537 err = -ENOMEM;
1538 xenbus_dev_fatal(dev, err, "allocating tx ring page");
1539 goto fail;
1540 }
1541 SHARED_RING_INIT(txs);
1542 FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE);
1543
1544 err = xenbus_grant_ring(dev, virt_to_mfn(txs));
1545 if (err < 0)
1546 goto grant_tx_ring_fail;
1547 queue->tx_ring_ref = err;
1548
1549 rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1550 if (!rxs) {
1551 err = -ENOMEM;
1552 xenbus_dev_fatal(dev, err, "allocating rx ring page");
1553 goto alloc_rx_ring_fail;
1554 }
1555 SHARED_RING_INIT(rxs);
1556 FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
1557
1558 err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
1559 if (err < 0)
1560 goto grant_rx_ring_fail;
1561 queue->rx_ring_ref = err;
1562
1563 if (feature_split_evtchn)
1564 err = setup_netfront_split(queue);
	/* setup single event channel if
	 *  a) feature-split-event-channels == 0
	 *  b) feature-split-event-channels == 1 but failed to setup
	 */
1569 if (!feature_split_evtchn || (feature_split_evtchn && err))
1570 err = setup_netfront_single(queue);
1571
1572 if (err)
1573 goto alloc_evtchn_fail;
1574
1575 return 0;
1576
1577
1578
1579
1580alloc_evtchn_fail:
1581 gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1582grant_rx_ring_fail:
1583 free_page((unsigned long)rxs);
1584alloc_rx_ring_fail:
1585 gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1586grant_tx_ring_fail:
1587 free_page((unsigned long)txs);
1588fail:
1589 return err;
1590}

/*
 * Queue-specific initialisation: locks, the rx refill timer, the tx skb
 * freelist and the pools of tx/rx grant references.  The shared rings
 * are set up separately in setup_netfront().
 */
1596static int xennet_init_queue(struct netfront_queue *queue)
1597{
1598 unsigned short i;
1599 int err = 0;
1600
1601 spin_lock_init(&queue->tx_lock);
1602 spin_lock_init(&queue->rx_lock);
1603
1604 setup_timer(&queue->rx_refill_timer, rx_refill_timeout,
1605 (unsigned long)queue);
1606
1607 snprintf(queue->name, sizeof(queue->name), "%s-q%u",
1608 queue->info->netdev->name, queue->id);
1609
1610
1611 queue->tx_skb_freelist = 0;
1612 for (i = 0; i < NET_TX_RING_SIZE; i++) {
1613 skb_entry_set_link(&queue->tx_skbs[i], i+1);
1614 queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1615 queue->grant_tx_page[i] = NULL;
1616 }
1617
1618
1619 for (i = 0; i < NET_RX_RING_SIZE; i++) {
1620 queue->rx_skbs[i] = NULL;
1621 queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1622 }
1623
1624
1625 if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
1626 &queue->gref_tx_head) < 0) {
1627 pr_alert("can't alloc tx grant refs\n");
1628 err = -ENOMEM;
1629 goto exit;
1630 }
1631
1632
1633 if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
1634 &queue->gref_rx_head) < 0) {
1635 pr_alert("can't alloc rx grant refs\n");
1636 err = -ENOMEM;
1637 goto exit_free_tx;
1638 }
1639
1640 return 0;
1641
1642 exit_free_tx:
1643 gnttab_free_grant_references(queue->gref_tx_head);
1644 exit:
1645 return err;
1646}
1647
1648static int write_queue_xenstore_keys(struct netfront_queue *queue,
1649 struct xenbus_transaction *xbt, int write_hierarchical)
1650{
	/* write_hierarchical selects where the keys go:
	 *  0: write keys directly under the device node (single queue).
	 *  1: write keys under the "queue-N" subdirectory (multi-queue).
	 */
1655 struct xenbus_device *dev = queue->info->xbdev;
1656 int err;
1657 const char *message;
1658 char *path;
1659 size_t pathsize;
1660
1661
1662 if (write_hierarchical) {
1663 pathsize = strlen(dev->nodename) + 10;
1664 path = kzalloc(pathsize, GFP_KERNEL);
1665 if (!path) {
1666 err = -ENOMEM;
1667 message = "out of memory while writing ring references";
1668 goto error;
1669 }
1670 snprintf(path, pathsize, "%s/queue-%u",
1671 dev->nodename, queue->id);
1672 } else {
1673 path = (char *)dev->nodename;
1674 }
1675
1676
1677 err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1678 queue->tx_ring_ref);
1679 if (err) {
1680 message = "writing tx-ring-ref";
1681 goto error;
1682 }
1683
1684 err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1685 queue->rx_ring_ref);
1686 if (err) {
1687 message = "writing rx-ring-ref";
1688 goto error;
1689 }
1690
1691
1692
1693
1694 if (queue->tx_evtchn == queue->rx_evtchn) {
1695
1696 err = xenbus_printf(*xbt, path,
1697 "event-channel", "%u", queue->tx_evtchn);
1698 if (err) {
1699 message = "writing event-channel";
1700 goto error;
1701 }
1702 } else {
1703
1704 err = xenbus_printf(*xbt, path,
1705 "event-channel-tx", "%u", queue->tx_evtchn);
1706 if (err) {
1707 message = "writing event-channel-tx";
1708 goto error;
1709 }
1710
1711 err = xenbus_printf(*xbt, path,
1712 "event-channel-rx", "%u", queue->rx_evtchn);
1713 if (err) {
1714 message = "writing event-channel-rx";
1715 goto error;
1716 }
1717 }
1718
1719 if (write_hierarchical)
1720 kfree(path);
1721 return 0;
1722
1723error:
1724 if (write_hierarchical)
1725 kfree(path);
1726 xenbus_dev_fatal(dev, err, "%s", message);
1727 return err;
1728}
1729
1730static void xennet_destroy_queues(struct netfront_info *info)
1731{
1732 unsigned int i;
1733
1734 rtnl_lock();
1735
1736 for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1737 struct netfront_queue *queue = &info->queues[i];
1738
1739 if (netif_running(info->netdev))
1740 napi_disable(&queue->napi);
1741 netif_napi_del(&queue->napi);
1742 }
1743
1744 rtnl_unlock();
1745
1746 kfree(info->queues);
1747 info->queues = NULL;
1748}
1749
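/*
 * Allocate and initialise the requested number of queues, trimming
 * *num_queues down to the count that was successfully set up, and
 * register a NAPI instance per queue.
 */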
1750static int xennet_create_queues(struct netfront_info *info,
1751 unsigned int *num_queues)
1752{
1753 unsigned int i;
1754 int ret;
1755
1756 info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
1757 GFP_KERNEL);
1758 if (!info->queues)
1759 return -ENOMEM;
1760
1761 rtnl_lock();
1762
1763 for (i = 0; i < *num_queues; i++) {
1764 struct netfront_queue *queue = &info->queues[i];
1765
1766 queue->id = i;
1767 queue->info = info;
1768
1769 ret = xennet_init_queue(queue);
1770 if (ret < 0) {
1771 dev_warn(&info->netdev->dev,
1772 "only created %d queues\n", i);
1773 *num_queues = i;
1774 break;
1775 }
1776
1777 netif_napi_add(queue->info->netdev, &queue->napi,
1778 xennet_poll, 64);
1779 if (netif_running(info->netdev))
1780 napi_enable(&queue->napi);
1781 }
1782
1783 netif_set_real_num_tx_queues(info->netdev, *num_queues);
1784
1785 rtnl_unlock();
1786
1787 if (*num_queues == 0) {
1788 dev_err(&info->netdev->dev, "no queues\n");
1789 return -EINVAL;
1790 }
1791 return 0;
1792}

/* Common code used when first setting up, and when resuming. */
1795static int talk_to_netback(struct xenbus_device *dev,
1796 struct netfront_info *info)
1797{
1798 const char *message;
1799 struct xenbus_transaction xbt;
1800 int err;
1801 unsigned int feature_split_evtchn;
1802 unsigned int i = 0;
1803 unsigned int max_queues = 0;
1804 struct netfront_queue *queue = NULL;
1805 unsigned int num_queues = 1;
1806
1807 info->netdev->irq = 0;
1808
1809
1810 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1811 "multi-queue-max-queues", "%u", &max_queues);
1812 if (err < 0)
1813 max_queues = 1;
1814 num_queues = min(max_queues, xennet_max_queues);
1815
1816
1817 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1818 "feature-split-event-channels", "%u",
1819 &feature_split_evtchn);
1820 if (err < 0)
1821 feature_split_evtchn = 0;
1822
1823
1824 err = xen_net_read_mac(dev, info->netdev->dev_addr);
1825 if (err) {
1826 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1827 goto out;
1828 }
1829
1830 if (info->queues)
1831 xennet_destroy_queues(info);
1832
1833 err = xennet_create_queues(info, &num_queues);
1834 if (err < 0) {
1835 xenbus_dev_fatal(dev, err, "creating queues");
1836 kfree(info->queues);
1837 info->queues = NULL;
1838 goto out;
1839 }
1840
1841
1842 for (i = 0; i < num_queues; ++i) {
1843 queue = &info->queues[i];
1844 err = setup_netfront(dev, queue, feature_split_evtchn);
1845 if (err)
1846 goto destroy_ring;
1847 }
1848
1849again:
1850 err = xenbus_transaction_start(&xbt);
1851 if (err) {
1852 xenbus_dev_fatal(dev, err, "starting transaction");
1853 goto destroy_ring;
1854 }
1855
1856 if (xenbus_exists(XBT_NIL,
1857 info->xbdev->otherend, "multi-queue-max-queues")) {
1858
1859 err = xenbus_printf(xbt, dev->nodename,
1860 "multi-queue-num-queues", "%u", num_queues);
1861 if (err) {
1862 message = "writing multi-queue-num-queues";
1863 goto abort_transaction_no_dev_fatal;
1864 }
1865 }
1866
1867 if (num_queues == 1) {
1868 err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0);
1869 if (err)
1870 goto abort_transaction_no_dev_fatal;
1871 } else {
1872
1873 for (i = 0; i < num_queues; ++i) {
1874 queue = &info->queues[i];
1875 err = write_queue_xenstore_keys(queue, &xbt, 1);
1876 if (err)
1877 goto abort_transaction_no_dev_fatal;
1878 }
1879 }
1880
1881
1882 err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1883 1);
1884 if (err) {
1885 message = "writing request-rx-copy";
1886 goto abort_transaction;
1887 }
1888
1889 err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1890 if (err) {
1891 message = "writing feature-rx-notify";
1892 goto abort_transaction;
1893 }
1894
1895 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1896 if (err) {
1897 message = "writing feature-sg";
1898 goto abort_transaction;
1899 }
1900
1901 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1902 if (err) {
1903 message = "writing feature-gso-tcpv4";
1904 goto abort_transaction;
1905 }
1906
1907 err = xenbus_transaction_end(xbt, 0);
1908 if (err) {
1909 if (err == -EAGAIN)
1910 goto again;
1911 xenbus_dev_fatal(dev, err, "completing transaction");
1912 goto destroy_ring;
1913 }
1914
1915 return 0;
1916
1917 abort_transaction:
1918 xenbus_dev_fatal(dev, err, "%s", message);
1919abort_transaction_no_dev_fatal:
1920 xenbus_transaction_end(xbt, 1);
1921 destroy_ring:
1922 xennet_disconnect_backend(info);
1923 xennet_destroy_queues(info);
1924 out:
1925 device_unregister(&dev->dev);
1926 return err;
1927}
1928
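/*
 * Bring the device up against the backend: verify that the backend
 * supports the copying receive path, (re)negotiate via talk_to_netback(),
 * refresh the feature flags and kick every queue so that transmission
 * can resume.
 */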
1929static int xennet_connect(struct net_device *dev)
1930{
1931 struct netfront_info *np = netdev_priv(dev);
1932 unsigned int num_queues = 0;
1933 int err;
1934 unsigned int feature_rx_copy;
1935 unsigned int j = 0;
1936 struct netfront_queue *queue = NULL;
1937
1938 err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1939 "feature-rx-copy", "%u", &feature_rx_copy);
1940 if (err != 1)
1941 feature_rx_copy = 0;
1942
1943 if (!feature_rx_copy) {
1944 dev_info(&dev->dev,
1945 "backend does not support copying receive path\n");
1946 return -ENODEV;
1947 }
1948
1949 err = talk_to_netback(np->xbdev, np);
1950 if (err)
1951 return err;

	/* talk_to_netback() sets the correct number of queues */
1954 num_queues = dev->real_num_tx_queues;
1955
1956 rtnl_lock();
1957 netdev_update_features(dev);
1958 rtnl_unlock();

	/*
	 * All public and private state should now be sane.  Get ready to
	 * start sending and receiving packets and give the driver domain a
	 * kick because we've probably just requeued some packets.
	 */
1966 netif_carrier_on(np->netdev);
1967 for (j = 0; j < num_queues; ++j) {
1968 queue = &np->queues[j];
1969
1970 notify_remote_via_irq(queue->tx_irq);
1971 if (queue->tx_irq != queue->rx_irq)
1972 notify_remote_via_irq(queue->rx_irq);
1973
1974 spin_lock_irq(&queue->tx_lock);
1975 xennet_tx_buf_gc(queue);
1976 spin_unlock_irq(&queue->tx_lock);
1977
1978 spin_lock_bh(&queue->rx_lock);
1979 xennet_alloc_rx_buffers(queue);
1980 spin_unlock_bh(&queue->rx_lock);
1981 }
1982
1983 return 0;
1984}

/**
 * Callback received when the backend's state changes.
 */
1989static void netback_changed(struct xenbus_device *dev,
1990 enum xenbus_state backend_state)
1991{
1992 struct netfront_info *np = dev_get_drvdata(&dev->dev);
1993 struct net_device *netdev = np->netdev;
1994
1995 dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
1996
1997 switch (backend_state) {
1998 case XenbusStateInitialising:
1999 case XenbusStateInitialised:
2000 case XenbusStateReconfiguring:
2001 case XenbusStateReconfigured:
2002 case XenbusStateUnknown:
2003 break;
2004
2005 case XenbusStateInitWait:
2006 if (dev->state != XenbusStateInitialising)
2007 break;
2008 if (xennet_connect(netdev) != 0)
2009 break;
2010 xenbus_switch_state(dev, XenbusStateConnected);
2011 break;
2012
2013 case XenbusStateConnected:
2014 netdev_notify_peers(netdev);
2015 break;
2016
2017 case XenbusStateClosed:
2018 if (dev->state == XenbusStateClosed)
2019 break;
		/* Missed the backend's CLOSING state -- fallthrough */
2021 case XenbusStateClosing:
2022 xenbus_frontend_closed(dev);
2023 break;
2024 }
2025}
2026
2027static const struct xennet_stat {
2028 char name[ETH_GSTRING_LEN];
2029 u16 offset;
2030} xennet_stats[] = {
2031 {
2032 "rx_gso_checksum_fixup",
2033 offsetof(struct netfront_info, rx_gso_checksum_fixup)
2034 },
2035};
2036
2037static int xennet_get_sset_count(struct net_device *dev, int string_set)
2038{
2039 switch (string_set) {
2040 case ETH_SS_STATS:
2041 return ARRAY_SIZE(xennet_stats);
2042 default:
2043 return -EINVAL;
2044 }
2045}
2046
2047static void xennet_get_ethtool_stats(struct net_device *dev,
2048 struct ethtool_stats *stats, u64 * data)
2049{
2050 void *np = netdev_priv(dev);
2051 int i;
2052
2053 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2054 data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2055}
2056
2057static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
2058{
2059 int i;
2060
2061 switch (stringset) {
2062 case ETH_SS_STATS:
2063 for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2064 memcpy(data + i * ETH_GSTRING_LEN,
2065 xennet_stats[i].name, ETH_GSTRING_LEN);
2066 break;
2067 }
2068}
2069
2070static const struct ethtool_ops xennet_ethtool_ops =
2071{
2072 .get_link = ethtool_op_get_link,
2073
2074 .get_sset_count = xennet_get_sset_count,
2075 .get_ethtool_stats = xennet_get_ethtool_stats,
2076 .get_strings = xennet_get_strings,
2077};
2078
2079#ifdef CONFIG_SYSFS
2080static ssize_t show_rxbuf(struct device *dev,
2081 struct device_attribute *attr, char *buf)
2082{
2083 return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2084}
2085
2086static ssize_t store_rxbuf(struct device *dev,
2087 struct device_attribute *attr,
2088 const char *buf, size_t len)
2089{
2090 char *endp;
2091 unsigned long target;
2092
2093 if (!capable(CAP_NET_ADMIN))
2094 return -EPERM;
2095
2096 target = simple_strtoul(buf, &endp, 0);
2097 if (endp == buf)
2098 return -EBADMSG;

	/* The rx ring is a fixed size; the value is validated but otherwise ignored. */
2102 return len;
2103}
2104
2105static struct device_attribute xennet_attrs[] = {
2106 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf),
2107 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf),
2108 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL),
2109};
2110
2111static int xennet_sysfs_addif(struct net_device *netdev)
2112{
2113 int i;
2114 int err;
2115
2116 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
2117 err = device_create_file(&netdev->dev,
2118 &xennet_attrs[i]);
2119 if (err)
2120 goto fail;
2121 }
2122 return 0;
2123
2124 fail:
2125 while (--i >= 0)
2126 device_remove_file(&netdev->dev, &xennet_attrs[i]);
2127 return err;
2128}
2129
2130static void xennet_sysfs_delif(struct net_device *netdev)
2131{
2132 int i;
2133
2134 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
2135 device_remove_file(&netdev->dev, &xennet_attrs[i]);
2136}
2137
2138#endif
2139
2140static const struct xenbus_device_id netfront_ids[] = {
2141 { "vif" },
2142 { "" }
2143};
2144
2145
2146static int xennet_remove(struct xenbus_device *dev)
2147{
2148 struct netfront_info *info = dev_get_drvdata(&dev->dev);
2149
2150 dev_dbg(&dev->dev, "%s\n", dev->nodename);
2151
2152 xennet_disconnect_backend(info);
2153
2154 xennet_sysfs_delif(info->netdev);
2155
2156 unregister_netdev(info->netdev);
2157
2158 if (info->queues)
2159 xennet_destroy_queues(info);
2160 xennet_free_netdev(info->netdev);
2161
2162 return 0;
2163}
2164
2165static DEFINE_XENBUS_DRIVER(netfront, ,
2166 .probe = netfront_probe,
2167 .remove = xennet_remove,
2168 .resume = netfront_resume,
2169 .otherend_changed = netback_changed,
2170);
2171
2172static int __init netif_init(void)
2173{
2174 if (!xen_domain())
2175 return -ENODEV;
2176
2177 if (!xen_has_pv_nic_devices())
2178 return -ENODEV;
2179
2180 pr_info("Initialising Xen virtual ethernet driver\n");

	/* Allow at most one queue per online CPU (capped at
	 * MAX_QUEUES_DEFAULT) if the user has not specified a value.
	 */
2185 if (xennet_max_queues == 0)
2186 xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
2187 num_online_cpus());
2188
2189 return xenbus_register_frontend(&netfront_driver);
2190}
2191module_init(netif_init);
2192
2193
2194static void __exit netif_exit(void)
2195{
2196 xenbus_unregister_driver(&netfront_driver);
2197}
2198module_exit(netif_exit);
2199
2200MODULE_DESCRIPTION("Xen virtual network device frontend");
2201MODULE_LICENSE("GPL");
2202MODULE_ALIAS("xen:vif");
2203MODULE_ALIAS("xennet");
2204