1
2
3
4
5
6
7
8
9
10
11#include <linux/version.h>
12#include <linux/memblock.h>
13#include <linux/etherdevice.h>
14#include <linux/ethtool.h>
15#include <linux/inetdevice.h>
16#include <linux/init.h>
17#include <linux/list.h>
18#include <linux/netdevice.h>
19#include <linux/platform_device.h>
20#include <linux/rtnetlink.h>
21#include <linux/skbuff.h>
22#include <linux/slab.h>
23#include <linux/interrupt.h>
24#include <init.h>
25#include <irq_kern.h>
26#include <irq_user.h>
27#include <net_kern.h>
28#include <os.h>
29#include "mconsole_kern.h"
30#include "vector_user.h"
31#include "vector_kern.h"
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Driver identification strings. DRIVER_NAME also names the platform
 * driver/device; both strings are reported by vector_net_get_drvinfo().
 */
#define DRIVER_NAME	"uml-vector"
#define DRIVER_VERSION	"01"
47struct vector_cmd_line_arg {
48 struct list_head list;
49 int unit;
50 char *arguments;
51};
52
53struct vector_device {
54 struct list_head list;
55 struct net_device *dev;
56 struct platform_device pdev;
57 int unit;
58 int opened;
59};
60
/* NOTE(review): presumably a list of struct vector_cmd_line_arg - confirm. */
static LIST_HEAD(vec_cmd_line);

/* List of struct vector_device; find_device() walks it under the lock. */
static DEFINE_SPINLOCK(vector_devices_lock);
static LIST_HEAD(vector_devices);

/* Non-zero once the platform driver has been registered. */
static int driver_registered;

/* Defined further down; needed by vector_config(). */
static void vector_eth_configure(int n, struct arglist *def);
69
70
71
72
73
/* Headroom used when no "headroom" argument is given (get_headroom()). */
#define DEFAULT_HEADROOM	2
/* Slack added on top of packet size + headroom when sizing buffers. */
#define SAFETY_MARGIN		32
/* Queue depth used when no "depth" argument is given (get_depth()). */
#define DEFAULT_VECTOR_SIZE	64
/* Packets shorter than this are sent directly from vector_net_start_xmit(). */
#define TX_SMALL_PACKET		128
/* Number of extra iovec entries requested per queue slot. */
#define MAX_IOV_SIZE		(MAX_SKB_FRAGS + 1)
79
80static const struct {
81 const char string[ETH_GSTRING_LEN];
82} ethtool_stats_keys[] = {
83 { "rx_queue_max" },
84 { "rx_queue_running_average" },
85 { "tx_queue_max" },
86 { "tx_queue_running_average" },
87 { "rx_encaps_errors" },
88 { "tx_timeout_count" },
89 { "tx_restart_queue" },
90 { "tx_kicks" },
91 { "tx_flow_control_xon" },
92 { "tx_flow_control_xoff" },
93 { "rx_csum_offload_good" },
94 { "rx_csum_offload_errors"},
95 { "sg_ok"},
96 { "sg_linearized"},
97};
98
99#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)
100
101static void vector_reset_stats(struct vector_private *vp)
102{
103 vp->estats.rx_queue_max = 0;
104 vp->estats.rx_queue_running_average = 0;
105 vp->estats.tx_queue_max = 0;
106 vp->estats.tx_queue_running_average = 0;
107 vp->estats.rx_encaps_errors = 0;
108 vp->estats.tx_timeout_count = 0;
109 vp->estats.tx_restart_queue = 0;
110 vp->estats.tx_kicks = 0;
111 vp->estats.tx_flow_control_xon = 0;
112 vp->estats.tx_flow_control_xoff = 0;
113 vp->estats.sg_ok = 0;
114 vp->estats.sg_linearized = 0;
115}
116
117static int get_mtu(struct arglist *def)
118{
119 char *mtu = uml_vector_fetch_arg(def, "mtu");
120 long result;
121
122 if (mtu != NULL) {
123 if (kstrtoul(mtu, 10, &result) == 0)
124 return result;
125 }
126 return ETH_MAX_PACKET;
127}
128
129static int get_depth(struct arglist *def)
130{
131 char *mtu = uml_vector_fetch_arg(def, "depth");
132 long result;
133
134 if (mtu != NULL) {
135 if (kstrtoul(mtu, 10, &result) == 0)
136 return result;
137 }
138 return DEFAULT_VECTOR_SIZE;
139}
140
141static int get_headroom(struct arglist *def)
142{
143 char *mtu = uml_vector_fetch_arg(def, "headroom");
144 long result;
145
146 if (mtu != NULL) {
147 if (kstrtoul(mtu, 10, &result) == 0)
148 return result;
149 }
150 return DEFAULT_HEADROOM;
151}
152
153static int get_req_size(struct arglist *def)
154{
155 char *gro = uml_vector_fetch_arg(def, "gro");
156 long result;
157
158 if (gro != NULL) {
159 if (kstrtoul(gro, 10, &result) == 0) {
160 if (result > 0)
161 return 65536;
162 }
163 }
164 return get_mtu(def) + ETH_HEADER_OTHER +
165 get_headroom(def) + SAFETY_MARGIN;
166}
167
168
169static int get_transport_options(struct arglist *def)
170{
171 char *transport = uml_vector_fetch_arg(def, "transport");
172 char *vector = uml_vector_fetch_arg(def, "vec");
173
174 int vec_rx = VECTOR_RX;
175 int vec_tx = VECTOR_TX;
176 long parsed;
177
178 if (vector != NULL) {
179 if (kstrtoul(vector, 10, &parsed) == 0) {
180 if (parsed == 0) {
181 vec_rx = 0;
182 vec_tx = 0;
183 }
184 }
185 }
186
187
188 if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
189 return (vec_rx | VECTOR_BPF);
190 if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
191 return (vec_rx | vec_tx | VECTOR_QDISC_BYPASS);
192 return (vec_rx | vec_tx);
193}
194
195
196
197
198
199
200
201
202
/*
 * Scratch buffer that vector_legacy_rx() reads into when no skb could be
 * allocated; the data landing in it is not looked at afterwards.
 * NOTE(review): its allocation is not visible in this part of the file.
 */
#define DROP_BUFFER_SIZE	32

static char *drop_buffer;
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220static int vector_advancehead(struct vector_queue *qi, int advance)
221{
222 int queue_depth;
223
224 qi->head =
225 (qi->head + advance)
226 % qi->max_depth;
227
228
229 spin_lock(&qi->tail_lock);
230 qi->queue_depth -= advance;
231
232
233
234
235
236 if (qi->queue_depth == 0) {
237 qi->head = 0;
238 qi->tail = 0;
239 }
240 queue_depth = qi->queue_depth;
241 spin_unlock(&qi->tail_lock);
242 return queue_depth;
243}
244
245
246
247
248
249
250static int vector_advancetail(struct vector_queue *qi, int advance)
251{
252 int queue_depth;
253
254 qi->tail =
255 (qi->tail + advance)
256 % qi->max_depth;
257 spin_lock(&qi->head_lock);
258 qi->queue_depth += advance;
259 queue_depth = qi->queue_depth;
260 spin_unlock(&qi->head_lock);
261 return queue_depth;
262}
263
264static int prep_msg(struct vector_private *vp,
265 struct sk_buff *skb,
266 struct iovec *iov)
267{
268 int iov_index = 0;
269 int nr_frags, frag;
270 skb_frag_t *skb_frag;
271
272 nr_frags = skb_shinfo(skb)->nr_frags;
273 if (nr_frags > MAX_IOV_SIZE) {
274 if (skb_linearize(skb) != 0)
275 goto drop;
276 }
277 if (vp->header_size > 0) {
278 iov[iov_index].iov_len = vp->header_size;
279 vp->form_header(iov[iov_index].iov_base, skb, vp);
280 iov_index++;
281 }
282 iov[iov_index].iov_base = skb->data;
283 if (nr_frags > 0) {
284 iov[iov_index].iov_len = skb->len - skb->data_len;
285 vp->estats.sg_ok++;
286 } else
287 iov[iov_index].iov_len = skb->len;
288 iov_index++;
289 for (frag = 0; frag < nr_frags; frag++) {
290 skb_frag = &skb_shinfo(skb)->frags[frag];
291 iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
292 iov[iov_index].iov_len = skb_frag_size(skb_frag);
293 iov_index++;
294 }
295 return iov_index;
296drop:
297 return -1;
298}
299
300
301
302
303
304
305static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
306{
307 struct vector_private *vp = netdev_priv(qi->dev);
308 int queue_depth;
309 int packet_len;
310 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
311 int iov_count;
312
313 spin_lock(&qi->tail_lock);
314 spin_lock(&qi->head_lock);
315 queue_depth = qi->queue_depth;
316 spin_unlock(&qi->head_lock);
317
318 if (skb)
319 packet_len = skb->len;
320
321 if (queue_depth < qi->max_depth) {
322
323 *(qi->skbuff_vector + qi->tail) = skb;
324 mmsg_vector += qi->tail;
325 iov_count = prep_msg(
326 vp,
327 skb,
328 mmsg_vector->msg_hdr.msg_iov
329 );
330 if (iov_count < 1)
331 goto drop;
332 mmsg_vector->msg_hdr.msg_iovlen = iov_count;
333 mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
334 mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
335 queue_depth = vector_advancetail(qi, 1);
336 } else
337 goto drop;
338 spin_unlock(&qi->tail_lock);
339 return queue_depth;
340drop:
341 qi->dev->stats.tx_dropped++;
342 if (skb != NULL) {
343 packet_len = skb->len;
344 dev_consume_skb_any(skb);
345 netdev_completed_queue(qi->dev, 1, packet_len);
346 }
347 spin_unlock(&qi->tail_lock);
348 return queue_depth;
349}
350
351static int consume_vector_skbs(struct vector_queue *qi, int count)
352{
353 struct sk_buff *skb;
354 int skb_index;
355 int bytes_compl = 0;
356
357 for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
358 skb = *(qi->skbuff_vector + skb_index);
359
360
361
362 bytes_compl += skb->len;
363 *(qi->skbuff_vector + skb_index) = NULL;
364 dev_consume_skb_any(skb);
365 }
366 qi->dev->stats.tx_bytes += bytes_compl;
367 qi->dev->stats.tx_packets += count;
368 netdev_completed_queue(qi->dev, count, bytes_compl);
369 return vector_advancehead(qi, count);
370}
371
372
373
374
375
376
377
378
379static int vector_send(struct vector_queue *qi)
380{
381 struct vector_private *vp = netdev_priv(qi->dev);
382 struct mmsghdr *send_from;
383 int result = 0, send_len, queue_depth = qi->max_depth;
384
385 if (spin_trylock(&qi->head_lock)) {
386 if (spin_trylock(&qi->tail_lock)) {
387
388 queue_depth = qi->queue_depth;
389 spin_unlock(&qi->tail_lock);
390 while (queue_depth > 0) {
391
392 send_len = queue_depth;
393 send_from = qi->mmsg_vector;
394 send_from += qi->head;
395
396 if (send_len + qi->head > qi->max_depth)
397 send_len = qi->max_depth - qi->head;
398
399 if (send_len > 0) {
400 result = uml_vector_sendmmsg(
401 vp->fds->tx_fd,
402 send_from,
403 send_len,
404 0
405 );
406 vp->in_write_poll =
407 (result != send_len);
408 }
409
410
411
412
413
414 if (result < 0) {
415 if (net_ratelimit())
416 netdev_err(vp->dev, "sendmmsg err=%i\n",
417 result);
418 result = send_len;
419 }
420 if (result > 0) {
421 queue_depth =
422 consume_vector_skbs(qi, result);
423
424
425
426
427 if (result > vp->estats.tx_queue_max)
428 vp->estats.tx_queue_max = result;
429 vp->estats.tx_queue_running_average =
430 (vp->estats.tx_queue_running_average + result) >> 1;
431 }
432 netif_trans_update(qi->dev);
433 netif_wake_queue(qi->dev);
434
435
436
437 if (result != send_len) {
438 vp->estats.tx_restart_queue++;
439 break;
440 }
441 }
442 }
443 spin_unlock(&qi->head_lock);
444 } else {
445 tasklet_schedule(&vp->tx_poll);
446 }
447 return queue_depth;
448}
449
450
451
452
453
454static void destroy_queue(struct vector_queue *qi)
455{
456 int i;
457 struct iovec *iov;
458 struct vector_private *vp = netdev_priv(qi->dev);
459 struct mmsghdr *mmsg_vector;
460
461 if (qi == NULL)
462 return;
463
464
465
466 if (qi->skbuff_vector != NULL) {
467 for (i = 0; i < qi->max_depth; i++) {
468 if (*(qi->skbuff_vector + i) != NULL)
469 dev_kfree_skb_any(*(qi->skbuff_vector + i));
470 }
471 kfree(qi->skbuff_vector);
472 }
473
474 if (qi->mmsg_vector != NULL) {
475 mmsg_vector = qi->mmsg_vector;
476 for (i = 0; i < qi->max_depth; i++) {
477 iov = mmsg_vector->msg_hdr.msg_iov;
478 if (iov != NULL) {
479 if ((vp->header_size > 0) &&
480 (iov->iov_base != NULL))
481 kfree(iov->iov_base);
482 kfree(iov);
483 }
484 mmsg_vector++;
485 }
486 kfree(qi->mmsg_vector);
487 }
488 kfree(qi);
489}
490
491
492
493
494static struct vector_queue *create_queue(
495 struct vector_private *vp,
496 int max_size,
497 int header_size,
498 int num_extra_frags)
499{
500 struct vector_queue *result;
501 int i;
502 struct iovec *iov;
503 struct mmsghdr *mmsg_vector;
504
505 result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
506 if (result == NULL)
507 return NULL;
508 result->max_depth = max_size;
509 result->dev = vp->dev;
510 result->mmsg_vector = kmalloc(
511 (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
512 if (result->mmsg_vector == NULL)
513 goto out_mmsg_fail;
514 result->skbuff_vector = kmalloc(
515 (sizeof(void *) * max_size), GFP_KERNEL);
516 if (result->skbuff_vector == NULL)
517 goto out_skb_fail;
518
519
520
521 mmsg_vector = result->mmsg_vector;
522 for (i = 0; i < max_size; i++) {
523
524
525
526 *(result->skbuff_vector + i) = NULL;
527 mmsg_vector->msg_hdr.msg_iov = NULL;
528 mmsg_vector++;
529 }
530 mmsg_vector = result->mmsg_vector;
531 result->max_iov_frags = num_extra_frags;
532 for (i = 0; i < max_size; i++) {
533 if (vp->header_size > 0)
534 iov = kmalloc_array(3 + num_extra_frags,
535 sizeof(struct iovec),
536 GFP_KERNEL
537 );
538 else
539 iov = kmalloc_array(2 + num_extra_frags,
540 sizeof(struct iovec),
541 GFP_KERNEL
542 );
543 if (iov == NULL)
544 goto out_fail;
545 mmsg_vector->msg_hdr.msg_iov = iov;
546 mmsg_vector->msg_hdr.msg_iovlen = 1;
547 mmsg_vector->msg_hdr.msg_control = NULL;
548 mmsg_vector->msg_hdr.msg_controllen = 0;
549 mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
550 mmsg_vector->msg_hdr.msg_name = NULL;
551 mmsg_vector->msg_hdr.msg_namelen = 0;
552 if (vp->header_size > 0) {
553 iov->iov_base = kmalloc(header_size, GFP_KERNEL);
554 if (iov->iov_base == NULL)
555 goto out_fail;
556 iov->iov_len = header_size;
557 mmsg_vector->msg_hdr.msg_iovlen = 2;
558 iov++;
559 }
560 iov->iov_base = NULL;
561 iov->iov_len = 0;
562 mmsg_vector++;
563 }
564 spin_lock_init(&result->head_lock);
565 spin_lock_init(&result->tail_lock);
566 result->queue_depth = 0;
567 result->head = 0;
568 result->tail = 0;
569 return result;
570out_skb_fail:
571 kfree(result->mmsg_vector);
572out_mmsg_fail:
573 kfree(result);
574 return NULL;
575out_fail:
576 destroy_queue(result);
577 return NULL;
578}
579
580
581
582
583
584
585
586
587
588
589static struct sk_buff *prep_skb(
590 struct vector_private *vp,
591 struct user_msghdr *msg)
592{
593 int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
594 struct sk_buff *result;
595 int iov_index = 0, len;
596 struct iovec *iov = msg->msg_iov;
597 int err, nr_frags, frag;
598 skb_frag_t *skb_frag;
599
600 if (vp->req_size <= linear)
601 len = linear;
602 else
603 len = vp->req_size;
604 result = alloc_skb_with_frags(
605 linear,
606 len - vp->max_packet,
607 3,
608 &err,
609 GFP_ATOMIC
610 );
611 if (vp->header_size > 0)
612 iov_index++;
613 if (result == NULL) {
614 iov[iov_index].iov_base = NULL;
615 iov[iov_index].iov_len = 0;
616 goto done;
617 }
618 skb_reserve(result, vp->headroom);
619 result->dev = vp->dev;
620 skb_put(result, vp->max_packet);
621 result->data_len = len - vp->max_packet;
622 result->len += len - vp->max_packet;
623 skb_reset_mac_header(result);
624 result->ip_summed = CHECKSUM_NONE;
625 iov[iov_index].iov_base = result->data;
626 iov[iov_index].iov_len = vp->max_packet;
627 iov_index++;
628
629 nr_frags = skb_shinfo(result)->nr_frags;
630 for (frag = 0; frag < nr_frags; frag++) {
631 skb_frag = &skb_shinfo(result)->frags[frag];
632 iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
633 if (iov[iov_index].iov_base != NULL)
634 iov[iov_index].iov_len = skb_frag_size(skb_frag);
635 else
636 iov[iov_index].iov_len = 0;
637 iov_index++;
638 }
639done:
640 msg->msg_iovlen = iov_index;
641 return result;
642}
643
644
645
646
647static void prep_queue_for_rx(struct vector_queue *qi)
648{
649 struct vector_private *vp = netdev_priv(qi->dev);
650 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
651 void **skbuff_vector = qi->skbuff_vector;
652 int i;
653
654 if (qi->queue_depth == 0)
655 return;
656 for (i = 0; i < qi->queue_depth; i++) {
657
658
659
660
661
662 *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
663 skbuff_vector++;
664 mmsg_vector++;
665 }
666 qi->queue_depth = 0;
667}
668
669static struct vector_device *find_device(int n)
670{
671 struct vector_device *device;
672 struct list_head *ele;
673
674 spin_lock(&vector_devices_lock);
675 list_for_each(ele, &vector_devices) {
676 device = list_entry(ele, struct vector_device, list);
677 if (device->unit == n)
678 goto out;
679 }
680 device = NULL;
681 out:
682 spin_unlock(&vector_devices_lock);
683 return device;
684}
685
686static int vector_parse(char *str, int *index_out, char **str_out,
687 char **error_out)
688{
689 int n, len, err;
690 char *start = str;
691
692 len = strlen(str);
693
694 while ((*str != ':') && (strlen(str) > 1))
695 str++;
696 if (*str != ':') {
697 *error_out = "Expected ':' after device number";
698 return -EINVAL;
699 }
700 *str = '\0';
701
702 err = kstrtouint(start, 0, &n);
703 if (err < 0) {
704 *error_out = "Bad device number";
705 return err;
706 }
707
708 str++;
709 if (find_device(n)) {
710 *error_out = "Device already configured";
711 return -EINVAL;
712 }
713
714 *index_out = n;
715 *str_out = str;
716 return 0;
717}
718
719static int vector_config(char *str, char **error_out)
720{
721 int err, n;
722 char *params;
723 struct arglist *parsed;
724
725 err = vector_parse(str, &n, ¶ms, error_out);
726 if (err != 0)
727 return err;
728
729
730
731
732
733
734 params = kstrdup(params, GFP_KERNEL);
735 if (params == NULL) {
736 *error_out = "vector_config failed to strdup string";
737 return -ENOMEM;
738 }
739
740 parsed = uml_parse_vector_ifspec(params);
741
742 if (parsed == NULL) {
743 *error_out = "vector_config failed to parse parameters";
744 return -EINVAL;
745 }
746
747 vector_eth_configure(n, parsed);
748 return 0;
749}
750
/*
 * Parse a device id from *@str. The string has to consist of exactly one
 * number (any base understood by simple_strtoul() with base 0).
 *
 * On success both @start_out and @end_out receive the id, *@str is moved
 * to the end of the number and the id is returned. Returns -1 if the
 * string is empty or has trailing characters.
 */
static int vector_id(char **str, int *start_out, int *end_out)
{
	char *tail;
	int id = simple_strtoul(*str, &tail, 0);

	/* need at least one digit and nothing after the number */
	if ((tail == *str) || (*tail != '\0'))
		return -1;

	*start_out = id;
	*end_out = id;
	*str = tail;
	return id;
}
765
766static int vector_remove(int n, char **error_out)
767{
768 struct vector_device *vec_d;
769 struct net_device *dev;
770 struct vector_private *vp;
771
772 vec_d = find_device(n);
773 if (vec_d == NULL)
774 return -ENODEV;
775 dev = vec_d->dev;
776 vp = netdev_priv(dev);
777 if (vp->fds != NULL)
778 return -EBUSY;
779 unregister_netdev(dev);
780 platform_device_unregister(&vec_d->pdev);
781 return 0;
782}
783
784
785
786
787
788
789
790static struct platform_driver uml_net_driver = {
791 .driver = {
792 .name = DRIVER_NAME,
793 },
794};
795
796
797static void vector_device_release(struct device *dev)
798{
799 struct vector_device *device = dev_get_drvdata(dev);
800 struct net_device *netdev = device->dev;
801
802 list_del(&device->list);
803 kfree(device);
804 free_netdev(netdev);
805}
806
807
808
809
810
811static int vector_legacy_rx(struct vector_private *vp)
812{
813 int pkt_len;
814 struct user_msghdr hdr;
815 struct iovec iov[2 + MAX_IOV_SIZE];
816 int iovpos = 0;
817 struct sk_buff *skb;
818 int header_check;
819
820 hdr.msg_name = NULL;
821 hdr.msg_namelen = 0;
822 hdr.msg_iov = (struct iovec *) &iov;
823 hdr.msg_control = NULL;
824 hdr.msg_controllen = 0;
825 hdr.msg_flags = 0;
826
827 if (vp->header_size > 0) {
828 iov[0].iov_base = vp->header_rxbuffer;
829 iov[0].iov_len = vp->header_size;
830 }
831
832 skb = prep_skb(vp, &hdr);
833
834 if (skb == NULL) {
835
836
837
838 iov[iovpos].iov_base = drop_buffer;
839 iov[iovpos].iov_len = DROP_BUFFER_SIZE;
840 hdr.msg_iovlen = 1;
841 vp->dev->stats.rx_dropped++;
842 }
843
844 pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
845
846 if (skb != NULL) {
847 if (pkt_len > vp->header_size) {
848 if (vp->header_size > 0) {
849 header_check = vp->verify_header(
850 vp->header_rxbuffer, skb, vp);
851 if (header_check < 0) {
852 dev_kfree_skb_irq(skb);
853 vp->dev->stats.rx_dropped++;
854 vp->estats.rx_encaps_errors++;
855 return 0;
856 }
857 if (header_check > 0) {
858 vp->estats.rx_csum_offload_good++;
859 skb->ip_summed = CHECKSUM_UNNECESSARY;
860 }
861 }
862 pskb_trim(skb, pkt_len - vp->rx_header_size);
863 skb->protocol = eth_type_trans(skb, skb->dev);
864 vp->dev->stats.rx_bytes += skb->len;
865 vp->dev->stats.rx_packets++;
866 netif_rx(skb);
867 } else {
868 dev_kfree_skb_irq(skb);
869 }
870 }
871 return pkt_len;
872}
873
874
875
876
877
878
879
880
881static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
882{
883 struct iovec iov[3 + MAX_IOV_SIZE];
884 int iov_count, pkt_len = 0;
885
886 iov[0].iov_base = vp->header_txbuffer;
887 iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
888
889 if (iov_count < 1)
890 goto drop;
891 pkt_len = uml_vector_writev(
892 vp->fds->tx_fd,
893 (struct iovec *) &iov,
894 iov_count
895 );
896
897 netif_trans_update(vp->dev);
898 netif_wake_queue(vp->dev);
899
900 if (pkt_len > 0) {
901 vp->dev->stats.tx_bytes += skb->len;
902 vp->dev->stats.tx_packets++;
903 } else {
904 vp->dev->stats.tx_dropped++;
905 }
906 consume_skb(skb);
907 return pkt_len;
908drop:
909 vp->dev->stats.tx_dropped++;
910 consume_skb(skb);
911 return pkt_len;
912}
913
914
915
916
917
918
919static int vector_mmsg_rx(struct vector_private *vp)
920{
921 int packet_count, i;
922 struct vector_queue *qi = vp->rx_queue;
923 struct sk_buff *skb;
924 struct mmsghdr *mmsg_vector = qi->mmsg_vector;
925 void **skbuff_vector = qi->skbuff_vector;
926 int header_check;
927
928
929
930
931
932 prep_queue_for_rx(qi);
933
934
935
936 packet_count = uml_vector_recvmmsg(
937 vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
938
939 if (packet_count <= 0)
940 return packet_count;
941
942
943
944
945
946
947 qi->queue_depth = packet_count;
948
949 for (i = 0; i < packet_count; i++) {
950 skb = (*skbuff_vector);
951 if (mmsg_vector->msg_len > vp->header_size) {
952 if (vp->header_size > 0) {
953 header_check = vp->verify_header(
954 mmsg_vector->msg_hdr.msg_iov->iov_base,
955 skb,
956 vp
957 );
958 if (header_check < 0) {
959
960
961
962
963
964 dev_kfree_skb_irq(skb);
965 vp->estats.rx_encaps_errors++;
966 continue;
967 }
968 if (header_check > 0) {
969 vp->estats.rx_csum_offload_good++;
970 skb->ip_summed = CHECKSUM_UNNECESSARY;
971 }
972 }
973 pskb_trim(skb,
974 mmsg_vector->msg_len - vp->rx_header_size);
975 skb->protocol = eth_type_trans(skb, skb->dev);
976
977
978
979
980 vp->dev->stats.rx_bytes += skb->len;
981 vp->dev->stats.rx_packets++;
982 netif_rx(skb);
983 } else {
984
985
986
987
988 if (skb != NULL)
989 dev_kfree_skb_irq(skb);
990 }
991 (*skbuff_vector) = NULL;
992
993 mmsg_vector++;
994 skbuff_vector++;
995 }
996 if (packet_count > 0) {
997 if (vp->estats.rx_queue_max < packet_count)
998 vp->estats.rx_queue_max = packet_count;
999 vp->estats.rx_queue_running_average =
1000 (vp->estats.rx_queue_running_average + packet_count) >> 1;
1001 }
1002 return packet_count;
1003}
1004
1005static void vector_rx(struct vector_private *vp)
1006{
1007 int err;
1008
1009 if ((vp->options & VECTOR_RX) > 0)
1010 while ((err = vector_mmsg_rx(vp)) > 0)
1011 ;
1012 else
1013 while ((err = vector_legacy_rx(vp)) > 0)
1014 ;
1015 if ((err != 0) && net_ratelimit())
1016 netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
1017}
1018
1019static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
1020{
1021 struct vector_private *vp = netdev_priv(dev);
1022 int queue_depth = 0;
1023
1024 if ((vp->options & VECTOR_TX) == 0) {
1025 writev_tx(vp, skb);
1026 return NETDEV_TX_OK;
1027 }
1028
1029
1030
1031
1032
1033 netdev_sent_queue(vp->dev, skb->len);
1034 queue_depth = vector_enqueue(vp->tx_queue, skb);
1035
1036
1037
1038
1039
1040 if (queue_depth >= vp->tx_queue->max_depth - 1) {
1041 vp->estats.tx_kicks++;
1042 netif_stop_queue(dev);
1043 vector_send(vp->tx_queue);
1044 return NETDEV_TX_OK;
1045 }
1046 if (netdev_xmit_more()) {
1047 mod_timer(&vp->tl, vp->coalesce);
1048 return NETDEV_TX_OK;
1049 }
1050 if (skb->len < TX_SMALL_PACKET) {
1051 vp->estats.tx_kicks++;
1052 vector_send(vp->tx_queue);
1053 } else
1054 tasklet_schedule(&vp->tx_poll);
1055 return NETDEV_TX_OK;
1056}
1057
1058static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
1059{
1060 struct net_device *dev = dev_id;
1061 struct vector_private *vp = netdev_priv(dev);
1062
1063 if (!netif_running(dev))
1064 return IRQ_NONE;
1065 vector_rx(vp);
1066 return IRQ_HANDLED;
1067
1068}
1069
1070static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
1071{
1072 struct net_device *dev = dev_id;
1073 struct vector_private *vp = netdev_priv(dev);
1074
1075 if (!netif_running(dev))
1076 return IRQ_NONE;
1077
1078
1079
1080
1081
1082
1083
1084 if (vp->in_write_poll)
1085 tasklet_schedule(&vp->tx_poll);
1086 return IRQ_HANDLED;
1087
1088}
1089
/*
 * Offset added to VECTOR_BASE_IRQ for the next irq request;
 * vector_net_open() advances it modulo VECTOR_IRQ_SPACE after each use.
 */
static int irq_rr;
1091
1092static int vector_net_close(struct net_device *dev)
1093{
1094 struct vector_private *vp = netdev_priv(dev);
1095 unsigned long flags;
1096
1097 netif_stop_queue(dev);
1098 del_timer(&vp->tl);
1099
1100 if (vp->fds == NULL)
1101 return 0;
1102
1103
1104 if (vp->rx_irq > 0) {
1105 um_free_irq(vp->rx_irq, dev);
1106 vp->rx_irq = 0;
1107 }
1108 if (vp->tx_irq > 0) {
1109 um_free_irq(vp->tx_irq, dev);
1110 vp->tx_irq = 0;
1111 }
1112 tasklet_kill(&vp->tx_poll);
1113 if (vp->fds->rx_fd > 0) {
1114 os_close_file(vp->fds->rx_fd);
1115 vp->fds->rx_fd = -1;
1116 }
1117 if (vp->fds->tx_fd > 0) {
1118 os_close_file(vp->fds->tx_fd);
1119 vp->fds->tx_fd = -1;
1120 }
1121 if (vp->bpf != NULL)
1122 kfree(vp->bpf);
1123 if (vp->fds->remote_addr != NULL)
1124 kfree(vp->fds->remote_addr);
1125 if (vp->transport_data != NULL)
1126 kfree(vp->transport_data);
1127 if (vp->header_rxbuffer != NULL)
1128 kfree(vp->header_rxbuffer);
1129 if (vp->header_txbuffer != NULL)
1130 kfree(vp->header_txbuffer);
1131 if (vp->rx_queue != NULL)
1132 destroy_queue(vp->rx_queue);
1133 if (vp->tx_queue != NULL)
1134 destroy_queue(vp->tx_queue);
1135 kfree(vp->fds);
1136 vp->fds = NULL;
1137 spin_lock_irqsave(&vp->lock, flags);
1138 vp->opened = false;
1139 spin_unlock_irqrestore(&vp->lock, flags);
1140 return 0;
1141}
1142
1143
1144
1145static void vector_tx_poll(unsigned long data)
1146{
1147 struct vector_private *vp = (struct vector_private *)data;
1148
1149 vp->estats.tx_kicks++;
1150 vector_send(vp->tx_queue);
1151}
1152static void vector_reset_tx(struct work_struct *work)
1153{
1154 struct vector_private *vp =
1155 container_of(work, struct vector_private, reset_tx);
1156 netdev_reset_queue(vp->dev);
1157 netif_start_queue(vp->dev);
1158 netif_wake_queue(vp->dev);
1159}
1160static int vector_net_open(struct net_device *dev)
1161{
1162 struct vector_private *vp = netdev_priv(dev);
1163 unsigned long flags;
1164 int err = -EINVAL;
1165 struct vector_device *vdevice;
1166
1167 spin_lock_irqsave(&vp->lock, flags);
1168 if (vp->opened) {
1169 spin_unlock_irqrestore(&vp->lock, flags);
1170 return -ENXIO;
1171 }
1172 vp->opened = true;
1173 spin_unlock_irqrestore(&vp->lock, flags);
1174
1175 vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
1176
1177 if (vp->fds == NULL)
1178 goto out_close;
1179
1180 if (build_transport_data(vp) < 0)
1181 goto out_close;
1182
1183 if ((vp->options & VECTOR_RX) > 0) {
1184 vp->rx_queue = create_queue(
1185 vp,
1186 get_depth(vp->parsed),
1187 vp->rx_header_size,
1188 MAX_IOV_SIZE
1189 );
1190 vp->rx_queue->queue_depth = get_depth(vp->parsed);
1191 } else {
1192 vp->header_rxbuffer = kmalloc(
1193 vp->rx_header_size,
1194 GFP_KERNEL
1195 );
1196 if (vp->header_rxbuffer == NULL)
1197 goto out_close;
1198 }
1199 if ((vp->options & VECTOR_TX) > 0) {
1200 vp->tx_queue = create_queue(
1201 vp,
1202 get_depth(vp->parsed),
1203 vp->header_size,
1204 MAX_IOV_SIZE
1205 );
1206 } else {
1207 vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
1208 if (vp->header_txbuffer == NULL)
1209 goto out_close;
1210 }
1211
1212
1213 err = um_request_irq(
1214 irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
1215 IRQ_READ, vector_rx_interrupt,
1216 IRQF_SHARED, dev->name, dev);
1217 if (err != 0) {
1218 netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
1219 err = -ENETUNREACH;
1220 goto out_close;
1221 }
1222 vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
1223 dev->irq = irq_rr + VECTOR_BASE_IRQ;
1224 irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1225
1226
1227 if ((vp->options & VECTOR_TX) > 0) {
1228 err = um_request_irq(
1229 irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
1230 IRQ_WRITE, vector_tx_interrupt,
1231 IRQF_SHARED, dev->name, dev);
1232 if (err != 0) {
1233 netdev_err(dev,
1234 "vector_open: failed to get tx irq(%d)\n", err);
1235 err = -ENETUNREACH;
1236 goto out_close;
1237 }
1238 vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
1239 irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
1240 }
1241
1242 if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
1243 if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
1244 vp->options |= VECTOR_BPF;
1245 }
1246 if ((vp->options & VECTOR_BPF) != 0)
1247 vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
1248
1249 netif_start_queue(dev);
1250
1251
1252
1253
1254
1255
1256 vector_rx(vp);
1257
1258 vector_reset_stats(vp);
1259 vdevice = find_device(vp->unit);
1260 vdevice->opened = 1;
1261
1262 if ((vp->options & VECTOR_TX) != 0)
1263 add_timer(&vp->tl);
1264 return 0;
1265out_close:
1266 vector_net_close(dev);
1267 return err;
1268}
1269
1270
/* ndo_set_rx_mode handler: intentionally does nothing. */
static void vector_net_set_multicast_list(struct net_device *dev)
{
}
1276
1277static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
1278{
1279 struct vector_private *vp = netdev_priv(dev);
1280
1281 vp->estats.tx_timeout_count++;
1282 netif_trans_update(dev);
1283 schedule_work(&vp->reset_tx);
1284}
1285
1286static netdev_features_t vector_fix_features(struct net_device *dev,
1287 netdev_features_t features)
1288{
1289 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
1290 return features;
1291}
1292
1293static int vector_set_features(struct net_device *dev,
1294 netdev_features_t features)
1295{
1296 struct vector_private *vp = netdev_priv(dev);
1297
1298
1299
1300
1301 if (features & NETIF_F_GRO)
1302
1303 vp->req_size = 65536;
1304 else
1305
1306 vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
1307 return 0;
1308}
1309
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller handler: runs the receive interrupt handler by hand
 * with the device interrupt disabled around the call.
 */
static void vector_net_poll_controller(struct net_device *dev)
{
	disable_irq(dev->irq);
	(void) vector_rx_interrupt(dev->irq, dev);
	enable_irq(dev->irq);
}
#endif
1318
1319static void vector_net_get_drvinfo(struct net_device *dev,
1320 struct ethtool_drvinfo *info)
1321{
1322 strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
1323 strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
1324}
1325
1326static void vector_get_ringparam(struct net_device *netdev,
1327 struct ethtool_ringparam *ring)
1328{
1329 struct vector_private *vp = netdev_priv(netdev);
1330
1331 ring->rx_max_pending = vp->rx_queue->max_depth;
1332 ring->tx_max_pending = vp->tx_queue->max_depth;
1333 ring->rx_pending = vp->rx_queue->max_depth;
1334 ring->tx_pending = vp->tx_queue->max_depth;
1335}
1336
1337static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
1338{
1339 switch (stringset) {
1340 case ETH_SS_TEST:
1341 *buf = '\0';
1342 break;
1343 case ETH_SS_STATS:
1344 memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys));
1345 break;
1346 default:
1347 WARN_ON(1);
1348 break;
1349 }
1350}
1351
1352static int vector_get_sset_count(struct net_device *dev, int sset)
1353{
1354 switch (sset) {
1355 case ETH_SS_TEST:
1356 return 0;
1357 case ETH_SS_STATS:
1358 return VECTOR_NUM_STATS;
1359 default:
1360 return -EOPNOTSUPP;
1361 }
1362}
1363
1364static void vector_get_ethtool_stats(struct net_device *dev,
1365 struct ethtool_stats *estats,
1366 u64 *tmp_stats)
1367{
1368 struct vector_private *vp = netdev_priv(dev);
1369
1370 memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
1371}
1372
1373static int vector_get_coalesce(struct net_device *netdev,
1374 struct ethtool_coalesce *ec)
1375{
1376 struct vector_private *vp = netdev_priv(netdev);
1377
1378 ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
1379 return 0;
1380}
1381
1382static int vector_set_coalesce(struct net_device *netdev,
1383 struct ethtool_coalesce *ec)
1384{
1385 struct vector_private *vp = netdev_priv(netdev);
1386
1387 vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
1388 if (vp->coalesce == 0)
1389 vp->coalesce = 1;
1390 return 0;
1391}
1392
1393static const struct ethtool_ops vector_net_ethtool_ops = {
1394 .get_drvinfo = vector_net_get_drvinfo,
1395 .get_link = ethtool_op_get_link,
1396 .get_ts_info = ethtool_op_get_ts_info,
1397 .get_ringparam = vector_get_ringparam,
1398 .get_strings = vector_get_strings,
1399 .get_sset_count = vector_get_sset_count,
1400 .get_ethtool_stats = vector_get_ethtool_stats,
1401 .get_coalesce = vector_get_coalesce,
1402 .set_coalesce = vector_set_coalesce,
1403};
1404
1405
1406static const struct net_device_ops vector_netdev_ops = {
1407 .ndo_open = vector_net_open,
1408 .ndo_stop = vector_net_close,
1409 .ndo_start_xmit = vector_net_start_xmit,
1410 .ndo_set_rx_mode = vector_net_set_multicast_list,
1411 .ndo_tx_timeout = vector_net_tx_timeout,
1412 .ndo_set_mac_address = eth_mac_addr,
1413 .ndo_validate_addr = eth_validate_addr,
1414 .ndo_fix_features = vector_fix_features,
1415 .ndo_set_features = vector_set_features,
1416#ifdef CONFIG_NET_POLL_CONTROLLER
1417 .ndo_poll_controller = vector_net_poll_controller,
1418#endif
1419};
1420
1421
1422static void vector_timer_expire(struct timer_list *t)
1423{
1424 struct vector_private *vp = from_timer(vp, t, tl);
1425
1426 vp->estats.tx_kicks++;
1427 vector_send(vp->tx_queue);
1428}
1429
1430static void vector_eth_configure(
1431 int n,
1432 struct arglist *def
1433 )
1434{
1435 struct vector_device *device;
1436 struct net_device *dev;
1437 struct vector_private *vp;
1438 int err;
1439
1440 device = kzalloc(sizeof(*device), GFP_KERNEL);
1441 if (device == NULL) {
1442 printk(KERN_ERR "eth_configure failed to allocate struct "
1443 "vector_device\n");
1444 return;
1445 }
1446 dev = alloc_etherdev(sizeof(struct vector_private));
1447 if (dev == NULL) {
1448 printk(KERN_ERR "eth_configure: failed to allocate struct "
1449 "net_device for vec%d\n", n);
1450 goto out_free_device;
1451 }
1452
1453 dev->mtu = get_mtu(def);
1454
1455 INIT_LIST_HEAD(&device->list);
1456 device->unit = n;
1457
1458
1459
1460
1461
1462 snprintf(dev->name, sizeof(dev->name), "vec%d", n);
1463 uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
1464 vp = netdev_priv(dev);
1465
1466
1467 if (!driver_registered) {
1468 platform_driver_register(¨_net_driver);
1469 driver_registered = 1;
1470 }
1471 device->pdev.id = n;
1472 device->pdev.name = DRIVER_NAME;
1473 device->pdev.dev.release = vector_device_release;
1474 dev_set_drvdata(&device->pdev.dev, device);
1475 if (platform_device_register(&device->pdev))
1476 goto out_free_netdev;
1477 SET_NETDEV_DEV(dev, &device->pdev.dev);
1478
1479 device->dev = dev;
1480
1481 *vp = ((struct vector_private)
1482 {
1483 .list = LIST_HEAD_INIT(vp->list),
1484 .dev = dev,
1485 .unit = n,
1486 .options = get_transport_options(def),
1487 .rx_irq = 0,
1488 .tx_irq = 0,
1489 .parsed = def,
1490 .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
1491
1492
1493
1494 .headroom = get_headroom(def),
1495 .form_header = NULL,
1496 .verify_header = NULL,
1497 .header_rxbuffer = NULL,
1498 .header_txbuffer = NULL,
1499 .header_size = 0,
1500 .rx_header_size = 0,
1501 .rexmit_scheduled = false,
1502 .opened = false,
1503 .transport_data = NULL,
1504 .in_write_poll = false,
1505 .coalesce = 2,
1506 .req_size = get_req_size(def)
1507 });
1508
1509 dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
1510 tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
1511 INIT_WORK(&vp->reset_tx, vector_reset_tx);
1512
1513 timer_setup(&vp->tl, vector_timer_expire, 0);
1514 spin_lock_init(&vp->lock);
1515
1516
1517 dev->netdev_ops = &vector_netdev_ops;
1518 dev->ethtool_ops = &vector_net_ethtool_ops;
1519 dev->watchdog_timeo = (HZ >> 1);
1520
1521 dev->irq = 0;
1522
1523 rtnl_lock();
1524 err = register_netdevice(dev);
1525 rtnl_unlock();
1526 if (err)
1527 goto out_undo_user_init;
1528
1529 spin_lock(&vector_devices_lock);
1530 list_add(&device->list, &vector_devices);
1531 spin_unlock(&vector_devices_lock);
1532
1533 return;
1534
1535out_undo_user_init:
1536 return;
1537out_free_netdev:
1538 free_netdev(dev);
1539out_free_device:
1540 kfree(device);
1541}
1542
1543
1544
1545
1546
1547
1548
1549
1550static int __init vector_init(void)
1551{
1552 struct list_head *ele;
1553 struct vector_cmd_line_arg *def;
1554 struct arglist *parsed;
1555
1556 list_for_each(ele, &vec_cmd_line) {
1557 def = list_entry(ele, struct vector_cmd_line_arg, list);
1558 parsed = uml_parse_vector_ifspec(def->arguments);
1559 if (parsed != NULL)
1560 vector_eth_configure(def->unit, parsed);
1561 }
1562 return 0;
1563}
1564
1565
1566
1567
1568
1569
1570
1571static int __init vector_setup(char *str)
1572{
1573 char *error;
1574 int n, err;
1575 struct vector_cmd_line_arg *new;
1576
1577 err = vector_parse(str, &n, &str, &error);
1578 if (err) {
1579 printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
1580 str, error);
1581 return 1;
1582 }
1583 new = memblock_alloc(sizeof(*new), 0);
1584 INIT_LIST_HEAD(&new->list);
1585 new->unit = n;
1586 new->arguments = str;
1587 list_add_tail(&new->list, &vec_cmd_line);
1588 return 1;
1589}
1590
/* Route "vec..." kernel command line options to vector_setup(). */
__setup("vec", vector_setup);
/* Help text registered for the option handled by vector_setup(). */
__uml_help(vector_setup,
"vec[0-9]+:<option>=<value>,<option>=<value>\n"
" Configure a vector io network device.\n\n"
);

/* Run vector_init() as a late initcall to process vec_cmd_line. */
late_initcall(vector_init);
1598
1599static struct mc_device vector_mc = {
1600 .list = LIST_HEAD_INIT(vector_mc.list),
1601 .name = "vec",
1602 .config = vector_config,
1603 .get_config = NULL,
1604 .id = vector_id,
1605 .remove = vector_remove,
1606};
1607
#ifdef CONFIG_INET
/*
 * inetaddr notifier callback. It takes no action for any event and
 * always answers NOTIFY_DONE.
 */
static int vector_inetaddr_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	return NOTIFY_DONE;
}

/* Notifier block that carries the callback above. */
static struct notifier_block vector_inetaddr_notifier = {
	.notifier_call	= vector_inetaddr_event,
};

/* Register vector_inetaddr_notifier on the inetaddr notifier chain. */
static void inet_register(void)
{
	register_inetaddr_notifier(&vector_inetaddr_notifier);
}
#else
/* Built without CONFIG_INET: nothing to register. */
static inline void inet_register(void)
{
}
#endif
1630
1631static int vector_net_init(void)
1632{
1633 mconsole_register_dev(&vector_mc);
1634 inet_register();
1635 return 0;
1636}
1637
1638__initcall(vector_net_init);
1639
1640
1641
1642