1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include "ipoib.h"
36
37#include <linux/module.h>
38
39#include <linux/init.h>
40#include <linux/slab.h>
41#include <linux/kernel.h>
42#include <linux/vmalloc.h>
43
44#include <linux/if_arp.h>
45
46#include <linux/ip.h>
47#include <linux/in.h>
48
49#include <linux/jhash.h>
50#include <net/arp.h>
51#include <net/addrconf.h>
52#include <linux/inetdevice.h>
53#include <rdma/ib_cache.h>
54
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");

/* TX/RX ring sizes; settable only at load time (perm 0444 below). */
int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;

module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Runtime-tunable debug verbosity (perm 0644: writable via sysfs). */
int ipoib_debug_level;

module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif

/* Cursor state for iterating a device's path table (debugfs helpers). */
struct ipoib_path_iter {
        struct net_device *dev;
        struct ipoib_path path;
};

/* Link-layer broadcast address for the IPv4 broadcast group. */
static const u8 ipv4_bcast_addr[] = {
        0x00, 0xff, 0xff, 0xff,
        0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
};

struct workqueue_struct *ipoib_workqueue;

struct ib_sa_client ipoib_sa_client;

static int ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static void ipoib_neigh_reclaim(struct rcu_head *rp);
static struct net_device *ipoib_get_net_dev_by_params(
                struct ib_device *dev, u8 port, u16 pkey,
                const union ib_gid *gid, const struct sockaddr *addr,
                void *client_data);
static int ipoib_set_mac(struct net_device *dev, void *addr);
static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
                       int cmd);

/* Registration with the IB core; one IPoIB instance per IB device. */
static struct ib_client ipoib_client = {
        .name   = "ipoib",
        .add    = ipoib_add_one,
        .remove = ipoib_remove_one,
        .get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
106
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/*
 * Keep the debugfs entries of IPoIB netdevs in sync with their
 * register/rename/unregister lifecycle.  Non-IPoIB devices (identified
 * by their ndo_open not being ipoib_open) are ignored.
 */
static int ipoib_netdev_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
{
        const struct netdev_notifier_info *info = ptr;
        struct net_device *dev = info->dev;

        if (dev->netdev_ops->ndo_open != ipoib_open)
                return NOTIFY_DONE;

        if (event == NETDEV_REGISTER) {
                ipoib_create_debug_files(dev);
        } else if (event == NETDEV_CHANGENAME) {
                /* Recreate under the new name. */
                ipoib_delete_debug_files(dev);
                ipoib_create_debug_files(dev);
        } else if (event == NETDEV_UNREGISTER) {
                ipoib_delete_debug_files(dev);
        }

        return NOTIFY_DONE;
}
#endif
133
/*
 * ndo_open: bring the IPoIB interface administratively up, start the IB
 * resources, and propagate IFF_UP to any child (VLAN/pkey) interfaces.
 * Returns 0 on success or -EINVAL if the IB device could not be opened.
 */
int ipoib_open(struct net_device *dev)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);

        ipoib_dbg(priv, "bringing up interface\n");

        netif_carrier_off(dev);

        set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

        if (ipoib_ib_dev_open(dev)) {
                /* No P_Key assigned yet: stay administratively up and
                 * report success; the device comes up for real once the
                 * P_Key appears. */
                if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                        return 0;
                goto err_disable;
        }

        ipoib_ib_dev_up(dev);

        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;

                /* Bring up any child interfaces too */
                down_read(&priv->vlan_rwsem);
                list_for_each_entry(cpriv, &priv->child_intfs, list) {
                        int flags;

                        flags = cpriv->dev->flags;
                        if (flags & IFF_UP)
                                continue;

                        dev_change_flags(cpriv->dev, flags | IFF_UP, NULL);
                }
                up_read(&priv->vlan_rwsem);
        }

        netif_start_queue(dev);

        return 0;

err_disable:
        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

        return -EINVAL;
}
178
/*
 * ndo_stop: mirror of ipoib_open() — mark the interface down, stop the
 * TX queue and IB resources, then clear IFF_UP on child interfaces.
 */
static int ipoib_stop(struct net_device *dev)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);

        ipoib_dbg(priv, "stopping interface\n");

        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

        netif_stop_queue(dev);

        ipoib_ib_dev_down(dev);
        ipoib_ib_dev_stop(dev);

        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;

                /* Bring down any child interfaces too */
                down_read(&priv->vlan_rwsem);
                list_for_each_entry(cpriv, &priv->child_intfs, list) {
                        int flags;

                        flags = cpriv->dev->flags;
                        if (!(flags & IFF_UP))
                                continue;

                        dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL);
                }
                up_read(&priv->vlan_rwsem);
        }

        return 0;
}
211
212static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
213{
214 struct ipoib_dev_priv *priv = ipoib_priv(dev);
215
216 if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
217 features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
218
219 return features;
220}
221
/*
 * ndo_change_mtu: validate and apply a new MTU.
 * Connected mode allows anything up to the CM maximum; datagram mode is
 * bounded by the UD MTU of the underlying IB link, and the effective MTU
 * is further capped by what the multicast group can carry.
 */
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        int ret = 0;

        /* Connected mode path. */
        if (ipoib_cm_admin_enabled(dev)) {
                if (new_mtu > ipoib_cm_max_mtu(dev))
                        return -EINVAL;

                if (new_mtu > priv->mcast_mtu)
                        ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
                                   priv->mcast_mtu);

                dev->mtu = new_mtu;
                return 0;
        }

        /* Datagram mode: must fit a UD packet plus the encap header. */
        if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) ||
            new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
                return -EINVAL;

        priv->admin_mtu = new_mtu;

        if (priv->mcast_mtu < priv->admin_mtu)
                ipoib_dbg(priv, "MTU must be smaller than the underlying "
                          "link layer MTU - 4 (%u)\n", priv->mcast_mtu);

        /* Effective MTU = min(admin request, multicast capability). */
        new_mtu = min(priv->mcast_mtu, priv->admin_mtu);

        if (priv->rn_ops->ndo_change_mtu) {
                bool carrier_status = netif_carrier_ok(dev);

                netif_carrier_off(dev);

                /* Notify the lower layer of the real MTU change; keep the
                 * carrier off while it updates, then restore it. */
                ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu);

                if (carrier_status)
                        netif_carrier_on(dev);
        } else {
                dev->mtu = new_mtu;
        }

        return ret;
}
268
269static void ipoib_get_stats(struct net_device *dev,
270 struct rtnl_link_stats64 *stats)
271{
272 struct ipoib_dev_priv *priv = ipoib_priv(dev);
273
274 if (priv->rn_ops->ndo_get_stats64)
275 priv->rn_ops->ndo_get_stats64(dev, stats);
276 else
277 netdev_stats_to_stats64(stats, &dev->stats);
278}
279
280
281static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr,
282 struct net_device *dev)
283{
284 struct net *net = dev_net(dev);
285 struct in_device *in_dev;
286 struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
287 struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr;
288 __be32 ret_addr;
289
290 switch (addr->sa_family) {
291 case AF_INET:
292 in_dev = in_dev_get(dev);
293 if (!in_dev)
294 return false;
295
296 ret_addr = inet_confirm_addr(net, in_dev, 0,
297 addr_in->sin_addr.s_addr,
298 RT_SCOPE_HOST);
299 in_dev_put(in_dev);
300 if (ret_addr)
301 return true;
302
303 break;
304 case AF_INET6:
305 if (IS_ENABLED(CONFIG_IPV6) &&
306 ipv6_chk_addr(net, &addr_in6->sin6_addr, dev, 1))
307 return true;
308
309 break;
310 }
311 return false;
312}
313
314
315
316
317
318
319
320
/*
 * Return the master upper device of @dev if one exists, otherwise @dev
 * itself.  Either way the returned device has an extra reference held;
 * the caller must dev_put() it.
 */
static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
{
        struct net_device *master;

        rcu_read_lock();
        master = netdev_master_upper_dev_get_rcu(dev);
        if (master)
                dev_hold(master);
        rcu_read_unlock();

        if (!master) {
                master = dev;
                dev_hold(master);
        }

        return master;
}
337
/* Carries the target address into, and the held matching device out of,
 * netdev_walk_all_upper_dev_rcu() via ipoib_upper_walk(). */
struct ipoib_walk_data {
        const struct sockaddr *addr;
        struct net_device *result;
};
342
343static int ipoib_upper_walk(struct net_device *upper,
344 struct netdev_nested_priv *priv)
345{
346 struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data;
347 int ret = 0;
348
349 if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) {
350 dev_hold(upper);
351 data->result = upper;
352 ret = 1;
353 }
354
355 return ret;
356}
357
358
359
360
361
362
363
364
365
366
/*
 * Find the net_device carrying @addr: either @dev itself or one of its
 * upper devices.  Returns the match with an extra reference held, or
 * NULL (data.result stays NULL when nothing matched).
 */
static struct net_device *ipoib_get_net_dev_match_addr(
                const struct sockaddr *addr, struct net_device *dev)
{
        struct netdev_nested_priv priv;
        struct ipoib_walk_data data = {
                .addr = addr,
        };

        priv.data = (void *)&data;
        rcu_read_lock();
        if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
                dev_hold(dev);
                data.result = dev;
                goto out;
        }

        /* Otherwise scan all upper devices for the address. */
        netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv);
out:
        rcu_read_unlock();
        return data.result;
}
388
389
390
391
392
393
/*
 * Count the IPoIB interfaces — this one and, recursively, its children —
 * that match the given P_Key index, optional GID and optional IP address.
 * The first match is returned through @found_net_dev with a reference
 * held; later matches only bump the count.  @nesting is the lockdep
 * nesting level for the child rwsem taken at each recursion depth.
 * The walk short-circuits once more than one match is found.
 */
static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
                                     const union ib_gid *gid,
                                     u16 pkey_index,
                                     const struct sockaddr *addr,
                                     int nesting,
                                     struct net_device **found_net_dev)
{
        struct ipoib_dev_priv *child_priv;
        struct net_device *net_dev = NULL;
        int matches = 0;

        if (priv->pkey_index == pkey_index &&
            (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) {
                if (!addr) {
                        net_dev = ipoib_get_master_net_dev(priv->dev);
                } else {
                        /* Verify the net_device really carries the IP
                         * address: sibling devices may share the same
                         * GID/P_Key pair. */
                        net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev);
                }
                if (net_dev) {
                        if (!*found_net_dev)
                                *found_net_dev = net_dev;
                        else
                                dev_put(net_dev);
                        ++matches;
                }
        }

        /* Check child interfaces. */
        down_read_nested(&priv->vlan_rwsem, nesting);
        list_for_each_entry(child_priv, &priv->child_intfs, list) {
                matches += ipoib_match_gid_pkey_addr(child_priv, gid,
                                                     pkey_index, addr,
                                                     nesting + 1,
                                                     found_net_dev);
                if (matches > 1)
                        break;
        }
        up_read(&priv->vlan_rwsem);

        return matches;
}
437
438
439
440
441static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
442 u16 pkey_index,
443 const union ib_gid *gid,
444 const struct sockaddr *addr,
445 struct net_device **net_dev)
446{
447 struct ipoib_dev_priv *priv;
448 int matches = 0;
449
450 *net_dev = NULL;
451
452 list_for_each_entry(priv, dev_list, list) {
453 if (priv->port != port)
454 continue;
455
456 matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index,
457 addr, 0, net_dev);
458 if (matches > 1)
459 break;
460 }
461
462 return matches;
463}
464
/*
 * ib_client callback: resolve (device, port, pkey, gid, addr) to the
 * owning net_device.  First tries L2 parameters alone; if that is
 * ambiguous, retries with the L3 address to disambiguate.  Returns the
 * match with a reference held, or NULL.
 */
static struct net_device *ipoib_get_net_dev_by_params(
                struct ib_device *dev, u8 port, u16 pkey,
                const union ib_gid *gid, const struct sockaddr *addr,
                void *client_data)
{
        struct net_device *net_dev;
        struct list_head *dev_list = client_data;
        u16 pkey_index;
        int matches;
        int ret;

        if (!rdma_protocol_ib(dev, port))
                return NULL;

        ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index);
        if (ret)
                return NULL;

        /* See if we can find a unique device matching the L2 parameters. */
        matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
                                                gid, NULL, &net_dev);

        switch (matches) {
        case 0:
                return NULL;
        case 1:
                return net_dev;
        }

        /* More than one L2 match: drop the candidate and retry using the
         * L3 address to pick a unique device. */
        dev_put(net_dev);

        matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
                                                gid, addr, &net_dev);
        switch (matches) {
        case 0:
                return NULL;
        default:
                dev_warn_ratelimited(&dev->dev,
                                     "duplicate IP address detected\n");
                fallthrough;
        case 1:
                return net_dev;
        }
}
511
/*
 * Switch between "connected\n" and "datagram\n" transport modes.
 * Called with RTNL held; the lock is dropped around ipoib_flush_paths()
 * (which may sleep) and re-acquired with trylock — returns -EBUSY if it
 * cannot be retaken, -EINVAL for an unrecognized mode string, 0 on
 * success or when already in the requested mode.
 */
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);

        /* Already in the requested mode?  Nothing to do. */
        if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
             !strcmp(buf, "connected\n")) ||
            (!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
             !strcmp(buf, "datagram\n"))) {
                return 0;
        }

        /* Enable connected mode (only if the HW address supports it). */
        if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
                set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
                ipoib_warn(priv, "enabling connected mode "
                           "will cause multicast packet drops\n");
                netdev_update_features(dev);
                dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
                netif_set_real_num_tx_queues(dev, 1);
                rtnl_unlock();
                /* CM does not use IP checksum offload on the send WR. */
                priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;

                /* Flush paths so connections are restarted in new mode. */
                ipoib_flush_paths(dev);
                return (!rtnl_trylock()) ? -EBUSY : 0;
        }

        if (!strcmp(buf, "datagram\n")) {
                clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
                netdev_update_features(dev);
                dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
                netif_set_real_num_tx_queues(dev, dev->num_tx_queues);
                rtnl_unlock();
                ipoib_flush_paths(dev);
                return (!rtnl_trylock()) ? -EBUSY : 0;
        }

        return -EINVAL;
}
550
551struct ipoib_path *__path_find(struct net_device *dev, void *gid)
552{
553 struct ipoib_dev_priv *priv = ipoib_priv(dev);
554 struct rb_node *n = priv->path_tree.rb_node;
555 struct ipoib_path *path;
556 int ret;
557
558 while (n) {
559 path = rb_entry(n, struct ipoib_path, rb_node);
560
561 ret = memcmp(gid, path->pathrec.dgid.raw,
562 sizeof (union ib_gid));
563
564 if (ret < 0)
565 n = n->rb_left;
566 else if (ret > 0)
567 n = n->rb_right;
568 else
569 return path;
570 }
571
572 return NULL;
573}
574
575static int __path_add(struct net_device *dev, struct ipoib_path *path)
576{
577 struct ipoib_dev_priv *priv = ipoib_priv(dev);
578 struct rb_node **n = &priv->path_tree.rb_node;
579 struct rb_node *pn = NULL;
580 struct ipoib_path *tpath;
581 int ret;
582
583 while (*n) {
584 pn = *n;
585 tpath = rb_entry(pn, struct ipoib_path, rb_node);
586
587 ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
588 sizeof (union ib_gid));
589 if (ret < 0)
590 n = &pn->rb_left;
591 else if (ret > 0)
592 n = &pn->rb_right;
593 else
594 return -EEXIST;
595 }
596
597 rb_link_node(&path->rb_node, pn, n);
598 rb_insert_color(&path->rb_node, &priv->path_tree);
599
600 list_add_tail(&path->list, &priv->path_list);
601
602 return 0;
603}
604
605static void path_free(struct net_device *dev, struct ipoib_path *path)
606{
607 struct sk_buff *skb;
608
609 while ((skb = __skb_dequeue(&path->queue)))
610 dev_kfree_skb_irq(skb);
611
612 ipoib_dbg(ipoib_priv(dev), "%s\n", __func__);
613
614
615 ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
616
617 if (path->ah)
618 ipoib_put_ah(path->ah);
619
620 kfree(path);
621}
622
623#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
624
625struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
626{
627 struct ipoib_path_iter *iter;
628
629 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
630 if (!iter)
631 return NULL;
632
633 iter->dev = dev;
634 memset(iter->path.pathrec.dgid.raw, 0, 16);
635
636 if (ipoib_path_iter_next(iter)) {
637 kfree(iter);
638 return NULL;
639 }
640
641 return iter;
642}
643
644int ipoib_path_iter_next(struct ipoib_path_iter *iter)
645{
646 struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
647 struct rb_node *n;
648 struct ipoib_path *path;
649 int ret = 1;
650
651 spin_lock_irq(&priv->lock);
652
653 n = rb_first(&priv->path_tree);
654
655 while (n) {
656 path = rb_entry(n, struct ipoib_path, rb_node);
657
658 if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
659 sizeof (union ib_gid)) < 0) {
660 iter->path = *path;
661 ret = 0;
662 break;
663 }
664
665 n = rb_next(n);
666 }
667
668 spin_unlock_irq(&priv->lock);
669
670 return ret;
671}
672
673void ipoib_path_iter_read(struct ipoib_path_iter *iter,
674 struct ipoib_path *path)
675{
676 *path = iter->path;
677}
678
679#endif
680
681void ipoib_mark_paths_invalid(struct net_device *dev)
682{
683 struct ipoib_dev_priv *priv = ipoib_priv(dev);
684 struct ipoib_path *path, *tp;
685
686 spin_lock_irq(&priv->lock);
687
688 list_for_each_entry_safe(path, tp, &priv->path_list, list) {
689 ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
690 be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
691 path->pathrec.dgid.raw);
692 if (path->ah)
693 path->ah->valid = 0;
694 }
695
696 spin_unlock_irq(&priv->lock);
697}
698
699static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
700{
701 struct ipoib_pseudo_header *phdr;
702
703 phdr = skb_push(skb, sizeof(*phdr));
704 memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
705}
706
/*
 * Tear down every path entry for @dev.  The whole table is detached
 * under lock, then each entry is freed with the locks dropped, since
 * wait_for_completion() may sleep.
 */
void ipoib_flush_paths(struct net_device *dev)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_path *path, *tp;
        LIST_HEAD(remove_list);
        unsigned long flags;

        netif_tx_lock_bh(dev);
        spin_lock_irqsave(&priv->lock, flags);

        /* Detach the path list and empty the rb-tree under the lock. */
        list_splice_init(&priv->path_list, &remove_list);

        list_for_each_entry(path, &remove_list, list)
                rb_erase(&path->rb_node, &priv->path_tree);

        list_for_each_entry_safe(path, tp, &remove_list, list) {
                if (path->query)
                        ib_sa_cancel_query(path->query_id, path->query);
                /* Drop the locks: waiting on path->done may sleep until
                 * the (possibly cancelled) query completes. */
                spin_unlock_irqrestore(&priv->lock, flags);
                netif_tx_unlock_bh(dev);
                wait_for_completion(&path->done);
                path_free(dev, path);
                netif_tx_lock_bh(dev);
                spin_lock_irqsave(&priv->lock, flags);
        }

        spin_unlock_irqrestore(&priv->lock, flags);
        netif_tx_unlock_bh(dev);
}
736
/*
 * SA path record query completion.  On success, builds an address
 * handle from the returned record, swaps it into the path entry and
 * every neighbour hanging off it, and requeues any packets that were
 * waiting for the resolution.  On failure, flushes the neighbours for
 * this GID so a later send retries the lookup.
 */
static void path_rec_completion(int status,
                                struct sa_path_rec *pathrec,
                                void *path_ptr)
{
        struct ipoib_path *path = path_ptr;
        struct net_device *dev = path->dev;
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_ah *ah = NULL;
        struct ipoib_ah *old_ah = NULL;
        struct ipoib_neigh *neigh, *tn;
        struct sk_buff_head skqueue;
        struct sk_buff *skb;
        unsigned long flags;

        if (!status)
                ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
                          be32_to_cpu(sa_path_get_dlid(pathrec)),
                          pathrec->dgid.raw);
        else
                ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
                          status, path->pathrec.dgid.raw);

        skb_queue_head_init(&skqueue);

        if (!status) {
                struct rdma_ah_attr av;

                /* Build the AH outside priv->lock; creation may be slow. */
                if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
                                               pathrec, &av, NULL)) {
                        ah = ipoib_create_ah(dev, priv->pd, &av);
                        rdma_destroy_ah_attr(&av);
                }
        }

        spin_lock_irqsave(&priv->lock, flags);

        if (!IS_ERR_OR_NULL(ah)) {
                /*
                 * The SA may return a record whose DGID differs from the
                 * one we asked for; keep the GID we queried so the path
                 * stays keyed consistently in the rb-tree.
                 */
                if (memcmp(pathrec->dgid.raw, path->pathrec.dgid.raw,
                           sizeof(union ib_gid))) {
                        ipoib_dbg(
                                priv,
                                "%s got PathRec for gid %pI6 while asked for %pI6\n",
                                dev->name, pathrec->dgid.raw,
                                path->pathrec.dgid.raw);
                        memcpy(pathrec->dgid.raw, path->pathrec.dgid.raw,
                               sizeof(union ib_gid));
                }

                path->pathrec = *pathrec;

                old_ah = path->ah;
                path->ah = ah;

                ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
                          ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
                          pathrec->sl);

                /* Move the path's pending skbs to a private queue; they
                 * are transmitted after the lock is dropped. */
                while ((skb = __skb_dequeue(&path->queue)))
                        __skb_queue_tail(&skqueue, skb);

                list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
                        if (neigh->ah) {
                                WARN_ON(neigh->ah != old_ah);
                                /*
                                 * Dropping the ah reference inside
                                 * priv->lock is safe here because we
                                 * will hold one more reference from
                                 * the original value of path->ah (ie
                                 * old_ah).
                                 */
                                ipoib_put_ah(neigh->ah);
                        }
                        kref_get(&path->ah->ref);
                        neigh->ah = path->ah;

                        if (ipoib_cm_enabled(dev, neigh->daddr)) {
                                if (!ipoib_cm_get(neigh))
                                        ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
                                                                               path,
                                                                               neigh));
                                /* CM required but no tx object: drop it. */
                                if (!ipoib_cm_get(neigh)) {
                                        ipoib_neigh_free(neigh);
                                        continue;
                                }
                        }

                        while ((skb = __skb_dequeue(&neigh->queue)))
                                __skb_queue_tail(&skqueue, skb);
                }
                path->ah->valid = 1;
        }

        path->query = NULL;
        complete(&path->done);

        spin_unlock_irqrestore(&priv->lock, flags);

        if (IS_ERR_OR_NULL(ah))
                ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);

        if (old_ah)
                ipoib_put_ah(old_ah);

        /* Re-inject everything that was waiting for path resolution. */
        while ((skb = __skb_dequeue(&skqueue))) {
                int ret;
                skb->dev = dev;
                ret = dev_queue_xmit(skb);
                if (ret)
                        ipoib_warn(priv, "%s: dev_queue_xmit failed to re-queue packet, ret:%d\n",
                                   __func__, ret);
        }
}
854
855static void init_path_rec(struct ipoib_dev_priv *priv, struct ipoib_path *path,
856 void *gid)
857{
858 path->dev = priv->dev;
859
860 if (rdma_cap_opa_ah(priv->ca, priv->port))
861 path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
862 else
863 path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
864
865 memcpy(path->pathrec.dgid.raw, gid, sizeof(union ib_gid));
866 path->pathrec.sgid = priv->local_gid;
867 path->pathrec.pkey = cpu_to_be16(priv->pkey);
868 path->pathrec.numb_path = 1;
869 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
870}
871
872static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
873{
874 struct ipoib_dev_priv *priv = ipoib_priv(dev);
875 struct ipoib_path *path;
876
877 if (!priv->broadcast)
878 return NULL;
879
880 path = kzalloc(sizeof(*path), GFP_ATOMIC);
881 if (!path)
882 return NULL;
883
884 skb_queue_head_init(&path->queue);
885
886 INIT_LIST_HEAD(&path->neigh_list);
887
888 init_path_rec(priv, path, gid);
889
890 return path;
891}
892
893static int path_rec_start(struct net_device *dev,
894 struct ipoib_path *path)
895{
896 struct ipoib_dev_priv *priv = ipoib_priv(dev);
897
898 ipoib_dbg(priv, "Start path record lookup for %pI6\n",
899 path->pathrec.dgid.raw);
900
901 init_completion(&path->done);
902
903 path->query_id =
904 ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
905 &path->pathrec,
906 IB_SA_PATH_REC_DGID |
907 IB_SA_PATH_REC_SGID |
908 IB_SA_PATH_REC_NUMB_PATH |
909 IB_SA_PATH_REC_TRAFFIC_CLASS |
910 IB_SA_PATH_REC_PKEY,
911 1000, GFP_ATOMIC,
912 path_rec_completion,
913 path, &path->query);
914 if (path->query_id < 0) {
915 ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
916 path->query = NULL;
917 complete(&path->done);
918 return path->query_id;
919 }
920
921 return 0;
922}
923
924static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
925 struct net_device *dev)
926{
927 struct ipoib_dev_priv *priv = ipoib_priv(dev);
928 struct ipoib_path *path;
929 unsigned long flags;
930
931 spin_lock_irqsave(&priv->lock, flags);
932
933 path = __path_find(dev, daddr + 4);
934 if (!path)
935 goto out;
936 if (!path->query)
937 path_rec_start(dev, path);
938out:
939 spin_unlock_irqrestore(&priv->lock, flags);
940}
941
/*
 * Create a neighbour entry for @daddr, attach it to its path (creating
 * and resolving the path if needed), and either transmit @skb right
 * away (valid AH, datagram) or queue it until the path resolves.
 * Returns the neighbour (with a reference, for the caller to retry the
 * send) only in the lost-race case below; otherwise consumes the skb
 * and returns NULL.
 */
static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
                                          struct net_device *dev)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rdma_netdev *rn = netdev_priv(dev);
        struct ipoib_path *path;
        struct ipoib_neigh *neigh;
        unsigned long flags;

        spin_lock_irqsave(&priv->lock, flags);
        neigh = ipoib_neigh_alloc(daddr, dev);
        if (!neigh) {
                spin_unlock_irqrestore(&priv->lock, flags);
                ++dev->stats.tx_dropped;
                dev_kfree_skb_any(skb);
                return NULL;
        }

        /* A non-empty neigh->list means another thread already created
         * this neighbour and attached it to a path; hand it back so the
         * caller retries the send through the existing entry. */
        if (unlikely(!list_empty(&neigh->list))) {
                spin_unlock_irqrestore(&priv->lock, flags);
                return neigh;
        }

        path = __path_find(dev, daddr + 4);
        if (!path) {
                path = path_rec_create(dev, daddr + 4);
                if (!path)
                        goto err_path;

                __path_add(dev, path);
        }

        list_add_tail(&neigh->list, &path->neigh_list);

        if (path->ah && path->ah->valid) {
                kref_get(&path->ah->ref);
                neigh->ah = path->ah;

                if (ipoib_cm_enabled(dev, neigh->daddr)) {
                        if (!ipoib_cm_get(neigh))
                                ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
                        if (!ipoib_cm_get(neigh)) {
                                ipoib_neigh_free(neigh);
                                goto err_drop;
                        }
                        /* CM connection not up yet: park the skb on the
                         * neighbour queue (bounded). */
                        if (skb_queue_len(&neigh->queue) <
                            IPOIB_MAX_PATH_REC_QUEUE) {
                                push_pseudo_header(skb, neigh->daddr);
                                __skb_queue_tail(&neigh->queue, skb);
                        } else {
                                ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
                                           skb_queue_len(&neigh->queue));
                                goto err_drop;
                        }
                } else {
                        /* Datagram send can go out immediately; drop the
                         * lock first since rn->send may be slow. */
                        spin_unlock_irqrestore(&priv->lock, flags);
                        path->ah->last_send = rn->send(dev, skb, path->ah->ah,
                                                       IPOIB_QPN(daddr));
                        ipoib_neigh_put(neigh);
                        return NULL;
                }
        } else {
                neigh->ah  = NULL;

                if (!path->query && path_rec_start(dev, path))
                        goto err_path;
                /* Queue until path_rec_completion() re-injects it. */
                if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                        push_pseudo_header(skb, neigh->daddr);
                        __skb_queue_tail(&neigh->queue, skb);
                } else {
                        goto err_drop;
                }
        }

        spin_unlock_irqrestore(&priv->lock, flags);
        ipoib_neigh_put(neigh);
        return NULL;

err_path:
        ipoib_neigh_free(neigh);
err_drop:
        ++dev->stats.tx_dropped;
        dev_kfree_skb_any(skb);

        spin_unlock_irqrestore(&priv->lock, flags);
        ipoib_neigh_put(neigh);

        return NULL;
}
1034
/*
 * Transmit a unicast ARP/RARP packet.  These bypass the neighbour
 * cache: the path is looked up (or created and resolved) directly, and
 * the skb is queued on the path while resolution is pending.
 */
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                             struct ipoib_pseudo_header *phdr)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rdma_netdev *rn = netdev_priv(dev);
        struct ipoib_path *path;
        unsigned long flags;

        spin_lock_irqsave(&priv->lock, flags);

        /* No path records can be built before the broadcast join. */
        if (!priv->broadcast)
                goto drop_and_unlock;

        path = __path_find(dev, phdr->hwaddr + 4);
        if (!path || !path->ah || !path->ah->valid) {
                if (!path) {
                        path = path_rec_create(dev, phdr->hwaddr + 4);
                        if (!path)
                                goto drop_and_unlock;
                        __path_add(dev, path);
                } else {
                        /*
                         * Existing but invalid path: re-initialize the
                         * record (the SGID/P_Key may have changed) before
                         * re-querying.
                         */
                        init_path_rec(priv, path, phdr->hwaddr + 4);
                }
                if (!path->query && path_rec_start(dev, path)) {
                        goto drop_and_unlock;
                }

                /* Park the skb until path_rec_completion() fires. */
                if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                        push_pseudo_header(skb, phdr->hwaddr);
                        __skb_queue_tail(&path->queue, skb);
                        goto unlock;
                } else {
                        goto drop_and_unlock;
                }
        }

        /* Valid AH: send immediately with the lock dropped. */
        spin_unlock_irqrestore(&priv->lock, flags);
        ipoib_dbg(priv, "Send unicast ARP to %08x\n",
                  be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
        path->ah->last_send = rn->send(dev, skb, path->ah->ah,
                                       IPOIB_QPN(phdr->hwaddr));
        return;

drop_and_unlock:
        ++dev->stats.tx_dropped;
        dev_kfree_skb_any(skb);
unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
}
1089
/*
 * ndo_start_xmit: strip the pseudo header pushed by ipoib_hard_header(),
 * then dispatch by destination: multicast goes through the mcast path,
 * ARP/RARP through unicast_arp_send(), and other unicast IP/TIPC
 * through the neighbour cache (CM, direct UD, or queued while the path
 * resolves).
 */
static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rdma_netdev *rn = netdev_priv(dev);
        struct ipoib_neigh *neigh;
        struct ipoib_pseudo_header *phdr;
        struct ipoib_header *header;
        unsigned long flags;

        phdr = (struct ipoib_pseudo_header *) skb->data;
        skb_pull(skb, sizeof(*phdr));
        header = (struct ipoib_header *) skb->data;

        /* hwaddr[4] == 0xff marks a multicast destination. */
        if (unlikely(phdr->hwaddr[4] == 0xff)) {
                /* multicast is only supported for a few protocols */
                if ((header->proto != htons(ETH_P_IP)) &&
                    (header->proto != htons(ETH_P_IPV6)) &&
                    (header->proto != htons(ETH_P_ARP)) &&
                    (header->proto != htons(ETH_P_RARP)) &&
                    (header->proto != htons(ETH_P_TIPC))) {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                        return NETDEV_TX_OK;
                }

                /* Stamp our P_Key into the multicast GID. */
                phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
                phdr->hwaddr[9] = priv->pkey & 0xff;

                neigh = ipoib_neigh_get(dev, phdr->hwaddr);
                if (likely(neigh))
                        goto send_using_neigh;
                ipoib_mcast_send(dev, phdr->hwaddr, skb);
                return NETDEV_TX_OK;
        }

        /* Unicast: dispatch on protocol. */
        switch (header->proto) {
        case htons(ETH_P_IP):
        case htons(ETH_P_IPV6):
        case htons(ETH_P_TIPC):
                neigh = ipoib_neigh_get(dev, phdr->hwaddr);
                if (unlikely(!neigh)) {
                        /* neigh_add_path() consumes the skb unless we lost
                         * a creation race; then retry through the entry it
                         * returns. */
                        neigh = neigh_add_path(skb, phdr->hwaddr, dev);
                        if (likely(!neigh))
                                return NETDEV_TX_OK;
                }
                break;
        case htons(ETH_P_ARP):
        case htons(ETH_P_RARP):
                /* ARP traffic resolves its own path. */
                unicast_arp_send(skb, dev, phdr);
                return NETDEV_TX_OK;
        default:
                /* Unsupported protocol over unicast. */
                ++dev->stats.tx_dropped;
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }

send_using_neigh:
        /* CM connection if up, else direct UD send, else queue. */
        if (ipoib_cm_get(neigh)) {
                if (ipoib_cm_up(neigh)) {
                        ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
                        goto unref;
                }
        } else if (neigh->ah && neigh->ah->valid) {
                neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
                                                IPOIB_QPN(phdr->hwaddr));
                goto unref;
        } else if (neigh->ah) {
                /* Stale AH: trigger a new path query. */
                neigh_refresh_path(neigh, phdr->hwaddr, dev);
        }

        if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                push_pseudo_header(skb, phdr->hwaddr);
                spin_lock_irqsave(&priv->lock, flags);
                __skb_queue_tail(&neigh->queue, skb);
                spin_unlock_irqrestore(&priv->lock, flags);
        } else {
                ++dev->stats.tx_dropped;
                dev_kfree_skb_any(skb);
        }

unref:
        ipoib_neigh_put(neigh);

        return NETDEV_TX_OK;
}
1180
1181static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
1182{
1183 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1184
1185 ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
1186 jiffies_to_msecs(jiffies - dev_trans_start(dev)));
1187 ipoib_warn(priv,
1188 "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
1189 netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
1190 priv->global_tx_head, priv->global_tx_tail);
1191
1192
1193}
1194
1195static int ipoib_hard_header(struct sk_buff *skb,
1196 struct net_device *dev,
1197 unsigned short type,
1198 const void *daddr,
1199 const void *saddr,
1200 unsigned int len)
1201{
1202 struct ipoib_header *header;
1203
1204 header = skb_push(skb, sizeof(*header));
1205
1206 header->proto = htons(type);
1207 header->reserved = 0;
1208
1209
1210
1211
1212
1213
1214 push_pseudo_header(skb, daddr);
1215
1216 return IPOIB_HARD_LEN;
1217}
1218
1219static void ipoib_set_mcast_list(struct net_device *dev)
1220{
1221 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1222
1223 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
1224 ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
1225 return;
1226 }
1227
1228 queue_work(priv->wq, &priv->restart_task);
1229}
1230
1231static int ipoib_get_iflink(const struct net_device *dev)
1232{
1233 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1234
1235
1236 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
1237 return dev->ifindex;
1238
1239
1240 return priv->parent->ifindex;
1241}
1242
/*
 * Hash an IPoIB hardware address (INFINIBAND_ALEN = 20 bytes: 4 bytes
 * of flags+QPN followed by the 16-byte GID) into a bucket index.  Only
 * the words that actually vary between neighbours are mixed in:
 *   - d32[0]: the QPN word, masked with IPOIB_QPN_MASK to drop the
 *     flag bits;
 *   - d32[3], d32[4]: the last 8 bytes of the address (the GID's
 *     interface-identifier half).
 * NOTE(review): this reads daddr with 32-bit loads, so the address is
 * presumably at least 4-byte aligned — confirm at the callers.
 */
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
{
        u32 *d32 = (u32 *) daddr;
        u32 hv;

        hv = jhash_3words(d32[3], d32[4], IPOIB_QPN_MASK & d32[0], 0);
        return hv & htbl->mask;
}
1258
/*
 * RCU lookup of a neighbour by hardware address.  On a hit, takes a
 * reference (unless the entry is concurrently dying — refcnt already
 * zero) and refreshes the liveness timestamp used by the reaper.
 * Returns the referenced neighbour or NULL.
 */
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
{
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct ipoib_neigh_table *ntbl = &priv->ntbl;
        struct ipoib_neigh_hash *htbl;
        struct ipoib_neigh *neigh = NULL;
        u32 hash_val;

        rcu_read_lock_bh();

        htbl = rcu_dereference_bh(ntbl->htbl);

        if (!htbl)
                goto out_unlock;

        hash_val = ipoib_addr_hash(htbl, daddr);
        for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]);
             neigh != NULL;
             neigh = rcu_dereference_bh(neigh->hnext)) {
                if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
                        /* Refcount zero means the entry is being freed;
                         * treat it as a miss. */
                        if (!atomic_inc_not_zero(&neigh->refcnt)) {
                                neigh = NULL;
                                goto out_unlock;
                        }

                        /* Only count as alive while its queue has room,
                         * so stuck neighbours still get reaped. */
                        if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE))
                                neigh->alive = jiffies;
                        goto out_unlock;
                }
        }

out_unlock:
        rcu_read_unlock_bh();
        return neigh;
}
1296
/*
 * Garbage-collect neighbour entries not seen alive within two ARP GC
 * intervals.  Stale entries are unhashed under priv->lock and freed
 * via RCU; any send-only multicast groups they referenced are gathered
 * on remove_list and released after the lock is dropped.
 */
static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
{
        struct ipoib_neigh_table *ntbl = &priv->ntbl;
        struct ipoib_neigh_hash *htbl;
        unsigned long neigh_obsolete;
        unsigned long dt;
        unsigned long flags;
        int i;
        LIST_HEAD(remove_list);

        spin_lock_irqsave(&priv->lock, flags);

        htbl = rcu_dereference_protected(ntbl->htbl,
                                         lockdep_is_held(&priv->lock));

        if (!htbl)
                goto out_unlock;

        /* Entries older than two GC periods are considered dead. */
        dt = 2 * arp_tbl.gc_interval;
        neigh_obsolete = jiffies - dt;

        for (i = 0; i < htbl->size; i++) {
                struct ipoib_neigh *neigh;
                struct ipoib_neigh __rcu **np = &htbl->buckets[i];

                while ((neigh = rcu_dereference_protected(*np,
                                                          lockdep_is_held(&priv->lock))) != NULL) {
                        /* was the neigh idle for two GC periods */
                        if (time_after(neigh_obsolete, neigh->alive)) {
                                /* Collect its send-only mcast group for
                                 * removal outside the lock. */
                                ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);

                                /* Unlink from the bucket chain ... */
                                rcu_assign_pointer(*np,
                                                   rcu_dereference_protected(neigh->hnext,
                                                                             lockdep_is_held(&priv->lock)));
                                /* remove from path/mc list */
                                list_del_init(&neigh->list);
                                call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
                        } else {
                                np = &neigh->hnext;
                        }

                }
        }

out_unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
        ipoib_mcast_remove_list(&remove_list);
}
1347
/*
 * Delayed-work entry point for neighbour reaping: run one sweep and
 * re-arm itself so the table is scanned once per ARP GC interval.
 */
static void ipoib_reap_neigh(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, neigh_reap_task.work);

	__ipoib_reap_neigh(priv);

	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
			   arp_tbl.gc_interval);
}
1358
1359
1360static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
1361 struct net_device *dev)
1362{
1363 struct ipoib_neigh *neigh;
1364
1365 neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC);
1366 if (!neigh)
1367 return NULL;
1368
1369 neigh->dev = dev;
1370 memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr));
1371 skb_queue_head_init(&neigh->queue);
1372 INIT_LIST_HEAD(&neigh->list);
1373 ipoib_cm_set(neigh, NULL);
1374
1375 atomic_set(&neigh->refcnt, 1);
1376
1377 return neigh;
1378}
1379
/*
 * Find the neighbour for @daddr, or create and insert one at the head
 * of its hash bucket.  Returns the entry with an extra reference taken
 * for the caller, or NULL on allocation failure / vanished table.
 *
 * NOTE(review): every dereference here uses
 * rcu_dereference_protected(..., lockdep_is_held(&priv->lock)) and the
 * out_unlock label performs no unlock, so the caller is expected to
 * hold priv->lock — confirm against call sites outside this chunk.
 */
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
				      struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_neigh_table *ntbl = &priv->ntbl;
	struct ipoib_neigh_hash *htbl;
	struct ipoib_neigh *neigh;
	u32 hash_val;

	htbl = rcu_dereference_protected(ntbl->htbl,
					 lockdep_is_held(&priv->lock));
	if (!htbl) {
		neigh = NULL;
		goto out_unlock;
	}

	/*
	 * Search first: another context may already have inserted an
	 * entry for this address.
	 */
	hash_val = ipoib_addr_hash(htbl, daddr);
	for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
					       lockdep_is_held(&priv->lock));
	     neigh != NULL;
	     neigh = rcu_dereference_protected(neigh->hnext,
					       lockdep_is_held(&priv->lock))) {
		if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
			/* found: take a reference on behalf of the caller */
			if (!atomic_inc_not_zero(&neigh->refcnt)) {
				/* refcount already hit zero; entry is dying */
				neigh = NULL;
				break;
			}
			neigh->alive = jiffies;
			goto out_unlock;
		}
	}

	neigh = ipoib_neigh_ctor(daddr, dev);
	if (!neigh)
		goto out_unlock;

	/* ctor gave us one reference; add one for the hash table itself */
	atomic_inc(&neigh->refcnt);
	neigh->alive = jiffies;
	/* publish at the head of the bucket */
	rcu_assign_pointer(neigh->hnext,
			   rcu_dereference_protected(htbl->buckets[hash_val],
						     lockdep_is_held(&priv->lock)));
	rcu_assign_pointer(htbl->buckets[hash_val], neigh);
	atomic_inc(&ntbl->entries);

out_unlock:

	return neigh;
}
1435
/*
 * Final destruction of a neighbour whose refcount reached zero:
 * release its address handle, drop any still-queued skbs (accounted
 * as tx_dropped), destroy the connected-mode TX context, and free the
 * entry.  If this was the last entry while a table flush is pending
 * (IPOIB_NEIGH_TBL_FLUSH set), wake the flusher via ntbl.flushed.
 */
void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
	/* neigh reference count dropped to zero */
	struct net_device *dev = neigh->dev;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct sk_buff *skb;
	if (neigh->ah)
		ipoib_put_ah(neigh->ah);
	while ((skb = __skb_dequeue(&neigh->queue))) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
	}
	if (ipoib_cm_get(neigh))
		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
	ipoib_dbg(ipoib_priv(dev),
		  "neigh free for %06x %pI6\n",
		  IPOIB_QPN(neigh->daddr),
		  neigh->daddr + 4);
	kfree(neigh);
	if (atomic_dec_and_test(&priv->ntbl.entries)) {
		if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags))
			complete(&priv->ntbl.flushed);
	}
}
1460
/*
 * RCU callback run after an entry was unlinked from the hash table:
 * drop the table's reference now that no reader can still see it.
 */
static void ipoib_neigh_reclaim(struct rcu_head *rp)
{
	/* Called as a result of removal from hash table */
	struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu);

	ipoib_neigh_put(neigh);
}
1468
/*
 * Unlink @neigh from its hash bucket and defer dropping the table's
 * reference to an RCU grace period.  Silently returns if the table is
 * gone or the entry is no longer linked.
 *
 * NOTE(review): dereferences use lockdep_is_held(&priv->lock), so the
 * caller is expected to hold priv->lock — confirm at call sites.
 */
void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
	struct net_device *dev = neigh->dev;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_neigh_table *ntbl = &priv->ntbl;
	struct ipoib_neigh_hash *htbl;
	struct ipoib_neigh __rcu **np;
	struct ipoib_neigh *n;
	u32 hash_val;

	htbl = rcu_dereference_protected(ntbl->htbl,
					lockdep_is_held(&priv->lock));
	if (!htbl)
		return;

	hash_val = ipoib_addr_hash(htbl, neigh->daddr);
	np = &htbl->buckets[hash_val];
	for (n = rcu_dereference_protected(*np,
					    lockdep_is_held(&priv->lock));
	     n != NULL;
	     n = rcu_dereference_protected(*np,
					lockdep_is_held(&priv->lock))) {
		if (n == neigh) {
			/* found: splice it out of the chain */
			rcu_assign_pointer(*np,
					   rcu_dereference_protected(neigh->hnext,
								     lockdep_is_held(&priv->lock)));
			/* remove from parent list */
			list_del_init(&neigh->list);
			call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
			return;
		} else {
			np = &n->hnext;
		}
	}
}
1505
/*
 * Allocate and publish the neighbour hash table and start the periodic
 * reaper.  The bucket count is arp_tbl.gc_thresh3 rounded up to a
 * power of two so the mask can be used for hashing.
 * Returns 0 on success or -ENOMEM.
 */
static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
{
	struct ipoib_neigh_table *ntbl = &priv->ntbl;
	struct ipoib_neigh_hash *htbl;
	struct ipoib_neigh __rcu **buckets;
	u32 size;

	clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
	ntbl->htbl = NULL;
	htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
	if (!htbl)
		return -ENOMEM;
	size = roundup_pow_of_two(arp_tbl.gc_thresh3);
	buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
	if (!buckets) {
		kfree(htbl);
		return -ENOMEM;
	}
	htbl->size = size;
	htbl->mask = (size - 1);
	htbl->buckets = buckets;
	RCU_INIT_POINTER(ntbl->htbl, htbl);
	htbl->ntbl = ntbl;
	atomic_set(&ntbl->entries, 0);

	/* start garbage collection */
	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
			   arp_tbl.gc_interval);

	return 0;
}
1537
1538static void neigh_hash_free_rcu(struct rcu_head *head)
1539{
1540 struct ipoib_neigh_hash *htbl = container_of(head,
1541 struct ipoib_neigh_hash,
1542 rcu);
1543 struct ipoib_neigh __rcu **buckets = htbl->buckets;
1544 struct ipoib_neigh_table *ntbl = htbl->ntbl;
1545
1546 kvfree(buckets);
1547 kfree(htbl);
1548 complete(&ntbl->deleted);
1549}
1550
/*
 * Remove every neighbour whose destination GID (bytes 4.. of the
 * hardware address) equals @gid.  Entries are unlinked under
 * priv->lock and freed after an RCU grace period.
 */
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ipoib_neigh_table *ntbl = &priv->ntbl;
	struct ipoib_neigh_hash *htbl;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&priv->lock, flags);

	htbl = rcu_dereference_protected(ntbl->htbl,
					 lockdep_is_held(&priv->lock));

	if (!htbl)
		goto out_unlock;

	for (i = 0; i < htbl->size; i++) {
		struct ipoib_neigh *neigh;
		struct ipoib_neigh __rcu **np = &htbl->buckets[i];

		while ((neigh = rcu_dereference_protected(*np,
							  lockdep_is_held(&priv->lock))) != NULL) {
			/* delete neighs belong to this parent */
			if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
				rcu_assign_pointer(*np,
						   rcu_dereference_protected(neigh->hnext,
									     lockdep_is_held(&priv->lock)));
				/* remove from parent list */
				list_del_init(&neigh->list);
				call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
			} else {
				np = &neigh->hnext;
			}

		}
	}
out_unlock:
	spin_unlock_irqrestore(&priv->lock, flags);
}
1591
/*
 * Tear down the entire neighbour table: unlink every entry, retire the
 * hash structure via RCU, then wait until all entries have actually
 * been destructed.  IPOIB_NEIGH_TBL_FLUSH tells ipoib_neigh_dtor() to
 * complete ntbl.flushed when the entry count drops to zero.
 */
static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
{
	struct ipoib_neigh_table *ntbl = &priv->ntbl;
	struct ipoib_neigh_hash *htbl;
	unsigned long flags;
	int i, wait_flushed = 0;

	init_completion(&priv->ntbl.flushed);
	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);

	spin_lock_irqsave(&priv->lock, flags);

	htbl = rcu_dereference_protected(ntbl->htbl,
					lockdep_is_held(&priv->lock));
	if (!htbl)
		goto out_unlock;

	wait_flushed = atomic_read(&priv->ntbl.entries);
	if (!wait_flushed)
		goto free_htbl;

	for (i = 0; i < htbl->size; i++) {
		struct ipoib_neigh *neigh;
		struct ipoib_neigh __rcu **np = &htbl->buckets[i];

		while ((neigh = rcu_dereference_protected(*np,
							  lockdep_is_held(&priv->lock))) != NULL) {
			rcu_assign_pointer(*np,
					   rcu_dereference_protected(neigh->hnext,
								     lockdep_is_held(&priv->lock)));
			/* remove from parent list */
			list_del_init(&neigh->list);
			call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
		}
	}

free_htbl:
	rcu_assign_pointer(ntbl->htbl, NULL);
	call_rcu(&htbl->rcu, neigh_hash_free_rcu);

out_unlock:
	spin_unlock_irqrestore(&priv->lock, flags);
	if (wait_flushed)
		wait_for_completion(&priv->ntbl.flushed);
}
1637
/*
 * Counterpart of ipoib_neigh_hash_init(): stop the reaper, flush all
 * neighbours, then wait for the RCU free of the hash table itself
 * (ntbl.deleted is completed by neigh_hash_free_rcu()).
 */
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_dbg(priv, "%s\n", __func__);
	init_completion(&priv->ntbl.deleted);

	cancel_delayed_work_sync(&priv->neigh_reap_task);

	ipoib_flush_neighs(priv);

	wait_for_completion(&priv->ntbl.deleted);
}
1651
1652static void ipoib_napi_add(struct net_device *dev)
1653{
1654 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1655
1656 netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC);
1657 netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE);
1658}
1659
1660static void ipoib_napi_del(struct net_device *dev)
1661{
1662 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1663
1664 netif_napi_del(&priv->recv_napi);
1665 netif_napi_del(&priv->send_napi);
1666}
1667
/*
 * Default ndo_uninit: release transport resources, NAPI contexts and
 * connected-mode state, then free both descriptor rings.  The ring
 * pointers are cleared so a later re-init starts from a clean state.
 */
static void ipoib_dev_uninit_default(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_transport_dev_cleanup(dev);

	ipoib_napi_del(dev);

	ipoib_cm_dev_cleanup(dev);

	kfree(priv->rx_ring);
	vfree(priv->tx_ring);

	priv->rx_ring = NULL;
	priv->tx_ring = NULL;
}
1684
1685static int ipoib_dev_init_default(struct net_device *dev)
1686{
1687 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1688
1689 ipoib_napi_add(dev);
1690
1691
1692 priv->rx_ring = kcalloc(ipoib_recvq_size,
1693 sizeof(*priv->rx_ring),
1694 GFP_KERNEL);
1695 if (!priv->rx_ring)
1696 goto out;
1697
1698 priv->tx_ring = vzalloc(array_size(ipoib_sendq_size,
1699 sizeof(*priv->tx_ring)));
1700 if (!priv->tx_ring) {
1701 pr_warn("%s: failed to allocate TX ring (%d entries)\n",
1702 priv->ca->name, ipoib_sendq_size);
1703 goto out_rx_ring_cleanup;
1704 }
1705
1706
1707
1708 if (ipoib_transport_dev_init(dev, priv->ca)) {
1709 pr_warn("%s: ipoib_transport_dev_init failed\n",
1710 priv->ca->name);
1711 goto out_tx_ring_cleanup;
1712 }
1713
1714
1715 priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
1716 priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
1717 priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
1718
1719 return 0;
1720
1721out_tx_ring_cleanup:
1722 vfree(priv->tx_ring);
1723
1724out_rx_ring_cleanup:
1725 kfree(priv->rx_ring);
1726
1727out:
1728 ipoib_napi_del(dev);
1729 return -ENOMEM;
1730}
1731
1732static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
1733 int cmd)
1734{
1735 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1736
1737 if (!priv->rn_ops->ndo_do_ioctl)
1738 return -EOPNOTSUPP;
1739
1740 return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
1741}
1742
/*
 * Full per-device initialization: ordered workqueue, PD, hardware
 * resources (through rn_ops->ndo_init) and the neighbour hash.  If the
 * interface is administratively UP, the IB device is (re)opened too.
 * All resources are unwound through the goto chain on failure.
 */
static int ipoib_dev_init(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int ret = -ENOMEM;

	priv->qp = NULL;

	/*
	 * An ordered (single-threaded) workqueue: the IPoIB tasks assume
	 * they never run concurrently with themselves.
	 */
	priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
	if (!priv->wq) {
		pr_warn("%s: failed to allocate device WQ\n", dev->name);
		goto out;
	}

	/* PD is shared by control and data path */
	priv->pd = ib_alloc_pd(priv->ca, 0);
	if (IS_ERR(priv->pd)) {
		pr_warn("%s: failed to allocate PD\n", priv->ca->name);
		goto clean_wq;
	}

	ret = priv->rn_ops->ndo_init(dev);
	if (ret) {
		pr_warn("%s failed to init HW resource\n", dev->name);
		goto out_free_pd;
	}

	ret = ipoib_neigh_hash_init(priv);
	if (ret) {
		pr_warn("%s failed to init neigh hash\n", dev->name);
		goto out_dev_uninit;
	}

	/* re-open if this is a re-init of an already-UP interface */
	if (dev->flags & IFF_UP) {
		if (ipoib_ib_dev_open(dev)) {
			pr_warn("%s failed to open device\n", dev->name);
			ret = -ENODEV;
			goto out_hash_uninit;
		}
	}

	return 0;

out_hash_uninit:
	ipoib_neigh_hash_uninit(dev);

out_dev_uninit:
	ipoib_ib_dev_cleanup(dev);

out_free_pd:
	if (priv->pd) {
		ib_dealloc_pd(priv->pd);
		priv->pd = NULL;
	}

clean_wq:
	if (priv->wq) {
		destroy_workqueue(priv->wq);
		priv->wq = NULL;
	}

out:
	return ret;
}
1810
1811
1812
1813
1814
/*
 * Preparation before unregistering a parent interface: force it down,
 * stop receiving IB events, and drain the global workqueue so no
 * flush/restart work touches the device while it is being torn down.
 */
static void ipoib_parent_unregister_pre(struct net_device *ndev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(ndev);

	/*
	 * Take the device down administratively first, under RTNL as
	 * dev_change_flags() requires.
	 */
	rtnl_lock();
	dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP, NULL);
	rtnl_unlock();

	/* no more events from the IB core after this point */
	ib_unregister_event_handler(&priv->event_handler);

	/*
	 * Flush pending work that may still reference the device before
	 * the caller proceeds with unregistration.
	 */
	flush_workqueue(ipoib_workqueue);
}
1836
1837static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
1838{
1839 priv->hca_caps = priv->ca->attrs.device_cap_flags;
1840
1841 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
1842 priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
1843
1844 if (priv->hca_caps & IB_DEVICE_UD_TSO)
1845 priv->dev->hw_features |= NETIF_F_TSO;
1846
1847 priv->dev->features |= priv->dev->hw_features;
1848 }
1849}
1850
/*
 * Initialize a top-level (parent) interface from its IB port: query
 * MTU, the default P_Key and GID 0, build the GID part of the HW
 * address, and tie the netdev to the HCA's parent device and port.
 * Returns 0 or the error from the failing IB query.
 */
static int ipoib_parent_init(struct net_device *ndev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
	struct ib_port_attr attr;
	int result;

	result = ib_query_port(priv->ca, priv->port, &attr);
	if (result) {
		pr_warn("%s: ib_query_port %d failed\n", priv->ca->name,
			priv->port);
		return result;
	}
	priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr);

	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
	if (result) {
		pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
			priv->ca->name, priv->port, result);
		return result;
	}

	result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid);
	if (result) {
		pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n",
			priv->ca->name, priv->port, result);
		return result;
	}
	/* HW address layout: 4 reserved/QPN bytes followed by the GID */
	memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
	       sizeof(union ib_gid));

	SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
	priv->dev->dev_port = priv->port - 1;
	/* Let's set this one too for backwards compatibility. */
	priv->dev->dev_id = priv->port - 1;

	return 0;
}
1888
/*
 * Initialize a child (sub-)interface from its parent: inherit the
 * MTU limit, and either keep an address already assigned to the child
 * (deriving local_gid from it) or copy address and GID from the
 * parent when the child's address is still all-zero.
 */
static void ipoib_child_init(struct net_device *ndev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
	struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);

	priv->max_ib_mtu = ppriv->max_ib_mtu;
	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
	if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN))
		memcpy(&priv->local_gid, priv->dev->dev_addr + 4,
		       sizeof(priv->local_gid));
	else {
		memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr,
		       INFINIBAND_ALEN);
		memcpy(&priv->local_gid, &ppriv->local_gid,
		       sizeof(priv->local_gid));
	}
}
1906
/*
 * ndo_init for both parent and child interfaces: run the appropriate
 * sub-init, derive MTU limits, bake the P_Key into the broadcast
 * address, set feature flags, allocate device resources, and for a
 * child, link it into the parent's child list.
 */
static int ipoib_ndo_init(struct net_device *ndev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
	int rc;
	struct rdma_netdev *rn = netdev_priv(ndev);

	if (priv->parent) {
		ipoib_child_init(ndev);
	} else {
		rc = ipoib_parent_init(ndev);
		if (rc)
			return rc;
	}

	/* MTU will be reset when mcast join happens */
	ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
	priv->mcast_mtu = priv->admin_mtu = ndev->mtu;
	rn->mtu = priv->mcast_mtu;
	ndev->max_mtu = IPOIB_CM_MTU;

	ndev->neigh_priv_len = sizeof(struct ipoib_neigh);

	/*
	 * Keep the full-membership bit set in the P_Key and encode the
	 * P_Key into bytes 8/9 of the broadcast address.
	 */
	priv->pkey |= 0x8000;

	ndev->broadcast[8] = priv->pkey >> 8;
	ndev->broadcast[9] = priv->pkey & 0xff;
	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);

	ipoib_set_dev_features(priv);

	rc = ipoib_dev_init(ndev);
	if (rc) {
		pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n",
			priv->ca->name, priv->dev->name, priv->port, rc);
		return rc;
	}

	if (priv->parent) {
		struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);

		/* hold the parent as long as the child exists */
		dev_hold(priv->parent);

		down_write(&ppriv->vlan_rwsem);
		list_add_tail(&priv->list, &ppriv->child_intfs);
		up_write(&ppriv->vlan_rwsem);
	}

	return 0;
}
1960
/*
 * ndo_uninit: reverse of ipoib_ndo_init.  Children must already be
 * gone (the unregister paths remove them before the parent), so only
 * this device's neighbour table, IB resources and workqueue are torn
 * down here; finally the reference on the parent is dropped.
 */
static void ipoib_ndo_uninit(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ASSERT_RTNL();

	/* children are unregistered before their parent */
	WARN_ON(!list_empty(&priv->child_intfs));

	if (priv->parent) {
		struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);

		down_write(&ppriv->vlan_rwsem);
		list_del(&priv->list);
		up_write(&ppriv->vlan_rwsem);
	}

	ipoib_neigh_hash_uninit(dev);

	ipoib_ib_dev_cleanup(dev);

	/* wq may be NULL if ipoib_dev_init() failed part-way */
	if (priv->wq) {
		/* the device should be fully stopped by now */
		WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags));
		flush_workqueue(priv->wq);
		destroy_workqueue(priv->wq);
		priv->wq = NULL;
	}

	if (priv->parent)
		dev_put(priv->parent);
}
1997
1998static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
1999{
2000 struct ipoib_dev_priv *priv = ipoib_priv(dev);
2001
2002 return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
2003}
2004
2005static int ipoib_get_vf_config(struct net_device *dev, int vf,
2006 struct ifla_vf_info *ivf)
2007{
2008 struct ipoib_dev_priv *priv = ipoib_priv(dev);
2009 int err;
2010
2011 err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
2012 if (err)
2013 return err;
2014
2015 ivf->vf = vf;
2016 memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
2017
2018 return 0;
2019}
2020
2021static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
2022{
2023 struct ipoib_dev_priv *priv = ipoib_priv(dev);
2024
2025 if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
2026 return -EINVAL;
2027
2028 return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
2029}
2030
2031static int ipoib_get_vf_guid(struct net_device *dev, int vf,
2032 struct ifla_vf_guid *node_guid,
2033 struct ifla_vf_guid *port_guid)
2034{
2035 struct ipoib_dev_priv *priv = ipoib_priv(dev);
2036
2037 return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid);
2038}
2039
2040static int ipoib_get_vf_stats(struct net_device *dev, int vf,
2041 struct ifla_vf_stats *vf_stats)
2042{
2043 struct ipoib_dev_priv *priv = ipoib_priv(dev);
2044
2045 return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
2046}
2047
/* Link-layer header ops: IPoIB only needs header construction. */
static const struct header_ops ipoib_header_ops = {
	.create	= ipoib_hard_header,
};
2051
/* netdev ops for physical-function devices (includes the VF-management ndos) */
static const struct net_device_ops ipoib_netdev_ops_pf = {
	.ndo_init		 = ipoib_ndo_init,
	.ndo_uninit		 = ipoib_ndo_uninit,
	.ndo_open		 = ipoib_open,
	.ndo_stop		 = ipoib_stop,
	.ndo_change_mtu		 = ipoib_change_mtu,
	.ndo_fix_features	 = ipoib_fix_features,
	.ndo_start_xmit		 = ipoib_start_xmit,
	.ndo_tx_timeout		 = ipoib_timeout,
	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
	.ndo_get_iflink		 = ipoib_get_iflink,
	.ndo_set_vf_link_state	 = ipoib_set_vf_link_state,
	.ndo_get_vf_config	 = ipoib_get_vf_config,
	.ndo_get_vf_stats	 = ipoib_get_vf_stats,
	.ndo_get_vf_guid	 = ipoib_get_vf_guid,
	.ndo_set_vf_guid	 = ipoib_set_vf_guid,
	.ndo_set_mac_address	 = ipoib_set_mac,
	.ndo_get_stats64	 = ipoib_get_stats,
	.ndo_do_ioctl		 = ipoib_ioctl,
};
2072
/* netdev ops for virtual-function devices (no VF management, no set_mac) */
static const struct net_device_ops ipoib_netdev_ops_vf = {
	.ndo_init		 = ipoib_ndo_init,
	.ndo_uninit		 = ipoib_ndo_uninit,
	.ndo_open		 = ipoib_open,
	.ndo_stop		 = ipoib_stop,
	.ndo_change_mtu		 = ipoib_change_mtu,
	.ndo_fix_features	 = ipoib_fix_features,
	.ndo_start_xmit	 	 = ipoib_start_xmit,
	.ndo_tx_timeout		 = ipoib_timeout,
	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
	.ndo_get_iflink		 = ipoib_get_iflink,
	.ndo_get_stats64	 = ipoib_get_stats,
	.ndo_do_ioctl		 = ipoib_ioctl,
};
2087
/*
 * Fallback rn_ops used when the HCA provides no offloaded rdma netdev:
 * the software datapath's init/uninit/open/stop implementations.
 */
static const struct net_device_ops ipoib_netdev_default_pf = {
	.ndo_init		 = ipoib_dev_init_default,
	.ndo_uninit		 = ipoib_dev_uninit_default,
	.ndo_open		 = ipoib_ib_dev_open_default,
	.ndo_stop		 = ipoib_ib_dev_stop_default,
};
2094
/*
 * Common netdev setup callback for every IPoIB interface: header ops,
 * default (software-datapath) netdev ops, ethtool ops, link-layer
 * parameters and the IPv4 broadcast MGID template.
 */
void ipoib_setup_common(struct net_device *dev)
{
	dev->header_ops		 = &ipoib_header_ops;
	dev->netdev_ops          = &ipoib_netdev_default_pf;

	ipoib_set_ethtool_ops(dev);

	dev->watchdog_timeo	 = HZ;

	dev->flags		|= IFF_BROADCAST | IFF_MULTICAST;

	dev->hard_header_len	 = IPOIB_HARD_LEN;
	dev->addr_len		 = INFINIBAND_ALEN;
	dev->type		 = ARPHRD_INFINIBAND;
	dev->tx_queue_len	 = ipoib_sendq_size * 2;
	dev->features		 = (NETIF_F_VLAN_CHALLENGED	|
				    NETIF_F_HIGHDMA);
	netif_keep_dst(dev);

	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);

	/*
	 * The netdev is allocated here but freed by the core; the
	 * private destructor chain is handled in ipoib_intf_free().
	 */
	dev->needs_free_netdev = true;
}
2123
/*
 * Initialize the ipoib_dev_priv embedded state: locks, lists, and all
 * deferred-work items used by the driver.
 */
static void ipoib_build_priv(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	priv->dev = dev;
	spin_lock_init(&priv->lock);
	init_rwsem(&priv->vlan_rwsem);
	mutex_init(&priv->mcast_mutex);

	INIT_LIST_HEAD(&priv->path_list);
	INIT_LIST_HEAD(&priv->child_intfs);
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);

	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
	INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
2147
2148static struct net_device *ipoib_alloc_netdev(struct ib_device *hca, u8 port,
2149 const char *name)
2150{
2151 struct net_device *dev;
2152
2153 dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
2154 NET_NAME_UNKNOWN, ipoib_setup_common);
2155 if (!IS_ERR(dev) || PTR_ERR(dev) != -EOPNOTSUPP)
2156 return dev;
2157
2158 dev = alloc_netdev(sizeof(struct rdma_netdev), name, NET_NAME_UNKNOWN,
2159 ipoib_setup_common);
2160 if (!dev)
2161 return ERR_PTR(-ENOMEM);
2162 return dev;
2163}
2164
/*
 * Initialize the IPoIB layer on an already-allocated netdev: allocate
 * the private structure, let the HCA initialize its rdma netdev (or
 * install the software datapath callbacks on -EOPNOTSUPP), wrap the
 * original netdev_ops as rn_ops, and defer the original
 * priv_destructor so IPoIB teardown runs first.
 * Returns 0 or a negative errno.
 */
int ipoib_intf_init(struct ib_device *hca, u8 port, const char *name,
		    struct net_device *dev)
{
	struct rdma_netdev *rn = netdev_priv(dev);
	struct ipoib_dev_priv *priv;
	int rc;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->ca = hca;
	priv->port = port;

	rc = rdma_init_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
			      NET_NAME_UNKNOWN, ipoib_setup_common, dev);
	if (rc) {
		if (rc != -EOPNOTSUPP)
			goto out;

		/* HCA has no offloaded datapath: use the SW implementation */
		rn->send = ipoib_send;
		rn->attach_mcast = ipoib_mcast_attach;
		rn->detach_mcast = ipoib_mcast_detach;
		rn->hca = hca;
	}

	/* remember the underlying ops so IPoIB's ops can delegate to them */
	priv->rn_ops = dev->netdev_ops;

	if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
		dev->netdev_ops	= &ipoib_netdev_ops_vf;
	else
		dev->netdev_ops	= &ipoib_netdev_ops_pf;

	rn->clnt_priv = priv;

	/*
	 * Save the original destructor and clear it: ipoib_intf_free()
	 * will chain to it after IPoIB's own cleanup.
	 */
	priv->next_priv_destructor = dev->priv_destructor;
	dev->priv_destructor = NULL;

	ipoib_build_priv(dev);

	return 0;

out:
	kfree(priv);
	return rc;
}
2215
2216struct net_device *ipoib_intf_alloc(struct ib_device *hca, u8 port,
2217 const char *name)
2218{
2219 struct net_device *dev;
2220 int rc;
2221
2222 dev = ipoib_alloc_netdev(hca, port, name);
2223 if (IS_ERR(dev))
2224 return dev;
2225
2226 rc = ipoib_intf_init(hca, port, name, dev);
2227 if (rc) {
2228 free_netdev(dev);
2229 return ERR_PTR(rc);
2230 }
2231
2232
2233
2234
2235
2236
2237 return dev;
2238}
2239
/*
 * priv_destructor installed on registered IPoIB netdevs: run the
 * destructor that ipoib_intf_init() saved (the one the rdma netdev
 * provider originally installed), make sure the core does not call it
 * a second time, and free the IPoIB private structure.
 */
void ipoib_intf_free(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);

	/* chain to the provider's original destructor first */
	dev->priv_destructor = priv->next_priv_destructor;
	if (dev->priv_destructor)
		dev->priv_destructor(dev);

	/*
	 * Prevent the core from invoking the (already-run) destructor
	 * again when it frees the netdev.
	 */
	dev->priv_destructor = NULL;

	/* priv is about to be freed; drop the back-pointer */
	rn->clnt_priv = NULL;

	kfree(priv);
}
2260
2261static ssize_t show_pkey(struct device *dev,
2262 struct device_attribute *attr, char *buf)
2263{
2264 struct net_device *ndev = to_net_dev(dev);
2265 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
2266
2267 return sysfs_emit(buf, "0x%04x\n", priv->pkey);
2268}
2269static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2270
2271static ssize_t show_umcast(struct device *dev,
2272 struct device_attribute *attr, char *buf)
2273{
2274 struct net_device *ndev = to_net_dev(dev);
2275 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
2276
2277 return sysfs_emit(buf, "%d\n",
2278 test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
2279}
2280
2281void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
2282{
2283 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
2284
2285 if (umcast_val > 0) {
2286 set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
2287 ipoib_warn(priv, "ignoring multicast groups joined directly "
2288 "by userspace\n");
2289 } else
2290 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
2291}
2292
2293static ssize_t set_umcast(struct device *dev,
2294 struct device_attribute *attr,
2295 const char *buf, size_t count)
2296{
2297 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
2298
2299 ipoib_set_umcast(to_net_dev(dev), umcast_val);
2300
2301 return count;
2302}
2303static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
2304
/* Expose the "umcast" sysfs attribute on @dev; returns 0 or -errno. */
int ipoib_add_umcast_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_umcast);
}
2309
/*
 * Replace the interface-ID half of the local GID (and the GID portion
 * of the HW address) with the one from @gid, under the netdev address
 * lock.  For a parent interface, the new GUID is propagated
 * recursively to all child interfaces under vlan_rwsem.
 */
static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
{
	struct ipoib_dev_priv *child_priv;
	struct net_device *netdev = priv->dev;

	netif_addr_lock_bh(netdev);

	memcpy(&priv->local_gid.global.interface_id,
	       &gid->global.interface_id,
	       sizeof(gid->global.interface_id));
	memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
	/* address no longer matches what the port reported */
	clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);

	netif_addr_unlock_bh(netdev);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		down_read(&priv->vlan_rwsem);
		list_for_each_entry(child_priv, &priv->child_intfs, list)
			set_base_guid(child_priv, gid);
		up_read(&priv->vlan_rwsem);
	}
}
2332
/*
 * Validate a candidate link-layer address for ipoib_set_mac(): the
 * leading 4 bytes (QPN part) and the GID subnet prefix must match the
 * current address, and the interface ID must be non-zero — only the
 * GUID may change.  Returns 0 if acceptable, -EINVAL otherwise.
 */
static int ipoib_check_lladdr(struct net_device *dev,
			      struct sockaddr_storage *ss)
{
	union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
	int ret = 0;

	netif_addr_lock_bh(dev);

	/*
	 * Compare the reserved/QPN bytes plus the subnet prefix; reject
	 * an all-zero interface ID.
	 */
	if (memcmp(dev->dev_addr, ss->__data,
		   4 + sizeof(gid->global.subnet_prefix)) ||
	    gid->global.interface_id == 0)
		ret = -EINVAL;

	netif_addr_unlock_bh(dev);

	return ret;
}
2353
/*
 * ndo_set_mac_address: only the GUID portion may change, and only
 * while the device is down unless live address change is allowed.
 * After updating the base GUID a light flush is scheduled to re-apply
 * device state with the new address.
 */
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct sockaddr_storage *ss = addr;
	int ret;

	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
		return -EBUSY;

	ret = ipoib_check_lladdr(dev, ss);
	if (ret)
		return ret;

	set_base_guid(priv, (union ib_gid *)(ss->__data + 4));

	queue_work(ipoib_workqueue, &priv->flush_light);

	return 0;
}
2373
2374static ssize_t create_child(struct device *dev,
2375 struct device_attribute *attr,
2376 const char *buf, size_t count)
2377{
2378 int pkey;
2379 int ret;
2380
2381 if (sscanf(buf, "%i", &pkey) != 1)
2382 return -EINVAL;
2383
2384 if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000)
2385 return -EINVAL;
2386
2387 ret = ipoib_vlan_add(to_net_dev(dev), pkey);
2388
2389 return ret ? ret : count;
2390}
2391static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child);
2392
2393static ssize_t delete_child(struct device *dev,
2394 struct device_attribute *attr,
2395 const char *buf, size_t count)
2396{
2397 int pkey;
2398 int ret;
2399
2400 if (sscanf(buf, "%i", &pkey) != 1)
2401 return -EINVAL;
2402
2403 if (pkey < 0 || pkey > 0xffff)
2404 return -EINVAL;
2405
2406 ret = ipoib_vlan_delete(to_net_dev(dev), pkey);
2407
2408 return ret ? ret : count;
2409
2410}
2411static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child);
2412
/* Expose the "pkey" sysfs attribute on @dev; returns 0 or -errno. */
int ipoib_add_pkey_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_pkey);
}
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
/*
 * Replacement for the core's dev_id sysfs attribute.  IPoIB sets
 * dev_id = port - 1 for backward compatibility (see
 * ipoib_parent_init()), but dev_port is the proper attribute for the
 * port number; nudge readers that appear to rely on dev_id toward
 * dev_port via a one-time log message.
 */
static ssize_t dev_id_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	/*
	 * Warn only when dev_id mirrors a non-zero dev_port, i.e. the
	 * reader could (and should) have used dev_port instead.
	 */
	if (ndev->dev_port && ndev->dev_id == ndev->dev_port)
		netdev_info_once(ndev,
			"\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
			current->comm);

	return sysfs_emit(buf, "%#x\n", ndev->dev_id);
}
static DEVICE_ATTR_RO(dev_id);
2451
/*
 * Swap the core's dev_id attribute for IPoIB's own dev_id_show so the
 * one-time "use dev_port" hint above can be emitted.
 */
static int ipoib_intercept_dev_id_attr(struct net_device *dev)
{
	device_remove_file(&dev->dev, &dev_attr_dev_id);
	return device_create_file(&dev->dev, &dev_attr_dev_id);
}
2457
/*
 * Create and register one IPoIB parent interface for @hca/@port:
 * allocate the netdev, hook up the IB event handler, schedule the
 * initial heavy flush, register the netdev, grow the rtnl link ops
 * priv size if the HCA requires more room, install the destructor and
 * create the sysfs attributes.  Returns the netdev or an ERR_PTR.
 */
static struct net_device *ipoib_add_port(const char *format,
					 struct ib_device *hca, u8 port)
{
	struct rtnl_link_ops *ops = ipoib_get_link_ops();
	struct rdma_netdev_alloc_params params;
	struct ipoib_dev_priv *priv;
	struct net_device *ndev;
	int result;

	ndev = ipoib_intf_alloc(hca, port, format);
	if (IS_ERR(ndev)) {
		pr_warn("%s, %d: ipoib_intf_alloc failed %ld\n", hca->name, port,
			PTR_ERR(ndev));
		return ndev;
	}
	priv = ipoib_priv(ndev);

	INIT_IB_EVENT_HANDLER(&priv->event_handler,
			      priv->ca, ipoib_event);
	ib_register_event_handler(&priv->event_handler);

	/* call event handler to ensure pkey in sync */
	queue_work(ipoib_workqueue, &priv->flush_heavy);

	ndev->rtnl_link_ops = ipoib_get_link_ops();

	result = register_netdev(ndev);
	if (result) {
		pr_warn("%s: couldn't register ipoib port %d; error %d\n",
			hca->name, port, result);

		/* registration never happened: tear down manually */
		ipoib_parent_unregister_pre(ndev);
		ipoib_intf_free(ndev);
		free_netdev(ndev);

		return ERR_PTR(result);
	}

	/*
	 * Ask the HCA how much private room its rdma netdevs need and
	 * grow the link ops' priv_size accordingly for future children.
	 */
	if (hca->ops.rdma_netdev_get_params) {
		int rc = hca->ops.rdma_netdev_get_params(hca, port,
							 RDMA_NETDEV_IPOIB,
							 &params);

		if (!rc && ops->priv_size < params.sizeof_priv)
			ops->priv_size = params.sizeof_priv;
	}

	/*
	 * Only install the destructor after successful registration;
	 * from here unregister_netdev() will run IPoIB cleanup for us.
	 */
	ndev->priv_destructor = ipoib_intf_free;

	if (ipoib_intercept_dev_id_attr(ndev))
		goto sysfs_failed;
	if (ipoib_cm_add_mode_attr(ndev))
		goto sysfs_failed;
	if (ipoib_add_pkey_attr(ndev))
		goto sysfs_failed;
	if (ipoib_add_umcast_attr(ndev))
		goto sysfs_failed;
	if (device_create_file(&ndev->dev, &dev_attr_create_child))
		goto sysfs_failed;
	if (device_create_file(&ndev->dev, &dev_attr_delete_child))
		goto sysfs_failed;

	return ndev;

sysfs_failed:
	ipoib_parent_unregister_pre(ndev);
	unregister_netdev(ndev);
	return ERR_PTR(-ENOMEM);
}
2532
/*
 * IB client "add" callback: create an "ib%d" interface for every IB
 * port of @device and store the resulting list as client data.
 * Returns -EOPNOTSUPP when no port yielded an interface.
 */
static int ipoib_add_one(struct ib_device *device)
{
	struct list_head *dev_list;
	struct net_device *dev;
	struct ipoib_dev_priv *priv;
	unsigned int p;
	int count = 0;

	dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
	if (!dev_list)
		return -ENOMEM;

	INIT_LIST_HEAD(dev_list);

	rdma_for_each_port (device, p) {
		/* skip non-IB (e.g. RoCE/iWARP) ports */
		if (!rdma_protocol_ib(device, p))
			continue;
		dev = ipoib_add_port("ib%d", device, p);
		if (!IS_ERR(dev)) {
			priv = ipoib_priv(dev);
			list_add_tail(&priv->list, dev_list);
			count++;
		}
	}

	if (!count) {
		kfree(dev_list);
		return -EOPNOTSUPP;
	}

	ib_set_client_data(device, &ipoib_client, dev_list);
	return 0;
}
2566
/*
 * IB client "remove" callback: for every interface created in
 * ipoib_add_one(), unregister all child interfaces first and then the
 * parent, batched under one RTNL section per parent.
 */
static void ipoib_remove_one(struct ib_device *device, void *client_data)
{
	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
	struct list_head *dev_list = client_data;

	list_for_each_entry_safe(priv, tmp, dev_list, list) {
		LIST_HEAD(head);
		ipoib_parent_unregister_pre(priv->dev);

		rtnl_lock();

		/* children go before the parent */
		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs,
					 list)
			unregister_netdevice_queue(cpriv->dev, &head);
		unregister_netdevice_queue(priv->dev, &head);
		unregister_netdevice_many(&head);

		rtnl_unlock();
	}

	kfree(dev_list);
}
2589
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/*
 * Debug-build-only notifier: routes net_device events to
 * ipoib_netdev_event().  Registered/unregistered in the module
 * init/exit paths below.
 */
static struct notifier_block ipoib_netdev_notifier = {
	.notifier_call = ipoib_netdev_event,
};
#endif
2595
/*
 * Module entry point: clamp the queue-size module parameters to sane
 * power-of-two bounds, then bring up debugfs, the flush workqueue, the
 * SA client, the IB client registration, and netlink support.  Failures
 * unwind the already-completed steps in reverse via the goto labels.
 */
static int __init ipoib_init_module(void)
{
	int ret;

	/* Receive queue: power of two, clamped to [MIN, MAX]. */
	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);

	/* Send queue: same clamping, but must also hold 2 * MAX_SEND_CQE. */
	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE);
#ifdef CONFIG_INFINIBAND_IPOIB_CM
	/* Connected-mode QP count: clamp to [0, IPOIB_CM_MAX_CONN_QP]. */
	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
	ipoib_max_conn_qp = max(ipoib_max_conn_qp, 0);
#endif

	BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);

	ipoib_register_debugfs();

	/*
	 * Single (ordered) workqueue shared by all devices for flush work;
	 * work items on it execute one at a time.
	 */
	ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0);
	if (!ipoib_workqueue) {
		ret = -ENOMEM;
		goto err_fs;
	}

	ib_sa_register_client(&ipoib_sa_client);

	ret = ib_register_client(&ipoib_client);
	if (ret)
		goto err_sa;

	ret = ipoib_netlink_init();
	if (ret)
		goto err_client;

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
	/* Return value intentionally ignored: debug-only bookkeeping. */
	register_netdevice_notifier(&ipoib_netdev_notifier);
#endif
	return 0;

/* Unwind in reverse order of the setup above. */
err_client:
	ib_unregister_client(&ipoib_client);

err_sa:
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);

err_fs:
	ipoib_unregister_debugfs();

	return ret;
}
2663
/*
 * Module exit: undo every registration performed by ipoib_init_module()
 * (notifier, netlink, IB client, SA client, debugfs, workqueue).
 */
static void __exit ipoib_cleanup_module(void)
{
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
	unregister_netdevice_notifier(&ipoib_netdev_notifier);
#endif
	ipoib_netlink_fini();
	/* Triggers ipoib_remove_one() for every registered device. */
	ib_unregister_client(&ipoib_client);
	ib_sa_unregister_client(&ipoib_sa_client);
	ipoib_unregister_debugfs();
	destroy_workqueue(ipoib_workqueue);
}
2675
/* Hook the init/exit functions into the module load/unload machinery. */
module_init(ipoib_init_module);
module_exit(ipoib_cleanup_module);