#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include "core_priv.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");

struct ib_client_data {
	struct list_head  list;
	struct ib_client *client;
	void		 *data;
	/* The device or client is going to be removed */
	bool		  going_down;
};

struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);

static LIST_HEAD(device_list);
static LIST_HEAD(client_list);

/*
 * device_mutex and lists_rwsem protect access to both device_list and
 * client_list.  device_mutex protects writer access by device and client
 * registration / de-registration.  lists_rwsem protects reader access to
 * these lists; iterators must take it for read, and updates must take it
 * for write.  A special case is when device_mutex is held: then locking
 * the lists for read access is not necessary, as device_mutex implies it.
 *
 * lists_rwsem also protects access to the per-device client data lists.
 */
static DEFINE_MUTEX(device_mutex);
static DECLARE_RWSEM(lists_rwsem);

static int ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
	static const struct {
		size_t offset;
		char  *name;
	} mandatory_table[] = {
		IB_MANDATORY_FUNC(query_device),
		IB_MANDATORY_FUNC(query_port),
		IB_MANDATORY_FUNC(query_pkey),
		IB_MANDATORY_FUNC(query_gid),
		IB_MANDATORY_FUNC(alloc_pd),
		IB_MANDATORY_FUNC(dealloc_pd),
		IB_MANDATORY_FUNC(create_ah),
		IB_MANDATORY_FUNC(destroy_ah),
		IB_MANDATORY_FUNC(create_qp),
		IB_MANDATORY_FUNC(modify_qp),
		IB_MANDATORY_FUNC(destroy_qp),
		IB_MANDATORY_FUNC(post_send),
		IB_MANDATORY_FUNC(post_recv),
		IB_MANDATORY_FUNC(create_cq),
		IB_MANDATORY_FUNC(destroy_cq),
		IB_MANDATORY_FUNC(poll_cq),
		IB_MANDATORY_FUNC(req_notify_cq),
		IB_MANDATORY_FUNC(get_dma_mr),
		IB_MANDATORY_FUNC(dereg_mr),
		IB_MANDATORY_FUNC(get_port_immutable)
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
		if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
			printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
			       device->name, mandatory_table[i].name);
			return -EINVAL;
		}
	}

	return 0;
}

static struct ib_device *__ib_device_get_by_name(const char *name)
{
	struct ib_device *device;

	list_for_each_entry(device, &device_list, core_list)
		if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
			return device;

	return NULL;
}

/*
 * Pick a unique device name from a printf-style template such as "mlx4_%d":
 * find the lowest index not already used by a registered device and write
 * the resulting name back into @name.
 */
static int alloc_name(char *name)
{
	unsigned long *inuse;
	char buf[IB_DEVICE_NAME_MAX];
	struct ib_device *device;
	int i;

	inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	list_for_each_entry(device, &device_list, core_list) {
		if (!sscanf(device->name, name, &i))
			continue;
		if (i < 0 || i >= PAGE_SIZE * 8)
			continue;
		snprintf(buf, sizeof buf, name, i);
		if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
			set_bit(i, inuse);
	}

	i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
	free_page((unsigned long) inuse);
	snprintf(buf, sizeof buf, name, i);

	if (__ib_device_get_by_name(buf))
		return -ENFILE;

	strlcpy(name, buf, IB_DEVICE_NAME_MAX);
	return 0;
}

static void ib_device_release(struct device *device)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	ib_cache_release_one(dev);
	kfree(dev->port_immutable);
	kfree(dev);
}

static int ib_device_uevent(struct device *device,
			    struct kobj_uevent_env *env)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	if (add_uevent_var(env, "NAME=%s", dev->name))
		return -ENOMEM;

	return 0;
}

static struct class ib_class = {
	.name        = "infiniband",
	.dev_release = ib_device_release,
	.dev_uevent  = ib_device_uevent,
};

/**
 * ib_alloc_device - allocate an IB device struct
 * @size: size of structure to allocate
 *
 * Low-level drivers should use ib_alloc_device() to allocate &struct
 * ib_device.  @size is the size of the structure to be allocated,
 * including any private data used by the low-level driver.
 * ib_dealloc_device() must be used to free structures allocated with
 * ib_alloc_device().
 */
struct ib_device *ib_alloc_device(size_t size)
{
	struct ib_device *device;

	if (WARN_ON(size < sizeof(struct ib_device)))
		return NULL;

	device = kzalloc(size, GFP_KERNEL);
	if (!device)
		return NULL;

	device->dev.class = &ib_class;
	device_initialize(&device->dev);

	dev_set_drvdata(&device->dev, device);

	INIT_LIST_HEAD(&device->event_handler_list);
	spin_lock_init(&device->event_handler_lock);
	spin_lock_init(&device->client_data_lock);
	INIT_LIST_HEAD(&device->client_data_list);
	INIT_LIST_HEAD(&device->port_list);

	return device;
}
EXPORT_SYMBOL(ib_alloc_device);

/**
 * ib_dealloc_device - free an IB device struct
 * @device: structure to free
 *
 * Free a structure allocated with ib_alloc_device().
 */
void ib_dealloc_device(struct ib_device *device)
{
	WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
		device->reg_state != IB_DEV_UNINITIALIZED);
	kobject_put(&device->dev.kobj);
}
EXPORT_SYMBOL(ib_dealloc_device);
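
/*
 * Illustrative sketch (not part of the original file): a low-level driver
 * typically embeds struct ib_device as the first member of its own device
 * structure and sizes the ib_alloc_device() call accordingly.  The names
 * "struct my_hw_dev" and "hw" below are hypothetical.
 *
 *	struct my_hw_dev {
 *		struct ib_device ib_dev;	(must be the first member)
 *		void __iomem    *regs;
 *	};
 *
 *	struct my_hw_dev *hw;
 *
 *	hw = (struct my_hw_dev *)ib_alloc_device(sizeof(*hw));
 *	if (!hw)
 *		return -ENOMEM;
 *
 * Once the device has been unregistered (or was never registered), it is
 * released with ib_dealloc_device(&hw->ib_dev).
 */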

static int add_client_context(struct ib_device *device, struct ib_client *client)
{
	struct ib_client_data *context;
	unsigned long flags;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
		       device->name, client->name);
		return -ENOMEM;
	}

	context->client = client;
	context->data   = NULL;
	context->going_down = false;

	down_write(&lists_rwsem);
	spin_lock_irqsave(&device->client_data_lock, flags);
	list_add(&context->list, &device->client_data_list);
	spin_unlock_irqrestore(&device->client_data_lock, flags);
	up_write(&lists_rwsem);

	return 0;
}

static int verify_immutable(const struct ib_device *dev, u8 port)
{
	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
			    rdma_max_mad_size(dev, port) != 0);
}

static int read_port_immutable(struct ib_device *device)
{
	int ret;
	u8 start_port = rdma_start_port(device);
	u8 end_port = rdma_end_port(device);
	u8 port;

	/*
	 * device->port_immutable is indexed directly by the port number,
	 * so it is allocated with end_port + 1 entries and may have unused
	 * slots at the beginning (ports are numbered from rdma_start_port()).
	 */
	device->port_immutable = kzalloc(sizeof(*device->port_immutable)
					 * (end_port + 1),
					 GFP_KERNEL);
	if (!device->port_immutable)
		return -ENOMEM;

	for (port = start_port; port <= end_port; ++port) {
		ret = device->get_port_immutable(device, port,
						 &device->port_immutable[port]);
		if (ret)
			return ret;

		if (verify_immutable(device, port))
			return -EINVAL;
	}
	return 0;
}

/**
 * ib_register_device - Register an IB device with IB core
 * @device: Device to register
 * @port_callback: Callback used when creating per-port sysfs entries
 *
 * Low-level drivers use ib_register_device() to register their
 * devices with the IB core.  All registered clients will receive a
 * callback for each device that is added.  @device must be allocated
 * with ib_alloc_device().
 */
int ib_register_device(struct ib_device *device,
		       int (*port_callback)(struct ib_device *,
					    u8, struct kobject *))
{
	int ret;
	struct ib_client *client;
	struct ib_udata uhw = {.outlen = 0, .inlen = 0};

	mutex_lock(&device_mutex);

	if (strchr(device->name, '%')) {
		ret = alloc_name(device->name);
		if (ret)
			goto out;
	}

	if (ib_device_check_mandatory(device)) {
		ret = -EINVAL;
		goto out;
	}

	ret = read_port_immutable(device);
	if (ret) {
		printk(KERN_WARNING "Couldn't create per port immutable data %s\n",
		       device->name);
		goto out;
	}

	ret = ib_cache_setup_one(device);
	if (ret) {
		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
		goto out;
	}

	memset(&device->attrs, 0, sizeof(device->attrs));
	ret = device->query_device(device, &device->attrs, &uhw);
	if (ret) {
		printk(KERN_WARNING "Couldn't query the device attributes\n");
		ib_cache_cleanup_one(device);
		goto out;
	}

	ret = ib_device_register_sysfs(device, port_callback);
	if (ret) {
		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
		       device->name);
		ib_cache_cleanup_one(device);
		goto out;
	}

	device->reg_state = IB_DEV_REGISTERED;

	list_for_each_entry(client, &client_list, list)
		if (client->add && !add_client_context(device, client))
			client->add(device);

	down_write(&lists_rwsem);
	list_add_tail(&device->core_list, &device_list);
	up_write(&lists_rwsem);
out:
	mutex_unlock(&device_mutex);
	return ret;
}
EXPORT_SYMBOL(ib_register_device);

/**
 * ib_unregister_device - Unregister an IB device
 * @device: Device to unregister
 *
 * Unregister an IB device.  All clients will receive a remove callback.
 */
void ib_unregister_device(struct ib_device *device)
{
	struct ib_client_data *context, *tmp;
	unsigned long flags;

	mutex_lock(&device_mutex);

	down_write(&lists_rwsem);
	list_del(&device->core_list);
	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
		context->going_down = true;
	spin_unlock_irqrestore(&device->client_data_lock, flags);
	downgrade_write(&lists_rwsem);

	list_for_each_entry_safe(context, tmp, &device->client_data_list,
				 list) {
		if (context->client->remove)
			context->client->remove(device, context->data);
	}
	up_read(&lists_rwsem);

	mutex_unlock(&device_mutex);

	ib_device_unregister_sysfs(device);
	ib_cache_cleanup_one(device);

	down_write(&lists_rwsem);
	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
		kfree(context);
	spin_unlock_irqrestore(&device->client_data_lock, flags);
	up_write(&lists_rwsem);

	device->reg_state = IB_DEV_UNREGISTERED;
}
EXPORT_SYMBOL(ib_unregister_device);

/**
 * ib_register_client - Register an IB client
 * @client: Client to register
 *
 * Upper level users of the IB drivers can use ib_register_client() to
 * register callbacks for IB device addition and removal.  When an IB
 * device is added, each registered client's add method will be called
 * (in the order the clients were registered), and when a device is
 * removed, each client's remove method will be called (in the reverse
 * order that clients were registered).  In addition, when
 * ib_register_client() is called, the client will receive an add
 * callback for all devices already registered.
 */
int ib_register_client(struct ib_client *client)
{
	struct ib_device *device;

	mutex_lock(&device_mutex);

	list_for_each_entry(device, &device_list, core_list)
		if (client->add && !add_client_context(device, client))
			client->add(device);

	down_write(&lists_rwsem);
	list_add_tail(&client->list, &client_list);
	up_write(&lists_rwsem);

	mutex_unlock(&device_mutex);

	return 0;
}
EXPORT_SYMBOL(ib_register_client);
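
/*
 * Illustrative sketch (not part of the original file): a minimal client
 * that allocates per-device state in its add callback and stores it with
 * ib_set_client_data().  The names "my_client", "my_add", "my_remove" and
 * "struct my_state" are hypothetical.
 *
 *	static struct ib_client my_client;
 *
 *	static void my_add(struct ib_device *device)
 *	{
 *		struct my_state *st = kzalloc(sizeof(*st), GFP_KERNEL);
 *
 *		if (!st)
 *			return;
 *		ib_set_client_data(device, &my_client, st);
 *	}
 *
 *	static void my_remove(struct ib_device *device, void *client_data)
 *	{
 *		kfree(client_data);
 *	}
 *
 *	static struct ib_client my_client = {
 *		.name   = "my_client",
 *		.add    = my_add,
 *		.remove = my_remove,
 *	};
 *
 *	ret = ib_register_client(&my_client);
 */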

/**
 * ib_unregister_client - Unregister an IB client
 * @client: Client to unregister
 *
 * Upper level users use ib_unregister_client() to remove their client
 * registration.  When ib_unregister_client() is called, the client
 * will receive a remove callback for each IB device still registered.
 */
void ib_unregister_client(struct ib_client *client)
{
	struct ib_client_data *context, *tmp;
	struct ib_device *device;
	unsigned long flags;

	mutex_lock(&device_mutex);

	down_write(&lists_rwsem);
	list_del(&client->list);
	up_write(&lists_rwsem);

	list_for_each_entry(device, &device_list, core_list) {
		struct ib_client_data *found_context = NULL;

		down_write(&lists_rwsem);
		spin_lock_irqsave(&device->client_data_lock, flags);
		list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
			if (context->client == client) {
				context->going_down = true;
				found_context = context;
				break;
			}
		spin_unlock_irqrestore(&device->client_data_lock, flags);
		up_write(&lists_rwsem);

		if (client->remove)
			client->remove(device, found_context ?
					       found_context->data : NULL);

		if (!found_context) {
			pr_warn("No client context found for %s/%s\n",
				device->name, client->name);
			continue;
		}

		down_write(&lists_rwsem);
		spin_lock_irqsave(&device->client_data_lock, flags);
		list_del(&found_context->list);
		kfree(found_context);
		spin_unlock_irqrestore(&device->client_data_lock, flags);
		up_write(&lists_rwsem);
	}

	mutex_unlock(&device_mutex);
}
EXPORT_SYMBOL(ib_unregister_client);

/**
 * ib_get_client_data - Get IB client context
 * @device: Device to get context for
 * @client: Client to get context for
 *
 * ib_get_client_data() returns the client context set with
 * ib_set_client_data().
 */
void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
{
	struct ib_client_data *context;
	void *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry(context, &device->client_data_list, list)
		if (context->client == client) {
			ret = context->data;
			break;
		}
	spin_unlock_irqrestore(&device->client_data_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_client_data);

/**
 * ib_set_client_data - Set IB client context
 * @device: Device to set context for
 * @client: Client to set context for
 * @data: Context to set
 *
 * ib_set_client_data() sets the client context that can be retrieved
 * with ib_get_client_data().
 */
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
			void *data)
{
	struct ib_client_data *context;
	unsigned long flags;

	spin_lock_irqsave(&device->client_data_lock, flags);
	list_for_each_entry(context, &device->client_data_list, list)
		if (context->client == client) {
			context->data = data;
			goto out;
		}

	printk(KERN_WARNING "No client context found for %s/%s\n",
	       device->name, client->name);

out:
	spin_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);

/**
 * ib_register_event_handler - Register an IB event handler
 * @event_handler: Handler to register
 *
 * ib_register_event_handler() registers an event handler that will be
 * called back when asynchronous IB events occur (as defined in
 * chapter 11 of the InfiniBand Architecture Specification).  This
 * callback may occur in interrupt context.
 */
int ib_register_event_handler(struct ib_event_handler *event_handler)
{
	unsigned long flags;

	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
	list_add_tail(&event_handler->list,
		      &event_handler->device->event_handler_list);
	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_register_event_handler);

/**
 * ib_unregister_event_handler - Unregister an event handler
 * @event_handler: Handler to unregister
 *
 * Unregister an event handler registered with
 * ib_register_event_handler().
 */
int ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
	unsigned long flags;

	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
	list_del(&event_handler->list);
	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_unregister_event_handler);

/**
 * ib_dispatch_event - Dispatch an asynchronous event
 * @event: Event to dispatch
 *
 * Low-level drivers must call ib_dispatch_event() to dispatch the
 * event to all registered asynchronous event handlers.
 */
void ib_dispatch_event(struct ib_event *event)
{
	unsigned long flags;
	struct ib_event_handler *handler;

	spin_lock_irqsave(&event->device->event_handler_lock, flags);

	list_for_each_entry(handler, &event->device->event_handler_list, list)
		handler->handler(handler, event);

	spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_dispatch_event);
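
/*
 * Illustrative sketch (not part of the original file): consumers usually
 * set up a handler with the INIT_IB_EVENT_HANDLER() macro from
 * <rdma/ib_verbs.h> and register it; the handler may run in interrupt
 * context.  "my_event_cb" and "ev_handler" are hypothetical names.
 *
 *	static void my_event_cb(struct ib_event_handler *handler,
 *				struct ib_event *event)
 *	{
 *		if (event->event == IB_EVENT_PORT_ACTIVE)
 *			pr_info("%s: port %d active\n",
 *				event->device->name,
 *				event->element.port_num);
 *	}
 *
 *	struct ib_event_handler ev_handler;
 *
 *	INIT_IB_EVENT_HANDLER(&ev_handler, device, my_event_cb);
 *	ib_register_event_handler(&ev_handler);
 */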

/**
 * ib_query_port - Query IB port attributes
 * @device: Device to query
 * @port_num: Port number to query
 * @port_attr: Port attributes
 *
 * ib_query_port() returns the attributes of a port through the
 * @port_attr pointer.
 */
int ib_query_port(struct ib_device *device,
		  u8 port_num,
		  struct ib_port_attr *port_attr)
{
	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
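
/*
 * Illustrative sketch (not part of the original file): checking whether a
 * port's logical link is up.
 *
 *	struct ib_port_attr attr;
 *
 *	if (!ib_query_port(device, port_num, &attr) &&
 *	    attr.state == IB_PORT_ACTIVE)
 *		... the port is active ...
 */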

/**
 * ib_query_gid - Get GID table entry
 * @device: Device to query
 * @port_num: Port number to query
 * @index: GID table index to query
 * @gid: Returned GID
 * @attr: Returned GID attributes related to this GID index (only in RoCE).
 *   NULL means ignore.
 *
 * ib_query_gid() fetches the specified GID table entry.
 */
int ib_query_gid(struct ib_device *device,
		 u8 port_num, int index, union ib_gid *gid,
		 struct ib_gid_attr *attr)
{
	if (rdma_cap_roce_gid_table(device, port_num))
		return ib_get_cached_gid(device, port_num, index, gid, attr);

	if (attr)
		return -EINVAL;

	return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);

/**
 * ib_enum_roce_netdev - enumerate all RoCE ports
 * @ib_dev: IB device we want to query
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each found RoCE port
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates all of the physical RoCE ports of ib_dev which are
 * related to a netdevice and calls the callback on each port for
 * which the filter function returns non zero.
 */
void ib_enum_roce_netdev(struct ib_device *ib_dev,
			 roce_netdev_filter filter,
			 void *filter_cookie,
			 roce_netdev_callback cb,
			 void *cookie)
{
	u8 port;

	for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
	     port++)
		if (rdma_protocol_roce(ib_dev, port)) {
			struct net_device *idev = NULL;

			if (ib_dev->get_netdev)
				idev = ib_dev->get_netdev(ib_dev, port);

			if (idev &&
			    idev->reg_state >= NETREG_UNREGISTERED) {
				dev_put(idev);
				idev = NULL;
			}

			if (filter(ib_dev, port, idev, filter_cookie))
				cb(ib_dev, port, idev, cookie);

			if (idev)
				dev_put(idev);
		}
}
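
/*
 * Illustrative sketch (not part of the original file): a filter/callback
 * pair as used with ib_enum_roce_netdev().  The filter decides, per RoCE
 * port and associated netdevice, whether the callback should run.  The
 * names "my_ndev_filter", "my_port_cb" and "target_ndev" are hypothetical.
 *
 *	static int my_ndev_filter(struct ib_device *device, u8 port,
 *				  struct net_device *ndev, void *cookie)
 *	{
 *		return ndev && ndev == (struct net_device *)cookie;
 *	}
 *
 *	static void my_port_cb(struct ib_device *device, u8 port,
 *			       struct net_device *ndev, void *cookie)
 *	{
 *		... act on this RoCE port / netdevice pair ...
 *	}
 *
 *	ib_enum_roce_netdev(ib_dev, my_ndev_filter, target_ndev,
 *			    my_port_cb, target_ndev);
 */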

/**
 * ib_enum_all_roce_netdevs - enumerate all RoCE devices
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each found RoCE port
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates the physical RoCE ports of all registered devices which
 * are related to a netdevice and calls the callback on each port for
 * which the filter function returns non zero.
 */
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
			      void *filter_cookie,
			      roce_netdev_callback cb,
			      void *cookie)
{
	struct ib_device *dev;

	down_read(&lists_rwsem);
	list_for_each_entry(dev, &device_list, core_list)
		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
	up_read(&lists_rwsem);
}

/**
 * ib_query_pkey - Get P_Key table entry
 * @device: Device to query
 * @port_num: Port number to query
 * @index: P_Key table index to query
 * @pkey: Returned P_Key
 *
 * ib_query_pkey() fetches the specified P_Key table entry.
 */
int ib_query_pkey(struct ib_device *device,
		  u8 port_num, u16 index, u16 *pkey)
{
	return device->query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);

/**
 * ib_modify_device - Change IB device attributes
 * @device: Device to modify
 * @device_modify_mask: Mask of attributes to change
 * @device_modify: New attribute values
 *
 * ib_modify_device() changes a device's attributes as specified by
 * the @device_modify_mask and @device_modify structure.
 */
int ib_modify_device(struct ib_device *device,
		     int device_modify_mask,
		     struct ib_device_modify *device_modify)
{
	if (!device->modify_device)
		return -ENOSYS;

	return device->modify_device(device, device_modify_mask,
				     device_modify);
}
EXPORT_SYMBOL(ib_modify_device);

/**
 * ib_modify_port - Modifies the attributes for the specified port.
 * @device: The device to modify.
 * @port_num: The number of the port to modify.
 * @port_modify_mask: Mask used to specify which attributes of the port
 *   to change.
 * @port_modify: New attribute values for the port.
 *
 * ib_modify_port() changes a port's attributes as specified by the
 * @port_modify_mask and @port_modify structure.
 */
int ib_modify_port(struct ib_device *device,
		   u8 port_num, int port_modify_mask,
		   struct ib_port_modify *port_modify)
{
	if (!device->modify_port)
		return -ENOSYS;

	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
		return -EINVAL;

	return device->modify_port(device, port_num, port_modify_mask,
				   port_modify);
}
EXPORT_SYMBOL(ib_modify_port);

/**
 * ib_find_gid - Returns the port number and GID table index where
 *   a specified GID value occurs.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: Type of GID.
 * @ndev: The ndev related to the GID to search for.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the GID table where the GID was found.  This
 *   parameter may be NULL.
 */
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
		enum ib_gid_type gid_type, struct net_device *ndev,
		u8 *port_num, u16 *index)
{
	union ib_gid tmp_gid;
	int ret, port, i;

	for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
		if (rdma_cap_roce_gid_table(device, port)) {
			if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
							ndev, index)) {
				*port_num = port;
				return 0;
			}
		}

		if (gid_type != IB_GID_TYPE_IB)
			continue;

		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
			ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
			if (ret)
				return ret;
			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
				*port_num = port;
				if (index)
					*index = i;
				return 0;
			}
		}
	}

	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);

/**
 * ib_find_pkey - Returns the PKey table index where a specified
 *   PKey value occurs.
 * @device: The device to query.
 * @port_num: The port number of the device to search for the PKey.
 * @pkey: The PKey value to search for.
 * @index: The index into the PKey table where the PKey was found.
 */
int ib_find_pkey(struct ib_device *device,
		 u8 port_num, u16 pkey, u16 *index)
{
	int ret, i;
	u16 tmp_pkey;
	int partial_ix = -1;

	for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
		if (ret)
			return ret;
		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
			/* prefer a full-membership P_Key if one exists */
			if (tmp_pkey & 0x8000) {
				*index = i;
				return 0;
			}
			if (partial_ix < 0)
				partial_ix = i;
		}
	}

	/* no full-membership match; fall back to a limited-membership one */
	if (partial_ix >= 0) {
		*index = partial_ix;
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
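
/*
 * Worked example (not part of the original file): with a P_Key table of
 * { 0x8001, 0x0001, 0xffff }, ib_find_pkey(dev, port, 0x0001, &index) sets
 * index to 0, because entry 0 matches in the low 15 bits and has the
 * full-membership bit (0x8000) set; the limited-membership entry at index 1
 * would only be returned if no full-membership match existed.
 */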

/**
 * ib_get_net_dev_by_params() - Return the appropriate net_dev
 * for a received CM request
 * @dev:	An RDMA device on which the request has been received.
 * @port:	Port number on the RDMA device.
 * @pkey:	The Pkey the request came on.
 * @gid:	A GID that the net_dev uses to communicate.
 * @addr:	Contains the IP address that the request specified as its
 *		destination.
 */
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
					    u8 port,
					    u16 pkey,
					    const union ib_gid *gid,
					    const struct sockaddr *addr)
{
	struct net_device *net_dev = NULL;
	struct ib_client_data *context;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	down_read(&lists_rwsem);

	list_for_each_entry(context, &dev->client_data_list, list) {
		struct ib_client *client = context->client;

		if (context->going_down)
			continue;

		if (client->get_net_dev_by_params) {
			net_dev = client->get_net_dev_by_params(dev, port, pkey,
								gid, addr,
								context->data);
			if (net_dev)
				break;
		}
	}

	up_read(&lists_rwsem);

	return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
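
/*
 * Illustrative sketch (not part of the original file): a client takes part
 * in this lookup by providing a get_net_dev_by_params callback in its
 * struct ib_client; the callback returns a net_device with its reference
 * count elevated, or NULL.  "my_get_net_dev" is a hypothetical name and
 * "my_add"/"my_remove" are the hypothetical callbacks sketched earlier.
 *
 *	static struct net_device *my_get_net_dev(struct ib_device *dev,
 *						 u8 port, u16 pkey,
 *						 const union ib_gid *gid,
 *						 const struct sockaddr *addr,
 *						 void *client_data)
 *	{
 *		... return a held net_device matching the parameters,
 *		    or NULL if this client has no match ...
 *	}
 *
 *	static struct ib_client my_client = {
 *		.name			= "my_client",
 *		.add			= my_add,
 *		.remove			= my_remove,
 *		.get_net_dev_by_params	= my_get_net_dev,
 *	};
 */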

static int __init ib_core_init(void)
{
	int ret;

	ib_wq = alloc_workqueue("infiniband", 0, 0);
	if (!ib_wq)
		return -ENOMEM;

	ib_comp_wq = alloc_workqueue("ib-comp-wq",
			WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
			WQ_UNBOUND_MAX_ACTIVE);
	if (!ib_comp_wq) {
		ret = -ENOMEM;
		goto err;
	}

	ret = class_register(&ib_class);
	if (ret) {
		printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
		goto err_comp;
	}

	ret = ibnl_init();
	if (ret) {
		printk(KERN_WARNING "Couldn't init IB netlink interface\n");
		goto err_sysfs;
	}

	ib_cache_setup();

	return 0;

err_sysfs:
	class_unregister(&ib_class);
err_comp:
	destroy_workqueue(ib_comp_wq);
err:
	destroy_workqueue(ib_wq);
	return ret;
}

static void __exit ib_core_cleanup(void)
{
	ib_cache_cleanup();
	ibnl_cleanup();
	class_unregister(&ib_class);
	destroy_workqueue(ib_comp_wq);
	/* Destroying ib_wq last also flushes any work still queued on it. */
	destroy_workqueue(ib_wq);
}

module_init(ib_core_init);
module_exit(ib_core_cleanup);