1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#include <linux/module.h>
35#include <linux/string.h>
36#include <linux/errno.h>
37#include <linux/kernel.h>
38#include <linux/slab.h>
39#include <linux/init.h>
40#include <linux/mutex.h>
41#include <linux/netdevice.h>
42#include <rdma/rdma_netlink.h>
43#include <rdma/ib_addr.h>
44#include <rdma/ib_cache.h>
45
46#include "core_priv.h"
47
48MODULE_AUTHOR("Roland Dreier");
49MODULE_DESCRIPTION("core kernel InfiniBand API");
50MODULE_LICENSE("Dual BSD/GPL");
51
52struct ib_client_data {
53 struct list_head list;
54 struct ib_client *client;
55 void * data;
56
57
58 bool going_down;
59};
60
/* Workqueues allocated in ib_core_init() and destroyed in ib_core_cleanup(). */
struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);

/*
 * device_list holds registered devices (linked through
 * ib_device::core_list); client_list holds registered clients (linked
 * through ib_client::list).
 */
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);

/*
 * device_mutex is held for the duration of device and client
 * (un)registration.
 *
 * lists_rwsem is taken for write around every modification of device_list,
 * client_list and the per-device client_data_list in this file, and for
 * read by the helpers that only walk those lists.
 */
static DEFINE_MUTEX(device_mutex);
static DECLARE_RWSEM(lists_rwsem);
84
85
86static int ib_device_check_mandatory(struct ib_device *device)
87{
88#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
89 static const struct {
90 size_t offset;
91 char *name;
92 } mandatory_table[] = {
93 IB_MANDATORY_FUNC(query_device),
94 IB_MANDATORY_FUNC(query_port),
95 IB_MANDATORY_FUNC(query_pkey),
96 IB_MANDATORY_FUNC(query_gid),
97 IB_MANDATORY_FUNC(alloc_pd),
98 IB_MANDATORY_FUNC(dealloc_pd),
99 IB_MANDATORY_FUNC(create_ah),
100 IB_MANDATORY_FUNC(destroy_ah),
101 IB_MANDATORY_FUNC(create_qp),
102 IB_MANDATORY_FUNC(modify_qp),
103 IB_MANDATORY_FUNC(destroy_qp),
104 IB_MANDATORY_FUNC(post_send),
105 IB_MANDATORY_FUNC(post_recv),
106 IB_MANDATORY_FUNC(create_cq),
107 IB_MANDATORY_FUNC(destroy_cq),
108 IB_MANDATORY_FUNC(poll_cq),
109 IB_MANDATORY_FUNC(req_notify_cq),
110 IB_MANDATORY_FUNC(get_dma_mr),
111 IB_MANDATORY_FUNC(dereg_mr),
112 IB_MANDATORY_FUNC(get_port_immutable)
113 };
114 int i;
115
116 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
117 if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
118 pr_warn("Device %s is missing mandatory function %s\n",
119 device->name, mandatory_table[i].name);
120 return -EINVAL;
121 }
122 }
123
124 return 0;
125}
126
127static struct ib_device *__ib_device_get_by_name(const char *name)
128{
129 struct ib_device *device;
130
131 list_for_each_entry(device, &device_list, core_list)
132 if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
133 return device;
134
135 return NULL;
136}
137
138
139static int alloc_name(char *name)
140{
141 unsigned long *inuse;
142 char buf[IB_DEVICE_NAME_MAX];
143 struct ib_device *device;
144 int i;
145
146 inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
147 if (!inuse)
148 return -ENOMEM;
149
150 list_for_each_entry(device, &device_list, core_list) {
151 if (!sscanf(device->name, name, &i))
152 continue;
153 if (i < 0 || i >= PAGE_SIZE * 8)
154 continue;
155 snprintf(buf, sizeof buf, name, i);
156 if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
157 set_bit(i, inuse);
158 }
159
160 i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
161 free_page((unsigned long) inuse);
162 snprintf(buf, sizeof buf, name, i);
163
164 if (__ib_device_get_by_name(buf))
165 return -ENFILE;
166
167 strlcpy(name, buf, IB_DEVICE_NAME_MAX);
168 return 0;
169}
170
171static void ib_device_release(struct device *device)
172{
173 struct ib_device *dev = container_of(device, struct ib_device, dev);
174
175 ib_cache_release_one(dev);
176 kfree(dev->port_immutable);
177 kfree(dev);
178}
179
180static int ib_device_uevent(struct device *device,
181 struct kobj_uevent_env *env)
182{
183 struct ib_device *dev = container_of(device, struct ib_device, dev);
184
185 if (add_uevent_var(env, "NAME=%s", dev->name))
186 return -ENOMEM;
187
188
189
190
191
192 return 0;
193}
194
195static struct class ib_class = {
196 .name = "infiniband",
197 .dev_release = ib_device_release,
198 .dev_uevent = ib_device_uevent,
199};
200
201
202
203
204
205
206
207
208
209
210
211struct ib_device *ib_alloc_device(size_t size)
212{
213 struct ib_device *device;
214
215 if (WARN_ON(size < sizeof(struct ib_device)))
216 return NULL;
217
218 device = kzalloc(size, GFP_KERNEL);
219 if (!device)
220 return NULL;
221
222 device->dev.class = &ib_class;
223 device_initialize(&device->dev);
224
225 dev_set_drvdata(&device->dev, device);
226
227 INIT_LIST_HEAD(&device->event_handler_list);
228 spin_lock_init(&device->event_handler_lock);
229 spin_lock_init(&device->client_data_lock);
230 INIT_LIST_HEAD(&device->client_data_list);
231 INIT_LIST_HEAD(&device->port_list);
232
233 return device;
234}
235EXPORT_SYMBOL(ib_alloc_device);
236
237
238
239
240
241
242
243void ib_dealloc_device(struct ib_device *device)
244{
245 WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
246 device->reg_state != IB_DEV_UNINITIALIZED);
247 kobject_put(&device->dev.kobj);
248}
249EXPORT_SYMBOL(ib_dealloc_device);
250
251static int add_client_context(struct ib_device *device, struct ib_client *client)
252{
253 struct ib_client_data *context;
254 unsigned long flags;
255
256 context = kmalloc(sizeof *context, GFP_KERNEL);
257 if (!context) {
258 pr_warn("Couldn't allocate client context for %s/%s\n",
259 device->name, client->name);
260 return -ENOMEM;
261 }
262
263 context->client = client;
264 context->data = NULL;
265 context->going_down = false;
266
267 down_write(&lists_rwsem);
268 spin_lock_irqsave(&device->client_data_lock, flags);
269 list_add(&context->list, &device->client_data_list);
270 spin_unlock_irqrestore(&device->client_data_lock, flags);
271 up_write(&lists_rwsem);
272
273 return 0;
274}
275
276static int verify_immutable(const struct ib_device *dev, u8 port)
277{
278 return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
279 rdma_max_mad_size(dev, port) != 0);
280}
281
282static int read_port_immutable(struct ib_device *device)
283{
284 int ret;
285 u8 start_port = rdma_start_port(device);
286 u8 end_port = rdma_end_port(device);
287 u8 port;
288
289
290
291
292
293
294
295
296 device->port_immutable = kzalloc(sizeof(*device->port_immutable)
297 * (end_port + 1),
298 GFP_KERNEL);
299 if (!device->port_immutable)
300 return -ENOMEM;
301
302 for (port = start_port; port <= end_port; ++port) {
303 ret = device->get_port_immutable(device, port,
304 &device->port_immutable[port]);
305 if (ret)
306 return ret;
307
308 if (verify_immutable(device, port))
309 return -EINVAL;
310 }
311 return 0;
312}
313
314
315
316
317
318
319
320
321
322
323int ib_register_device(struct ib_device *device,
324 int (*port_callback)(struct ib_device *,
325 u8, struct kobject *))
326{
327 int ret;
328 struct ib_client *client;
329 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
330
331 mutex_lock(&device_mutex);
332
333 if (strchr(device->name, '%')) {
334 ret = alloc_name(device->name);
335 if (ret)
336 goto out;
337 }
338
339 if (ib_device_check_mandatory(device)) {
340 ret = -EINVAL;
341 goto out;
342 }
343
344 ret = read_port_immutable(device);
345 if (ret) {
346 pr_warn("Couldn't create per port immutable data %s\n",
347 device->name);
348 goto out;
349 }
350
351 ret = ib_cache_setup_one(device);
352 if (ret) {
353 pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
354 goto out;
355 }
356
357 memset(&device->attrs, 0, sizeof(device->attrs));
358 ret = device->query_device(device, &device->attrs, &uhw);
359 if (ret) {
360 pr_warn("Couldn't query the device attributes\n");
361 ib_cache_cleanup_one(device);
362 goto out;
363 }
364
365 ret = ib_device_register_sysfs(device, port_callback);
366 if (ret) {
367 pr_warn("Couldn't register device %s with driver model\n",
368 device->name);
369 ib_cache_cleanup_one(device);
370 goto out;
371 }
372
373 device->reg_state = IB_DEV_REGISTERED;
374
375 list_for_each_entry(client, &client_list, list)
376 if (client->add && !add_client_context(device, client))
377 client->add(device);
378
379 down_write(&lists_rwsem);
380 list_add_tail(&device->core_list, &device_list);
381 up_write(&lists_rwsem);
382out:
383 mutex_unlock(&device_mutex);
384 return ret;
385}
386EXPORT_SYMBOL(ib_register_device);
387
388
389
390
391
392
393
394void ib_unregister_device(struct ib_device *device)
395{
396 struct ib_client_data *context, *tmp;
397 unsigned long flags;
398
399 mutex_lock(&device_mutex);
400
401 down_write(&lists_rwsem);
402 list_del(&device->core_list);
403 spin_lock_irqsave(&device->client_data_lock, flags);
404 list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
405 context->going_down = true;
406 spin_unlock_irqrestore(&device->client_data_lock, flags);
407 downgrade_write(&lists_rwsem);
408
409 list_for_each_entry_safe(context, tmp, &device->client_data_list,
410 list) {
411 if (context->client->remove)
412 context->client->remove(device, context->data);
413 }
414 up_read(&lists_rwsem);
415
416 mutex_unlock(&device_mutex);
417
418 ib_device_unregister_sysfs(device);
419 ib_cache_cleanup_one(device);
420
421 down_write(&lists_rwsem);
422 spin_lock_irqsave(&device->client_data_lock, flags);
423 list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
424 kfree(context);
425 spin_unlock_irqrestore(&device->client_data_lock, flags);
426 up_write(&lists_rwsem);
427
428 device->reg_state = IB_DEV_UNREGISTERED;
429}
430EXPORT_SYMBOL(ib_unregister_device);
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445int ib_register_client(struct ib_client *client)
446{
447 struct ib_device *device;
448
449 mutex_lock(&device_mutex);
450
451 list_for_each_entry(device, &device_list, core_list)
452 if (client->add && !add_client_context(device, client))
453 client->add(device);
454
455 down_write(&lists_rwsem);
456 list_add_tail(&client->list, &client_list);
457 up_write(&lists_rwsem);
458
459 mutex_unlock(&device_mutex);
460
461 return 0;
462}
463EXPORT_SYMBOL(ib_register_client);
464
465
466
467
468
469
470
471
472
473void ib_unregister_client(struct ib_client *client)
474{
475 struct ib_client_data *context, *tmp;
476 struct ib_device *device;
477 unsigned long flags;
478
479 mutex_lock(&device_mutex);
480
481 down_write(&lists_rwsem);
482 list_del(&client->list);
483 up_write(&lists_rwsem);
484
485 list_for_each_entry(device, &device_list, core_list) {
486 struct ib_client_data *found_context = NULL;
487
488 down_write(&lists_rwsem);
489 spin_lock_irqsave(&device->client_data_lock, flags);
490 list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
491 if (context->client == client) {
492 context->going_down = true;
493 found_context = context;
494 break;
495 }
496 spin_unlock_irqrestore(&device->client_data_lock, flags);
497 up_write(&lists_rwsem);
498
499 if (client->remove)
500 client->remove(device, found_context ?
501 found_context->data : NULL);
502
503 if (!found_context) {
504 pr_warn("No client context found for %s/%s\n",
505 device->name, client->name);
506 continue;
507 }
508
509 down_write(&lists_rwsem);
510 spin_lock_irqsave(&device->client_data_lock, flags);
511 list_del(&found_context->list);
512 kfree(found_context);
513 spin_unlock_irqrestore(&device->client_data_lock, flags);
514 up_write(&lists_rwsem);
515 }
516
517 mutex_unlock(&device_mutex);
518}
519EXPORT_SYMBOL(ib_unregister_client);
520
521
522
523
524
525
526
527
528
529void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
530{
531 struct ib_client_data *context;
532 void *ret = NULL;
533 unsigned long flags;
534
535 spin_lock_irqsave(&device->client_data_lock, flags);
536 list_for_each_entry(context, &device->client_data_list, list)
537 if (context->client == client) {
538 ret = context->data;
539 break;
540 }
541 spin_unlock_irqrestore(&device->client_data_lock, flags);
542
543 return ret;
544}
545EXPORT_SYMBOL(ib_get_client_data);
546
547
548
549
550
551
552
553
554
555
556void ib_set_client_data(struct ib_device *device, struct ib_client *client,
557 void *data)
558{
559 struct ib_client_data *context;
560 unsigned long flags;
561
562 spin_lock_irqsave(&device->client_data_lock, flags);
563 list_for_each_entry(context, &device->client_data_list, list)
564 if (context->client == client) {
565 context->data = data;
566 goto out;
567 }
568
569 pr_warn("No client context found for %s/%s\n",
570 device->name, client->name);
571
572out:
573 spin_unlock_irqrestore(&device->client_data_lock, flags);
574}
575EXPORT_SYMBOL(ib_set_client_data);
576
577
578
579
580
581
582
583
584
585
586int ib_register_event_handler (struct ib_event_handler *event_handler)
587{
588 unsigned long flags;
589
590 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
591 list_add_tail(&event_handler->list,
592 &event_handler->device->event_handler_list);
593 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
594
595 return 0;
596}
597EXPORT_SYMBOL(ib_register_event_handler);
598
599
600
601
602
603
604
605
606int ib_unregister_event_handler(struct ib_event_handler *event_handler)
607{
608 unsigned long flags;
609
610 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
611 list_del(&event_handler->list);
612 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
613
614 return 0;
615}
616EXPORT_SYMBOL(ib_unregister_event_handler);
617
618
619
620
621
622
623
624
625
626void ib_dispatch_event(struct ib_event *event)
627{
628 unsigned long flags;
629 struct ib_event_handler *handler;
630
631 spin_lock_irqsave(&event->device->event_handler_lock, flags);
632
633 list_for_each_entry(handler, &event->device->event_handler_list, list)
634 handler->handler(handler, event);
635
636 spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
637}
638EXPORT_SYMBOL(ib_dispatch_event);
639
640
641
642
643
644
645
646
647
648
649int ib_query_port(struct ib_device *device,
650 u8 port_num,
651 struct ib_port_attr *port_attr)
652{
653 union ib_gid gid;
654 int err;
655
656 if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
657 return -EINVAL;
658
659 memset(port_attr, 0, sizeof(*port_attr));
660 err = device->query_port(device, port_num, port_attr);
661 if (err || port_attr->subnet_prefix)
662 return err;
663
664 err = ib_query_gid(device, port_num, 0, &gid, NULL);
665 if (err)
666 return err;
667
668 port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
669 return 0;
670}
671EXPORT_SYMBOL(ib_query_port);
672
673
674
675
676
677
678
679
680
681
682
683
684int ib_query_gid(struct ib_device *device,
685 u8 port_num, int index, union ib_gid *gid,
686 struct ib_gid_attr *attr)
687{
688 if (rdma_cap_roce_gid_table(device, port_num))
689 return ib_get_cached_gid(device, port_num, index, gid, attr);
690
691 if (attr)
692 return -EINVAL;
693
694 return device->query_gid(device, port_num, index, gid);
695}
696EXPORT_SYMBOL(ib_query_gid);
697
698
699
700
701
702
703
704
705
706
707
708
709
710void ib_enum_roce_netdev(struct ib_device *ib_dev,
711 roce_netdev_filter filter,
712 void *filter_cookie,
713 roce_netdev_callback cb,
714 void *cookie)
715{
716 u8 port;
717
718 for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
719 port++)
720 if (rdma_protocol_roce(ib_dev, port)) {
721 struct net_device *idev = NULL;
722
723 if (ib_dev->get_netdev)
724 idev = ib_dev->get_netdev(ib_dev, port);
725
726 if (idev &&
727 idev->reg_state >= NETREG_UNREGISTERED) {
728 dev_put(idev);
729 idev = NULL;
730 }
731
732 if (filter(ib_dev, port, idev, filter_cookie))
733 cb(ib_dev, port, idev, cookie);
734
735 if (idev)
736 dev_put(idev);
737 }
738}
739
740
741
742
743
744
745
746
747
748
749
750
751void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
752 void *filter_cookie,
753 roce_netdev_callback cb,
754 void *cookie)
755{
756 struct ib_device *dev;
757
758 down_read(&lists_rwsem);
759 list_for_each_entry(dev, &device_list, core_list)
760 ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
761 up_read(&lists_rwsem);
762}
763
764
765
766
767
768
769
770
771
772
773int ib_query_pkey(struct ib_device *device,
774 u8 port_num, u16 index, u16 *pkey)
775{
776 return device->query_pkey(device, port_num, index, pkey);
777}
778EXPORT_SYMBOL(ib_query_pkey);
779
780
781
782
783
784
785
786
787
788
789int ib_modify_device(struct ib_device *device,
790 int device_modify_mask,
791 struct ib_device_modify *device_modify)
792{
793 if (!device->modify_device)
794 return -ENOSYS;
795
796 return device->modify_device(device, device_modify_mask,
797 device_modify);
798}
799EXPORT_SYMBOL(ib_modify_device);
800
801
802
803
804
805
806
807
808
809
810
811
812int ib_modify_port(struct ib_device *device,
813 u8 port_num, int port_modify_mask,
814 struct ib_port_modify *port_modify)
815{
816 if (!device->modify_port)
817 return -ENOSYS;
818
819 if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
820 return -EINVAL;
821
822 return device->modify_port(device, port_num, port_modify_mask,
823 port_modify);
824}
825EXPORT_SYMBOL(ib_modify_port);
826
827
828
829
830
831
832
833
834
835
836
837
838int ib_find_gid(struct ib_device *device, union ib_gid *gid,
839 enum ib_gid_type gid_type, struct net_device *ndev,
840 u8 *port_num, u16 *index)
841{
842 union ib_gid tmp_gid;
843 int ret, port, i;
844
845 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
846 if (rdma_cap_roce_gid_table(device, port)) {
847 if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
848 ndev, index)) {
849 *port_num = port;
850 return 0;
851 }
852 }
853
854 if (gid_type != IB_GID_TYPE_IB)
855 continue;
856
857 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
858 ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
859 if (ret)
860 return ret;
861 if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
862 *port_num = port;
863 if (index)
864 *index = i;
865 return 0;
866 }
867 }
868 }
869
870 return -ENOENT;
871}
872EXPORT_SYMBOL(ib_find_gid);
873
874
875
876
877
878
879
880
881
882int ib_find_pkey(struct ib_device *device,
883 u8 port_num, u16 pkey, u16 *index)
884{
885 int ret, i;
886 u16 tmp_pkey;
887 int partial_ix = -1;
888
889 for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
890 ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
891 if (ret)
892 return ret;
893 if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
894
895 if (tmp_pkey & 0x8000) {
896 *index = i;
897 return 0;
898 }
899 if (partial_ix < 0)
900 partial_ix = i;
901 }
902 }
903
904
905 if (partial_ix >= 0) {
906 *index = partial_ix;
907 return 0;
908 }
909 return -ENOENT;
910}
911EXPORT_SYMBOL(ib_find_pkey);
912
913
914
915
916
917
918
919
920
921
922
923struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
924 u8 port,
925 u16 pkey,
926 const union ib_gid *gid,
927 const struct sockaddr *addr)
928{
929 struct net_device *net_dev = NULL;
930 struct ib_client_data *context;
931
932 if (!rdma_protocol_ib(dev, port))
933 return NULL;
934
935 down_read(&lists_rwsem);
936
937 list_for_each_entry(context, &dev->client_data_list, list) {
938 struct ib_client *client = context->client;
939
940 if (context->going_down)
941 continue;
942
943 if (client->get_net_dev_by_params) {
944 net_dev = client->get_net_dev_by_params(dev, port, pkey,
945 gid, addr,
946 context->data);
947 if (net_dev)
948 break;
949 }
950 }
951
952 up_read(&lists_rwsem);
953
954 return net_dev;
955}
956EXPORT_SYMBOL(ib_get_net_dev_by_params);
957
958static int __init ib_core_init(void)
959{
960 int ret;
961
962 ib_wq = alloc_workqueue("infiniband", 0, 0);
963 if (!ib_wq)
964 return -ENOMEM;
965
966 ib_comp_wq = alloc_workqueue("ib-comp-wq",
967 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
968 WQ_UNBOUND_MAX_ACTIVE);
969 if (!ib_comp_wq) {
970 ret = -ENOMEM;
971 goto err;
972 }
973
974 ret = class_register(&ib_class);
975 if (ret) {
976 pr_warn("Couldn't create InfiniBand device class\n");
977 goto err_comp;
978 }
979
980 ret = ibnl_init();
981 if (ret) {
982 pr_warn("Couldn't init IB netlink interface\n");
983 goto err_sysfs;
984 }
985
986 ib_cache_setup();
987
988 return 0;
989
990err_sysfs:
991 class_unregister(&ib_class);
992err_comp:
993 destroy_workqueue(ib_comp_wq);
994err:
995 destroy_workqueue(ib_wq);
996 return ret;
997}
998
999static void __exit ib_core_cleanup(void)
1000{
1001 ib_cache_cleanup();
1002 ibnl_cleanup();
1003 class_unregister(&ib_class);
1004 destroy_workqueue(ib_comp_wq);
1005
1006 destroy_workqueue(ib_wq);
1007}
1008
/* Module entry and exit points. */
module_init(ib_core_init);
module_exit(ib_core_cleanup);
1011