#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");

struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);

/*
 * devices contains devices that have had their names assigned. The devices
 * may not be registered. Users that care about the registration status need
 * to check the DEVICE_REGISTERED mark or use ib_device_try_get() while
 * holding devices_rwsem. Writers of both the xarray and its marks must hold
 * devices_rwsem for write; readers iterate under it held for read.
 */
static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1

static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);

static void ib_client_put(struct ib_client *client)
{
	if (refcount_dec_and_test(&client->uses))
		complete(&client->uses_zero);
}

/*
 * If client_data is registered then the corresponding client must also still
 * be registered.
 */
#define CLIENT_DATA_REGISTERED XA_MARK_1

/*
 * Per-net-namespace state for the RDMA subsystem. Namespaces other than
 * init_net are additionally tracked in the rdma_nets xarray below.
 */
struct rdma_dev_net {
	possible_net_t net;
	u32 id;
};

static unsigned int rdma_dev_net_id;

/*
 * rdma_nets tracks every non-default net namespace, indexed by an allocated
 * id, so that compat devices can be created in each of them while devices
 * are shared. Protected by rdma_nets_rwsem.
 */
static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(rdma_nets_rwsem);

bool ib_devices_shared_netns = true;
module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
MODULE_PARM_DESC(netns_mode,
		 "Share device among net namespaces; default=1 (shared)");

/**
 * rdma_dev_access_netns() - Return whether an rdma device can be accessed
 *			     from a specified net namespace or not.
 * @dev:	Pointer to the rdma device which needs to be checked
 * @net:	Pointer to the net namespace for which access is checked
 *
 * When the rdma device is in shared mode, the check ignores the net
 * namespace. When the rdma device is exclusive to a net namespace, the
 * device's net namespace is checked against the specified one.
 */
bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
	return (ib_devices_shared_netns ||
		net_eq(read_pnet(&dev->coredev.rdma_net), net));
}
EXPORT_SYMBOL(rdma_dev_access_netns);

/*
 * xan_find_marked() - Find the next entry at or after *indexp that carries
 * @filter, skipping internal retry entries. A reserved (zero) entry is
 * reported as NULL, and XA_ERROR(-ENOENT) is returned once the xarray is
 * exhausted. *indexp is updated to the index of the returned entry.
 */
static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
			     xa_mark_t filter)
{
	XA_STATE(xas, xa, *indexp);
	void *entry;

	rcu_read_lock();
	do {
		entry = xas_find_marked(&xas, ULONG_MAX, filter);
		if (xa_is_zero(entry))
			break;
	} while (xas_retry(&xas, entry));
	rcu_read_unlock();

	if (entry) {
		*indexp = xas.xa_index;
		if (xa_is_zero(entry))
			return NULL;
		return entry;
	}
	return XA_ERROR(-ENOENT);
}
#define xan_for_each_marked(xa, index, entry, filter)                          \
	for (index = 0, entry = xan_find_marked(xa, &(index), filter);         \
	     !xa_is_err(entry);                                                \
	     (index)++, entry = xan_find_marked(xa, &(index), filter))

static DEFINE_SPINLOCK(ndev_hash_lock);
static DECLARE_HASHTABLE(ndev_hash, 5);

static void free_netdevs(struct ib_device *ib_dev);
static void ib_unregister_work(struct work_struct *work);
static void __ib_unregister_device(struct ib_device *device);
static int ib_security_change(struct notifier_block *nb, unsigned long event,
			      void *lsm_data);
static void ib_policy_change_task(struct work_struct *work);
static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);

static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
			   struct va_format *vaf)
{
	if (ibdev && ibdev->dev.parent)
		dev_printk_emit(level[1] - '0',
				ibdev->dev.parent,
				"%s %s %s: %pV",
				dev_driver_string(ibdev->dev.parent),
				dev_name(ibdev->dev.parent),
				dev_name(&ibdev->dev),
				vaf);
	else if (ibdev)
		printk("%s%s: %pV",
		       level, dev_name(&ibdev->dev), vaf);
	else
		printk("%s(NULL ib_device): %pV", level, vaf);
}

void ibdev_printk(const char *level, const struct ib_device *ibdev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	__ibdev_printk(level, ibdev, &vaf);

	va_end(args);
}
EXPORT_SYMBOL(ibdev_printk);

#define define_ibdev_printk_level(func, level)                         \
void func(const struct ib_device *ibdev, const char *fmt, ...)         \
{                                                                      \
	struct va_format vaf;                                          \
	va_list args;                                                  \
								       \
	va_start(args, fmt);                                           \
								       \
	vaf.fmt = fmt;                                                 \
	vaf.va = &args;                                                \
								       \
	__ibdev_printk(level, ibdev, &vaf);                            \
								       \
	va_end(args);                                                  \
}                                                                      \
EXPORT_SYMBOL(func);

define_ibdev_printk_level(ibdev_emerg, KERN_EMERG);
define_ibdev_printk_level(ibdev_alert, KERN_ALERT);
define_ibdev_printk_level(ibdev_crit, KERN_CRIT);
define_ibdev_printk_level(ibdev_err, KERN_ERR);
define_ibdev_printk_level(ibdev_warn, KERN_WARNING);
define_ibdev_printk_level(ibdev_notice, KERN_NOTICE);
define_ibdev_printk_level(ibdev_info, KERN_INFO);
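
/*
 * Example (illustrative only, not part of this file): drivers use the
 * generated helpers exactly like dev_warn()/dev_info(); the device name and
 * parent driver string are prefixed automatically by __ibdev_printk():
 *
 *	ibdev_warn(ibdev, "port %u: link flap detected (%d events)\n",
 *		   port, nflaps);
 *	ibdev_info(ibdev, "registered\n");
 */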

static struct notifier_block ibdev_lsm_nb = {
	.notifier_call = ib_security_change,
};

static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net);

/*
 * The ib_port_data array is stored behind an rcu_head so the whole thing can
 * be freed with kfree_rcu(); see ib_device_release().
 */
struct ib_port_data_rcu {
	struct rcu_head rcu_head;
	struct ib_port_data pdata[];
};

static void ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
	static const struct {
		size_t offset;
		char *name;
	} mandatory_table[] = {
		IB_MANDATORY_FUNC(query_device),
		IB_MANDATORY_FUNC(query_port),
		IB_MANDATORY_FUNC(query_pkey),
		IB_MANDATORY_FUNC(alloc_pd),
		IB_MANDATORY_FUNC(dealloc_pd),
		IB_MANDATORY_FUNC(create_qp),
		IB_MANDATORY_FUNC(modify_qp),
		IB_MANDATORY_FUNC(destroy_qp),
		IB_MANDATORY_FUNC(post_send),
		IB_MANDATORY_FUNC(post_recv),
		IB_MANDATORY_FUNC(create_cq),
		IB_MANDATORY_FUNC(destroy_cq),
		IB_MANDATORY_FUNC(poll_cq),
		IB_MANDATORY_FUNC(req_notify_cq),
		IB_MANDATORY_FUNC(get_dma_mr),
		IB_MANDATORY_FUNC(dereg_mr),
		IB_MANDATORY_FUNC(get_port_immutable)
	};
	int i;

	device->kverbs_provider = true;
	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
		if (!*(void **) ((void *) &device->ops +
				 mandatory_table[i].offset)) {
			device->kverbs_provider = false;
			break;
		}
	}
}

/*
 * Caller must perform ib_device_put() to return the device reference count
 * when ib_device_get_by_index() returns a valid device pointer.
 */
struct ib_device *ib_device_get_by_index(const struct net *net, u32 index)
{
	struct ib_device *device;

	down_read(&devices_rwsem);
	device = xa_load(&devices, index);
	if (device) {
		if (!rdma_dev_access_netns(device, net)) {
			device = NULL;
			goto out;
		}

		if (!ib_device_try_get(device))
			device = NULL;
	}
out:
	up_read(&devices_rwsem);
	return device;
}

/**
 * ib_device_put - Release IB device reference
 * @device: device whose reference is to be released
 *
 * ib_device_put() releases the reference to the IB device to allow it to be
 * unregistered and eventually freed.
 */
void ib_device_put(struct ib_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->unreg_completion);
}
EXPORT_SYMBOL(ib_device_put);
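
/*
 * Illustrative sketch (not from this file): the try_get/put pattern used by
 * callers of the lookup helpers above. Note that ib_device_get_by_index() is
 * internal to ib_core (declared in core_priv.h); the surrounding context is
 * hypothetical:
 *
 *	struct ib_device *dev;
 *
 *	dev = ib_device_get_by_index(net, index);
 *	if (!dev)
 *		return -ENODEV;	// unregistered, wrong netns, or being removed
 *	// ... use dev; unregistration is blocked until the put below ...
 *	ib_device_put(dev);
 */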

static struct ib_device *__ib_device_get_by_name(const char *name)
{
	struct ib_device *device;
	unsigned long index;

	xa_for_each (&devices, index, device)
		if (!strcmp(name, dev_name(&device->dev)))
			return device;

	return NULL;
}

/**
 * ib_device_get_by_name - Find an IB device by name
 * @name: The name to look for
 * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
 *
 * Find and hold an ib_device by its name. The caller must call
 * ib_device_put() on the returned pointer.
 */
struct ib_device *ib_device_get_by_name(const char *name,
					enum rdma_driver_id driver_id)
{
	struct ib_device *device;

	down_read(&devices_rwsem);
	device = __ib_device_get_by_name(name);
	if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
	    device->ops.driver_id != driver_id)
		device = NULL;

	if (device) {
		if (!ib_device_try_get(device))
			device = NULL;
	}
	up_read(&devices_rwsem);
	return device;
}
EXPORT_SYMBOL(ib_device_get_by_name);

static int rename_compat_devs(struct ib_device *device)
{
	struct ib_core_device *cdev;
	unsigned long index;
	int ret = 0;

	mutex_lock(&device->compat_devs_mutex);
	xa_for_each (&device->compat_devs, index, cdev) {
		ret = device_rename(&cdev->dev, dev_name(&device->dev));
		if (ret) {
			dev_warn(&cdev->dev,
				 "Failed to rename compat device to new name %s\n",
				 dev_name(&device->dev));
			break;
		}
	}
	mutex_unlock(&device->compat_devs_mutex);
	return ret;
}
int ib_device_rename(struct ib_device *ibdev, const char *name)
{
	unsigned long index;
	void *client_data;
	int ret;

	down_write(&devices_rwsem);
	if (!strcmp(name, dev_name(&ibdev->dev))) {
		up_write(&devices_rwsem);
		return 0;
	}

	if (__ib_device_get_by_name(name)) {
		up_write(&devices_rwsem);
		return -EEXIST;
	}

	ret = device_rename(&ibdev->dev, name);
	if (ret) {
		up_write(&devices_rwsem);
		return ret;
	}

	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
	ret = rename_compat_devs(ibdev);

	downgrade_write(&devices_rwsem);
	down_read(&ibdev->client_data_rwsem);
	xan_for_each_marked(&ibdev->client_data, index, client_data,
			    CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || !client->rename)
			continue;

		client->rename(ibdev, client_data);
	}
	up_read(&ibdev->client_data_rwsem);
	up_read(&devices_rwsem);
	return 0;
}

int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim)
{
	if (use_dim > 1)
		return -EINVAL;
	ibdev->use_cq_dim = use_dim;

	return 0;
}

static int alloc_name(struct ib_device *ibdev, const char *name)
{
	struct ib_device *device;
	unsigned long index;
	struct ida inuse;
	int rc;
	int i;

	lockdep_assert_held_write(&devices_rwsem);
	ida_init(&inuse);
	xa_for_each (&devices, index, device) {
		char buf[IB_DEVICE_NAME_MAX];

		if (sscanf(dev_name(&device->dev), name, &i) != 1)
			continue;
		if (i < 0 || i >= INT_MAX)
			continue;
		snprintf(buf, sizeof buf, name, i);
		if (strcmp(buf, dev_name(&device->dev)) != 0)
			continue;

		rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
		if (rc < 0)
			goto out;
	}

	rc = ida_alloc(&inuse, GFP_KERNEL);
	if (rc < 0)
		goto out;

	rc = dev_set_name(&ibdev->dev, name, rc);
out:
	ida_destroy(&inuse);
	return rc;
}

static void ib_device_release(struct device *device)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	free_netdevs(dev);
	WARN_ON(refcount_read(&dev->refcount));
	if (dev->port_data) {
		ib_cache_release_one(dev);
		ib_security_release_port_pkey_list(dev);
		rdma_counter_release(dev);
		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
				       pdata[0]),
			  rcu_head);
	}

	xa_destroy(&dev->compat_devs);
	xa_destroy(&dev->client_data);
	kfree_rcu(dev, rcu_head);
}

static int ib_device_uevent(struct device *device,
			    struct kobj_uevent_env *env)
{
	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
		return -ENOMEM;

	/*
	 * It would be nice to pass the node GUID with the event, but the
	 * device is not fully initialized at all the points this can fire.
	 */
	return 0;
}

static const void *net_namespace(struct device *d)
{
	struct ib_core_device *coredev =
			container_of(d, struct ib_core_device, dev);

	return read_pnet(&coredev->rdma_net);
}

static struct class ib_class = {
	.name    = "infiniband",
	.dev_release = ib_device_release,
	.dev_uevent = ib_device_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
};

static void rdma_init_coredev(struct ib_core_device *coredev,
			      struct ib_device *dev, struct net *net)
{
	/*
	 * The layout guarantees that &ib_device->coredev.dev aliases
	 * &ib_device->dev, so sysfs code can treat real devices and compat
	 * devices uniformly; the BUILD_BUG_ON enforces that layout.
	 */
	BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) !=
		     offsetof(struct ib_device, dev));

	coredev->dev.class = &ib_class;
	coredev->dev.groups = dev->groups;
	device_initialize(&coredev->dev);
	coredev->owner = dev;
	INIT_LIST_HEAD(&coredev->port_list);
	write_pnet(&coredev->rdma_net, net);
}

/**
 * _ib_alloc_device - allocate an IB device struct
 * @size: size of structure to allocate
 *
 * Low-level drivers should use ib_alloc_device() to allocate &struct
 * ib_device. @size is the size of the structure to be allocated,
 * including any private data used by the low-level driver.
 * ib_dealloc_device() must be used to free structures allocated with
 * ib_alloc_device().
 */
struct ib_device *_ib_alloc_device(size_t size)
{
	struct ib_device *device;

	if (WARN_ON(size < sizeof(struct ib_device)))
		return NULL;

	device = kzalloc(size, GFP_KERNEL);
	if (!device)
		return NULL;

	if (rdma_restrack_init(device)) {
		kfree(device);
		return NULL;
	}

	device->groups[0] = &ib_dev_attr_group;
	rdma_init_coredev(&device->coredev, device, &init_net);

	INIT_LIST_HEAD(&device->event_handler_list);
	spin_lock_init(&device->event_handler_lock);
	mutex_init(&device->unregistration_lock);
	/*
	 * client_data needs to be an ALLOC xarray because we don't want our
	 * mark to be destroyed if the user stores NULL in the client data.
	 */
	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
	init_rwsem(&device->client_data_rwsem);
	xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC);
	mutex_init(&device->compat_devs_mutex);
	init_completion(&device->unreg_completion);
	INIT_WORK(&device->unregistration_work, ib_unregister_work);

	return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
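
/*
 * Illustrative sketch (not from this file): drivers do not call
 * _ib_alloc_device() directly; they use the ib_alloc_device() macro from
 * <rdma/ib_verbs.h>, which requires the ib_device to be the first member of
 * the driver's private structure. "hypo_dev" and "ibdev" are hypothetical
 * names:
 *
 *	struct hypo_dev {
 *		struct ib_device ibdev;	// must be at offset 0
 *		void __iomem *regs;	// driver private state
 *	};
 *
 *	struct hypo_dev *dev = ib_alloc_device(hypo_dev, ibdev);
 *	if (!dev)
 *		return -ENOMEM;
 */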

/**
 * ib_dealloc_device - free an IB device struct
 * @device: structure to free
 *
 * Free a structure allocated with ib_alloc_device().
 */
void ib_dealloc_device(struct ib_device *device)
{
	if (device->ops.dealloc_driver)
		device->ops.dealloc_driver(device);

	/*
	 * ib_unregister_driver() requires all devices to remain in the xarray
	 * while their ops are callable. The last op we call is dealloc_driver
	 * above. This is needed to create a fence on op callbacks prior to
	 * allowing the driver module to unload.
	 */
	down_write(&devices_rwsem);
	if (xa_load(&devices, device->index) == device)
		xa_erase(&devices, device->index);
	up_write(&devices_rwsem);

	/* Expedite releasing netdev references */
	free_netdevs(device);

	WARN_ON(!xa_empty(&device->compat_devs));
	WARN_ON(!xa_empty(&device->client_data));
	WARN_ON(refcount_read(&device->refcount));
	rdma_restrack_clean(device);
	/* Balances with device_initialize() in rdma_init_coredev() */
	put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);

/*
 * add_client_context() and remove_client_context() must be safe against
 * parallel calls on the same device - registration/unregistration of both the
 * device and client can be occurring in parallel.
 *
 * The routines need to be a fence, any caller must not return until the add
 * or remove is fully completed.
 */
static int add_client_context(struct ib_device *device,
			      struct ib_client *client)
{
	int ret = 0;

	if (!device->kverbs_provider && !client->no_kverbs_req)
		return 0;

	down_write(&device->client_data_rwsem);
	/*
	 * So long as the client is registered hold both the client and device
	 * unregistration locks.
	 */
	if (!refcount_inc_not_zero(&client->uses))
		goto out_unlock;
	refcount_inc(&device->refcount);

	/*
	 * Another caller to add_client_context got here first and has already
	 * completely initialized context.
	 */
	if (xa_get_mark(&device->client_data, client->client_id,
			CLIENT_DATA_REGISTERED))
		goto out;

	ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
			      GFP_KERNEL));
	if (ret)
		goto out;
	downgrade_write(&device->client_data_rwsem);
	if (client->add)
		client->add(device);

	/* Readers shall not see a client until add has been completed */
	xa_set_mark(&device->client_data, client->client_id,
		    CLIENT_DATA_REGISTERED);
	up_read(&device->client_data_rwsem);
	return 0;

out:
	ib_device_put(device);
	ib_client_put(client);
out_unlock:
	up_write(&device->client_data_rwsem);
	return ret;
}

static void remove_client_context(struct ib_device *device,
				  unsigned int client_id)
{
	struct ib_client *client;
	void *client_data;

	down_write(&device->client_data_rwsem);
	if (!xa_get_mark(&device->client_data, client_id,
			 CLIENT_DATA_REGISTERED)) {
		up_write(&device->client_data_rwsem);
		return;
	}
	client_data = xa_load(&device->client_data, client_id);
	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
	client = xa_load(&clients, client_id);
	up_write(&device->client_data_rwsem);

	/*
	 * Notice we cannot be holding any exclusive locks when calling the
	 * remove callback as the remove callback can recurse back into any
	 * public functions in this module and thus try for any locks those
	 * functions take.
	 *
	 * For this reason clients and drivers should not call the
	 * unregistration functions while holding any locks.
	 */
	if (client->remove)
		client->remove(device, client_data);

	xa_erase(&device->client_data, client_id);
	ib_device_put(device);
	ib_client_put(client);
}

static int alloc_port_data(struct ib_device *device)
{
	struct ib_port_data_rcu *pdata_rcu;
	unsigned int port;

	if (device->port_data)
		return 0;

	/* This can only be called once the physical port range is defined */
	if (WARN_ON(!device->phys_port_cnt))
		return -EINVAL;

	/*
	 * device->port_data is indexed directly by the port number to make
	 * access to this data as efficient as possible.
	 *
	 * Therefore port_data is declared as a 1 based array with potential
	 * empty slots at the beginning.
	 */
	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
					rdma_end_port(device) + 1),
			    GFP_KERNEL);
	if (!pdata_rcu)
		return -ENOMEM;
	/*
	 * The rcu_head is put in front of the port data array and the stored
	 * pointer is adjusted since we never need to see that member until
	 * kfree_rcu.
	 */
	device->port_data = pdata_rcu->pdata;

	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];

		pdata->ib_dev = device;
		spin_lock_init(&pdata->pkey_list_lock);
		INIT_LIST_HEAD(&pdata->pkey_list);
		spin_lock_init(&pdata->netdev_lock);
		INIT_HLIST_NODE(&pdata->ndev_hash_link);
	}
	return 0;
}

static int verify_immutable(const struct ib_device *dev, u8 port)
{
	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
			    rdma_max_mad_size(dev, port) != 0);
}

static int setup_port_data(struct ib_device *device)
{
	unsigned int port;
	int ret;

	ret = alloc_port_data(device);
	if (ret)
		return ret;

	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];

		ret = device->ops.get_port_immutable(device, port,
						     &pdata->immutable);
		if (ret)
			return ret;

		if (verify_immutable(device, port))
			return -EINVAL;
	}
	return 0;
}

void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
	if (dev->ops.get_dev_fw_str)
		dev->ops.get_dev_fw_str(dev, str);
	else
		str[0] = '\0';
}
EXPORT_SYMBOL(ib_get_device_fw_str);

static void ib_policy_change_task(struct work_struct *work)
{
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		unsigned int i;

		rdma_for_each_port (dev, i) {
			u64 sp;
			int ret = ib_get_cached_subnet_prefix(dev,
							      i,
							      &sp);

			WARN_ONCE(ret,
				  "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
				  ret);
			if (!ret)
				ib_security_cache_change(dev, i, sp);
		}
	}
	up_read(&devices_rwsem);
}

static int ib_security_change(struct notifier_block *nb, unsigned long event,
			      void *lsm_data)
{
	if (event != LSM_POLICY_CHANGE)
		return NOTIFY_DONE;

	schedule_work(&ib_policy_change_work);
	ib_mad_agent_security_change();

	return NOTIFY_OK;
}

static void compatdev_release(struct device *dev)
{
	struct ib_core_device *cdev =
		container_of(dev, struct ib_core_device, dev);

	kfree(cdev);
}

static int add_one_compat_dev(struct ib_device *device,
			      struct rdma_dev_net *rnet)
{
	struct ib_core_device *cdev;
	int ret;

	lockdep_assert_held(&rdma_nets_rwsem);
	if (!ib_devices_shared_netns)
		return 0;

	/*
	 * Create and add compat device in all namespaces other than where it
	 * is currently bound to.
	 */
	if (net_eq(read_pnet(&rnet->net),
		   read_pnet(&device->coredev.rdma_net)))
		return 0;

	/*
	 * The first of rdma_dev_init_net() or ib_register_device() to take
	 * the compat_devs_mutex wins and gets to add the device. Others will
	 * wait for completion here.
	 */
	mutex_lock(&device->compat_devs_mutex);
	cdev = xa_load(&device->compat_devs, rnet->id);
	if (cdev) {
		ret = 0;
		goto done;
	}
	ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL);
	if (ret)
		goto done;

	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
	if (!cdev) {
		ret = -ENOMEM;
		goto cdev_err;
	}

	cdev->dev.parent = device->dev.parent;
	rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
	cdev->dev.release = compatdev_release;
	dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));

	ret = device_add(&cdev->dev);
	if (ret)
		goto add_err;
	ret = ib_setup_port_attrs(cdev);
	if (ret)
		goto port_err;

	ret = xa_err(xa_store(&device->compat_devs, rnet->id,
			      cdev, GFP_KERNEL));
	if (ret)
		goto insert_err;

	mutex_unlock(&device->compat_devs_mutex);
	return 0;

insert_err:
	ib_free_port_attrs(cdev);
port_err:
	device_del(&cdev->dev);
add_err:
	put_device(&cdev->dev);
cdev_err:
	xa_release(&device->compat_devs, rnet->id);
done:
	mutex_unlock(&device->compat_devs_mutex);
	return ret;
}

static void remove_one_compat_dev(struct ib_device *device, u32 id)
{
	struct ib_core_device *cdev;

	mutex_lock(&device->compat_devs_mutex);
	cdev = xa_erase(&device->compat_devs, id);
	mutex_unlock(&device->compat_devs_mutex);
	if (cdev) {
		ib_free_port_attrs(cdev);
		device_del(&cdev->dev);
		put_device(&cdev->dev);
	}
}

static void remove_compat_devs(struct ib_device *device)
{
	struct ib_core_device *cdev;
	unsigned long index;

	xa_for_each (&device->compat_devs, index, cdev)
		remove_one_compat_dev(device, index);
}

static int add_compat_devs(struct ib_device *device)
{
	struct rdma_dev_net *rnet;
	unsigned long index;
	int ret = 0;

	lockdep_assert_held(&devices_rwsem);

	down_read(&rdma_nets_rwsem);
	xa_for_each (&rdma_nets, index, rnet) {
		ret = add_one_compat_dev(device, rnet);
		if (ret)
			break;
	}
	up_read(&rdma_nets_rwsem);
	return ret;
}

static void remove_all_compat_devs(void)
{
	struct ib_core_device *cdev;
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, dev) {
		unsigned long c_index = 0;

		/*
		 * Hold nets_rwsem so that any other thread modifying the
		 * sharing mode can sync with this thread.
		 */
		down_read(&rdma_nets_rwsem);
		xa_for_each (&dev->compat_devs, c_index, cdev)
			remove_one_compat_dev(dev, c_index);
		up_read(&rdma_nets_rwsem);
	}
	up_read(&devices_rwsem);
}

static int add_all_compat_devs(void)
{
	struct rdma_dev_net *rnet;
	struct ib_device *dev;
	unsigned long index;
	int ret = 0;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		unsigned long net_index = 0;

		/*
		 * Hold nets_rwsem so that any other thread modifying the
		 * sharing mode can sync with this thread.
		 */
		down_read(&rdma_nets_rwsem);
		xa_for_each (&rdma_nets, net_index, rnet) {
			ret = add_one_compat_dev(dev, rnet);
			if (ret)
				break;
		}
		up_read(&rdma_nets_rwsem);
	}
	up_read(&devices_rwsem);
	if (ret)
		remove_all_compat_devs();
	return ret;
}

int rdma_compatdev_set(u8 enable)
{
	struct rdma_dev_net *rnet;
	unsigned long index;
	int ret = 0;

	down_write(&rdma_nets_rwsem);
	if (ib_devices_shared_netns == enable) {
		up_write(&rdma_nets_rwsem);
		return 0;
	}

	/*
	 * Changing the sharing mode is only allowed while no net namespace
	 * other than init_net exists, so check for any tracked namespace.
	 */
	xa_for_each (&rdma_nets, index, rnet) {
		ret++;
		break;
	}
	if (!ret)
		ib_devices_shared_netns = enable;
	up_write(&rdma_nets_rwsem);
	if (ret)
		return -EBUSY;

	if (enable)
		ret = add_all_compat_devs();
	else
		remove_all_compat_devs();
	return ret;
}

static void rdma_dev_exit_net(struct net *net)
{
	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
	struct ib_device *dev;
	unsigned long index;
	int ret;

	down_write(&rdma_nets_rwsem);
	/*
	 * Prevent the ID from being re-used and hide it from readers while
	 * the namespace is being dismantled.
	 */
	ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL));
	WARN_ON(ret);
	up_write(&rdma_nets_rwsem);

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, dev) {
		get_device(&dev->dev);
		/*
		 * Release the devices_rwsem so that the potentially blocking
		 * device_del() in remove_one_compat_dev() doesn't hold it for
		 * too long.
		 */
		up_read(&devices_rwsem);

		remove_one_compat_dev(dev, rnet->id);

		/*
		 * If the real device is in this namespace, move it back to
		 * init_net.
		 */
		rdma_dev_change_netns(dev, net, &init_net);

		put_device(&dev->dev);
		down_read(&devices_rwsem);
	}
	up_read(&devices_rwsem);

	xa_erase(&rdma_nets, rnet->id);
}

static __net_init int rdma_dev_init_net(struct net *net)
{
	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
	unsigned long index;
	struct ib_device *dev;
	int ret;

	/* No need to create any compat devices in default init_net. */
	if (net_eq(net, &init_net))
		return 0;

	write_pnet(&rnet->net, net);

	ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
	if (ret)
		return ret;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		/*
		 * Hold nets_rwsem so that a parallel netlink command cannot
		 * change the device-sharing mode under us.
		 */
		down_read(&rdma_nets_rwsem);
		ret = add_one_compat_dev(dev, rnet);
		up_read(&rdma_nets_rwsem);
		if (ret)
			break;
	}
	up_read(&devices_rwsem);

	if (ret)
		rdma_dev_exit_net(net);

	return ret;
}

/*
 * Assign the unique string device name and the unique device index. This is
 * undone by ib_dealloc_device().
 */
static int assign_name(struct ib_device *device, const char *name)
{
	static u32 last_id;
	int ret;

	down_write(&devices_rwsem);
	/* Assign a unique name to the device */
	if (strchr(name, '%'))
		ret = alloc_name(device, name);
	else
		ret = dev_set_name(&device->dev, name);
	if (ret)
		goto out;

	if (__ib_device_get_by_name(dev_name(&device->dev))) {
		ret = -ENFILE;
		goto out;
	}
	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);

	ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
			&last_id, GFP_KERNEL);
	if (ret > 0)
		ret = 0;

out:
	up_write(&devices_rwsem);
	return ret;
}

static void setup_dma_device(struct ib_device *device)
{
	struct device *parent = device->dev.parent;

	WARN_ON_ONCE(device->dma_device);
	if (device->dev.dma_ops) {
		/*
		 * The caller provided custom DMA operations. Copy the
		 * DMA-related fields that are used by e.g. dma_alloc_coherent()
		 * from the parent device.
		 */
		device->dma_device = &device->dev;
		if (!device->dev.dma_mask) {
			if (parent)
				device->dev.dma_mask = parent->dma_mask;
			else
				WARN_ON_ONCE(true);
		}
		if (!device->dev.coherent_dma_mask) {
			if (parent)
				device->dev.coherent_dma_mask =
					parent->coherent_dma_mask;
			else
				WARN_ON_ONCE(true);
		}
	} else {
		/*
		 * The caller did not provide custom DMA operations. Use the
		 * DMA mapping operations of the parent device.
		 */
		WARN_ON_ONCE(!parent);
		device->dma_device = parent;
	}
	/* Setup default max segment size for all IB devices */
	dma_set_max_seg_size(device->dma_device, SZ_2G);
}

/*
 * setup_device() allocates memory and sets up data that requires calling the
 * device ops, this is the only reason these actions are not done during
 * ib_alloc_device. It is undone by ib_device_release().
 */
static int setup_device(struct ib_device *device)
{
	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
	int ret;

	setup_dma_device(device);
	ib_device_check_mandatory(device);

	ret = setup_port_data(device);
	if (ret) {
		dev_warn(&device->dev, "Couldn't create per-port data\n");
		return ret;
	}

	memset(&device->attrs, 0, sizeof(device->attrs));
	ret = device->ops.query_device(device, &device->attrs, &uhw);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't query the device attributes\n");
		return ret;
	}

	return 0;
}

static void disable_device(struct ib_device *device)
{
	u32 cid;

	WARN_ON(!refcount_read(&device->refcount));

	down_write(&devices_rwsem);
	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
	up_write(&devices_rwsem);

	/*
	 * Remove clients in LIFO order, see assign_client_id. This could be
	 * more efficient if xarray learns to reverse iterate. Since no new
	 * clients can be added to this ib_device past this point we only need
	 * the maximum possible client_id value here.
	 */
	down_read(&clients_rwsem);
	cid = highest_client_id;
	up_read(&clients_rwsem);
	while (cid) {
		cid--;
		remove_client_context(device, cid);
	}

	/* Pairs with refcount_set in enable_device_and_get() */
	ib_device_put(device);
	wait_for_completion(&device->unreg_completion);

	/*
	 * compat devices must be removed after device refcount drops to zero.
	 * Otherwise init_net() may add more compatdevs after removing compat
	 * devices and before device is disabled.
	 */
	remove_compat_devs(device);
}

/*
 * An enabled device is visible to all clients and to all the public facing
 * APIs that return a device pointer. This always returns with a new get, even
 * if it fails.
 */
static int enable_device_and_get(struct ib_device *device)
{
	struct ib_client *client;
	unsigned long index;
	int ret = 0;

	/*
	 * One ref belongs to the xa and the other belongs to this
	 * thread. This is needed to guard against parallel unregistration.
	 */
	refcount_set(&device->refcount, 2);
	down_write(&devices_rwsem);
	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);

	/*
	 * By using downgrade_write() we ensure that no other thread can clear
	 * DEVICE_REGISTERED while we are completing the client setup.
	 */
	downgrade_write(&devices_rwsem);

	if (device->ops.enable_driver) {
		ret = device->ops.enable_driver(device);
		if (ret)
			goto out;
	}

	down_read(&clients_rwsem);
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
		ret = add_client_context(device, client);
		if (ret)
			break;
	}
	up_read(&clients_rwsem);
	if (!ret)
		ret = add_compat_devs(device);
out:
	up_read(&devices_rwsem);
	return ret;
}

/**
 * ib_register_device - Register an IB device with IB core
 * @device: Device to register
 * @name: unique string device name. This may include a '%' which will
 *	  cause a unique index to be added to the passed device name.
 *
 * Low-level drivers use ib_register_device() to register their
 * devices with the IB core. All registered clients will receive a
 * callback for each device that is added. @device must be allocated
 * with ib_alloc_device().
 *
 * If the driver uses ops.dealloc_driver and calls any ib_unregister_device()
 * asynchronously then the device pointer may become freed as soon as this
 * function returns.
 */
int ib_register_device(struct ib_device *device, const char *name)
{
	int ret;

	ret = assign_name(device, name);
	if (ret)
		return ret;

	ret = setup_device(device);
	if (ret)
		return ret;

	ret = ib_cache_setup_one(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't set up InfiniBand P_Key/GID cache\n");
		return ret;
	}

	ib_device_register_rdmacg(device);

	rdma_counter_init(device);

	/*
	 * Suppress the ADD uevent: it must not fire before the device is
	 * fully initialized and visible through sysfs.
	 */
	dev_set_uevent_suppress(&device->dev, true);
	ret = device_add(&device->dev);
	if (ret)
		goto cg_cleanup;

	ret = ib_device_register_sysfs(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't register device with driver model\n");
		goto dev_cleanup;
	}

	ret = enable_device_and_get(device);
	dev_set_uevent_suppress(&device->dev, false);
	/* Mark for userspace that device is ready */
	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
	if (ret) {
		void (*dealloc_fn)(struct ib_device *);

		/*
		 * If we hit this error flow then we don't want to
		 * automatically dealloc the device since the caller is
		 * expected to call ib_dealloc_device() after
		 * ib_register_device() fails. This is why dealloc_driver is
		 * temporarily NULLed out so that __ib_unregister_device()
		 * tears the device down without freeing it.
		 */
		dealloc_fn = device->ops.dealloc_driver;
		device->ops.dealloc_driver = NULL;
		ib_device_put(device);
		__ib_unregister_device(device);
		device->ops.dealloc_driver = dealloc_fn;
		return ret;
	}
	ib_device_put(device);

	return 0;

dev_cleanup:
	device_del(&device->dev);
cg_cleanup:
	dev_set_uevent_suppress(&device->dev, false);
	ib_device_unregister_rdmacg(device);
	ib_cache_cleanup_one(device);
	return ret;
}
EXPORT_SYMBOL(ib_register_device);
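
/*
 * Illustrative registration sketch (hypothetical driver, not from this
 * file): allocate, fill in immutable data, install ops, then register. The
 * "%d" in the name asks assign_name()/alloc_name() to pick a free index:
 *
 *	struct hypo_dev *dev = ib_alloc_device(hypo_dev, ibdev);
 *
 *	dev->ibdev.phys_port_cnt = 1;
 *	dev->ibdev.num_comp_vectors = 1;
 *	dev->ibdev.dev.parent = &pdev->dev;	// DMA-capable parent
 *	ib_set_device_ops(&dev->ibdev, &hypo_dev_ops);
 *	ret = ib_register_device(&dev->ibdev, "hypo%d");
 *	if (ret)
 *		ib_dealloc_device(&dev->ibdev);	// caller frees on failure
 */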

static void __ib_unregister_device(struct ib_device *ib_dev)
{
	/*
	 * We have a registration lock so that all the calls to unregister are
	 * fully fenced, once any unregister returns the device is truly
	 * unregistered even if multiple callers are unregistering it at the
	 * same time. This also interacts with the registration flow and
	 * provides sane semantics if register and unregister are racing.
	 */
	mutex_lock(&ib_dev->unregistration_lock);
	if (!refcount_read(&ib_dev->refcount))
		goto out;

	disable_device(ib_dev);

	/* Expedite removing unregistered pointers from the hash table */
	free_netdevs(ib_dev);

	ib_device_unregister_sysfs(ib_dev);
	device_del(&ib_dev->dev);
	ib_device_unregister_rdmacg(ib_dev);
	ib_cache_cleanup_one(ib_dev);

	/*
	 * Drivers using the new flow may not call ib_dealloc_device except
	 * in error unwind prior to registration success.
	 */
	if (ib_dev->ops.dealloc_driver) {
		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
		ib_dealloc_device(ib_dev);
	}
out:
	mutex_unlock(&ib_dev->unregistration_lock);
}

/**
 * ib_unregister_device - Unregister an IB device
 * @ib_dev: The device to unregister
 *
 * Unregister an IB device. All clients will receive a remove callback.
 *
 * Callers should call this routine only once, and protect against races with
 * registration. Typically it should only be called as part of a remove
 * callback in an implementation of driver core's struct device_driver and
 * related.
 *
 * If ops.dealloc_driver is used then ib_dev will be freed upon return from
 * this function.
 */
void ib_unregister_device(struct ib_device *ib_dev)
{
	get_device(&ib_dev->dev);
	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device);
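
/*
 * Illustrative teardown sketch (hypothetical driver): for drivers that do
 * not provide ops.dealloc_driver, unregistering and freeing are two explicit
 * steps, typically from the bus remove() callback:
 *
 *	ib_unregister_device(&dev->ibdev);
 *	ib_dealloc_device(&dev->ibdev);	// implicit when dealloc_driver is set
 */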

/**
 * ib_unregister_device_and_put - Unregister a device while holding a 'get'
 * @ib_dev: The device to unregister
 *
 * This is the same as ib_unregister_device(), except it includes an internal
 * ib_device_put() that should match a 'get' obtained by the caller.
 *
 * It is safe to call this routine concurrently from multiple threads while
 * holding the 'get'. When the function returns the device is fully
 * unregistered.
 *
 * Drivers using this flow MUST provide ops.dealloc_driver to clean up their
 * resources associated with the device and free it.
 */
void ib_unregister_device_and_put(struct ib_device *ib_dev)
{
	WARN_ON(!ib_dev->ops.dealloc_driver);
	get_device(&ib_dev->dev);
	ib_device_put(ib_dev);
	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_and_put);

/**
 * ib_unregister_driver - Unregister all IB devices for a driver
 * @driver_id: The driver to unregister
 *
 * This implements a fence for device unregistration. It only returns once all
 * devices associated with the driver_id have fully completed their
 * unregistration and returned from ib_unregister_device*().
 *
 * If devices are not yet unregistered it goes ahead and starts unregistering
 * them.
 *
 * This does not block creation of new devices with the given driver_id, that
 * is the responsibility of the caller.
 */
void ib_unregister_driver(enum rdma_driver_id driver_id)
{
	struct ib_device *ib_dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, ib_dev) {
		if (ib_dev->ops.driver_id != driver_id)
			continue;

		get_device(&ib_dev->dev);
		up_read(&devices_rwsem);

		WARN_ON(!ib_dev->ops.dealloc_driver);
		__ib_unregister_device(ib_dev);

		put_device(&ib_dev->dev);
		down_read(&devices_rwsem);
	}
	up_read(&devices_rwsem);
}
EXPORT_SYMBOL(ib_unregister_driver);

static void ib_unregister_work(struct work_struct *work)
{
	struct ib_device *ib_dev =
		container_of(work, struct ib_device, unregistration_work);

	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}

/**
 * ib_unregister_device_queued - Unregister a device using a work queue
 * @ib_dev: The device to unregister
 *
 * This schedules an asynchronous unregistration using a WQ for the device. A
 * driver should use this to avoid holding locks while doing unregistration,
 * such as holding the RTNL lock.
 *
 * Drivers using this API must use ib_unregister_driver() before module unload
 * to ensure that all scheduled unregistrations have completed.
 */
void ib_unregister_device_queued(struct ib_device *ib_dev)
{
	WARN_ON(!refcount_read(&ib_dev->refcount));
	WARN_ON(!ib_dev->ops.dealloc_driver);
	get_device(&ib_dev->dev);
	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
		put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
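
/*
 * Illustrative sketch (hypothetical handler): this entry point suits
 * contexts that already hold locks the synchronous path may need, e.g. a
 * netdev notifier running under the RTNL lock:
 *
 *	case NETDEV_UNREGISTER:
 *		ib_unregister_device_queued(&dev->ibdev);
 *		break;
 *
 * The module must later call ib_unregister_driver() to fence the queued
 * work before unloading.
 */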

/*
 * The caller must pass in a device that has the kref held and the refcount
 * released. If the device is in cur_net and still registered then it is moved
 * into net.
 */
static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net)
{
	int ret2 = -EINVAL;
	int ret;

	mutex_lock(&device->unregistration_lock);

	/*
	 * If the device is not under ib_device_get() and the
	 * unregistration_lock is not held, the namespace may have changed or
	 * the device may have been unregistered; re-check under the lock.
	 */
	if (refcount_read(&device->refcount) == 0 ||
	    !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
		ret = -ENODEV;
		goto out;
	}

	kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
	disable_device(device);

	/*
	 * At this point no one can be using the device, so it is safe to
	 * change the namespace.
	 */
	write_pnet(&device->coredev.rdma_net, net);

	down_read(&devices_rwsem);
	/*
	 * Currently rdma devices are system wide unique. So the device name
	 * is guaranteed free in the new namespace. Publish the new namespace
	 * at the sysfs level.
	 */
	ret = device_rename(&device->dev, dev_name(&device->dev));
	up_read(&devices_rwsem);
	if (ret) {
		dev_warn(&device->dev,
			 "%s: Couldn't rename device after namespace change\n",
			 __func__);
		/* Try and put things back and re-enable the device */
		write_pnet(&device->coredev.rdma_net, cur_net);
	}

	ret2 = enable_device_and_get(device);
	if (ret2) {
		/*
		 * This shouldn't really happen, but if it does, let the user
		 * retry at a later point. So don't disable the device.
		 */
		dev_warn(&device->dev,
			 "%s: Couldn't re-enable device after namespace change\n",
			 __func__);
	}
	kobject_uevent(&device->dev.kobj, KOBJ_ADD);

	ib_device_put(device);
out:
	mutex_unlock(&device->unregistration_lock);
	if (ret)
		return ret;
	return ret2;
}

int ib_device_set_netns_put(struct sk_buff *skb,
			    struct ib_device *dev, u32 ns_fd)
{
	struct net *net;
	int ret;

	net = get_net_ns_by_fd(ns_fd);
	if (IS_ERR(net)) {
		ret = PTR_ERR(net);
		goto net_err;
	}

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
		ret = -EPERM;
		goto ns_err;
	}

	/*
	 * Currently supported only for providers that implement
	 * disassociate_ucontext() and do not use the init_port flow, and
	 * only while the namespace sharing mode is disabled.
	 */
	if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
	    ib_devices_shared_netns) {
		ret = -EOPNOTSUPP;
		goto ns_err;
	}

	get_device(&dev->dev);
	ib_device_put(dev);
	ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
	put_device(&dev->dev);

	put_net(net);
	return ret;

ns_err:
	put_net(net);
net_err:
	ib_device_put(dev);
	return ret;
}

static struct pernet_operations rdma_dev_net_ops = {
	.init = rdma_dev_init_net,
	.exit = rdma_dev_exit_net,
	.id = &rdma_dev_net_id,
	.size = sizeof(struct rdma_dev_net),
};

static int assign_client_id(struct ib_client *client)
{
	int ret;

	down_write(&clients_rwsem);
	/*
	 * The add/remove callbacks must be called in FIFO/LIFO order. To
	 * achieve this we assign client_ids so they are sorted in
	 * registration order.
	 */
	client->client_id = highest_client_id;
	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
	if (ret)
		goto out;

	highest_client_id++;
	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);

out:
	up_write(&clients_rwsem);
	return ret;
}

static void remove_client_id(struct ib_client *client)
{
	down_write(&clients_rwsem);
	xa_erase(&clients, client->client_id);
	for (; highest_client_id; highest_client_id--)
		if (xa_load(&clients, highest_client_id - 1))
			break;
	up_write(&clients_rwsem);
}

/**
 * ib_register_client - Register an IB client
 * @client: Client to register
 *
 * Upper level users of the IB drivers can use ib_register_client() to
 * register callbacks for IB device addition and removal. When an IB
 * device is added, each registered client's add method will be called
 * (in the order the clients were registered), and when a device is
 * removed, each client's remove method will be called (in the reverse
 * order that clients were registered). In addition, when
 * ib_register_client() is called, the client will receive an add
 * callback for all devices already registered.
 */
int ib_register_client(struct ib_client *client)
{
	struct ib_device *device;
	unsigned long index;
	int ret;

	refcount_set(&client->uses, 1);
	init_completion(&client->uses_zero);
	ret = assign_client_id(client);
	if (ret)
		return ret;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
		ret = add_client_context(device, client);
		if (ret) {
			up_read(&devices_rwsem);
			ib_unregister_client(client);
			return ret;
		}
	}
	up_read(&devices_rwsem);
	return 0;
}
EXPORT_SYMBOL(ib_register_client);
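
/*
 * Illustrative client skeleton (hypothetical names, not from this file):
 *
 *	static struct ib_client hypo_client;	// forward declaration
 *
 *	static void hypo_add_one(struct ib_device *device)
 *	{
 *		struct hypo_state *st = kzalloc(sizeof(*st), GFP_KERNEL);
 *
 *		if (st)
 *			ib_set_client_data(device, &hypo_client, st);
 *	}
 *
 *	static void hypo_remove_one(struct ib_device *device, void *client_data)
 *	{
 *		kfree(client_data);	// core erases the xarray entry itself
 *	}
 *
 *	static struct ib_client hypo_client = {
 *		.name	= "hypo",
 *		.add	= hypo_add_one,
 *		.remove	= hypo_remove_one,
 *	};
 *
 *	ret = ib_register_client(&hypo_client);
 */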

/**
 * ib_unregister_client - Unregister an IB client
 * @client: Client to unregister
 *
 * Upper level users use ib_unregister_client() to remove their client
 * registration. When ib_unregister_client() is called, the client
 * will receive a remove callback for each IB device still registered.
 *
 * This is a full fence, once it returns no client callbacks will be called,
 * or are running in another thread.
 */
void ib_unregister_client(struct ib_client *client)
{
	struct ib_device *device;
	unsigned long index;

	down_write(&clients_rwsem);
	ib_client_put(client);
	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
	up_write(&clients_rwsem);

	/* We do not want to have locks while calling client->remove() */
	rcu_read_lock();
	xa_for_each (&devices, index, device) {
		if (!ib_device_try_get(device))
			continue;
		rcu_read_unlock();

		remove_client_context(device, client->client_id);

		ib_device_put(device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	/*
	 * remove_client_context() is not a fence, it can return even though a
	 * removal is ongoing. Wait until all removals are completed.
	 */
	wait_for_completion(&client->uses_zero);
	remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);

static int __ib_get_global_client_nl_info(const char *client_name,
					  struct ib_client_nl_info *res)
{
	struct ib_client *client;
	unsigned long index;
	int ret = -ENOENT;

	down_read(&clients_rwsem);
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
		if (strcmp(client->name, client_name) != 0)
			continue;
		if (!client->get_global_nl_info) {
			ret = -EOPNOTSUPP;
			break;
		}
		ret = client->get_global_nl_info(res);
		if (WARN_ON(ret == -ENOENT))
			ret = -EINVAL;
		if (!ret && res->cdev)
			get_device(res->cdev);
		break;
	}
	up_read(&clients_rwsem);
	return ret;
}

static int __ib_get_client_nl_info(struct ib_device *ibdev,
				   const char *client_name,
				   struct ib_client_nl_info *res)
{
	unsigned long index;
	void *client_data;
	int ret = -ENOENT;

	down_read(&ibdev->client_data_rwsem);
	xan_for_each_marked (&ibdev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || strcmp(client->name, client_name) != 0)
			continue;
		if (!client->get_nl_info) {
			ret = -EOPNOTSUPP;
			break;
		}
		ret = client->get_nl_info(ibdev, client_data, res);
		if (WARN_ON(ret == -ENOENT))
			ret = -EINVAL;

		/*
		 * The cdev is guaranteed valid as long as we are inside the
		 * client_data_rwsem as remove_one can't be called. Keep it
		 * valid for the caller.
		 */
		if (!ret && res->cdev)
			get_device(res->cdev);
		break;
	}
	up_read(&ibdev->client_data_rwsem);

	return ret;
}

/**
 * ib_get_client_nl_info - Fetch the nl_info from a client
 * @ibdev: IB device, or NULL for a global (device independent) query
 * @client_name: Name of the client
 * @res: Result of the query
 */
int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
			  struct ib_client_nl_info *res)
{
	int ret;

	if (ibdev)
		ret = __ib_get_client_nl_info(ibdev, client_name, res);
	else
		ret = __ib_get_global_client_nl_info(client_name, res);
#ifdef CONFIG_MODULES
	if (ret == -ENOENT) {
		request_module("rdma-client-%s", client_name);
		if (ibdev)
			ret = __ib_get_client_nl_info(ibdev, client_name, res);
		else
			ret = __ib_get_global_client_nl_info(client_name, res);
	}
#endif
	if (ret) {
		if (ret == -ENOENT)
			return -EOPNOTSUPP;
		return ret;
	}

	if (WARN_ON(!res->cdev))
		return -EINVAL;
	return 0;
}

/**
 * ib_set_client_data - Set IB client context
 * @device: Device to set context for
 * @client: Client to set context for
 * @data: Context to set
 *
 * ib_set_client_data() sets client context data that can be retrieved with
 * ib_get_client_data(). This can only be called while the client is
 * registered to the device; once the ib_client remove() callback returns it
 * cannot be called.
 */
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
			void *data)
{
	void *rc;

	if (WARN_ON(IS_ERR(data)))
		data = NULL;

	rc = xa_store(&device->client_data, client->client_id, data,
		      GFP_KERNEL);
	WARN_ON(xa_is_err(rc));
}
EXPORT_SYMBOL(ib_set_client_data);
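
/*
 * Illustrative pairing (hypothetical client): data stored here is returned
 * by ib_get_client_data() (a helper in <rdma/ib_verbs.h>) until the client's
 * remove() callback has run:
 *
 *	ib_set_client_data(device, &hypo_client, st);	// in add()
 *	st = ib_get_client_data(device, &hypo_client);	// any time in between
 */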

/**
 * ib_register_event_handler - Register an IB event handler
 * @event_handler: Handler to register
 *
 * ib_register_event_handler() registers an event handler that will be
 * called back when asynchronous IB events occur (as defined in
 * chapter 11 of the InfiniBand Architecture Specification). This
 * callback may occur in interrupt context.
 */
void ib_register_event_handler(struct ib_event_handler *event_handler)
{
	unsigned long flags;

	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
	list_add_tail(&event_handler->list,
		      &event_handler->device->event_handler_list);
	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_register_event_handler);

/**
 * ib_unregister_event_handler - Unregister an event handler
 * @event_handler: Handler to unregister
 *
 * Unregister an event handler registered with
 * ib_register_event_handler().
 */
void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
	unsigned long flags;

	spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
	list_del(&event_handler->list);
	spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_unregister_event_handler);

/**
 * ib_dispatch_event - Dispatch an asynchronous event
 * @event: Event to dispatch
 *
 * Low-level drivers must call ib_dispatch_event() to dispatch the
 * event to all registered event handlers when an asynchronous event
 * occurs.
 */
void ib_dispatch_event(struct ib_event *event)
{
	unsigned long flags;
	struct ib_event_handler *handler;

	spin_lock_irqsave(&event->device->event_handler_lock, flags);

	list_for_each_entry(handler, &event->device->event_handler_list, list)
		handler->handler(handler, event);

	spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_dispatch_event);
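
/*
 * Illustrative sketch (hypothetical handler): consumers embed a struct
 * ib_event_handler, initialize it with INIT_IB_EVENT_HANDLER() from
 * <rdma/ib_verbs.h>, and register it. Handlers may run in IRQ context, so
 * they must not sleep:
 *
 *	static void hypo_event(struct ib_event_handler *handler,
 *			       struct ib_event *event)
 *	{
 *		if (event->event == IB_EVENT_PORT_ACTIVE)
 *			schedule_work(&hypo_port_work);	// defer sleeping work
 *	}
 *
 *	INIT_IB_EVENT_HANDLER(&priv->eh, device, hypo_event);
 *	ib_register_event_handler(&priv->eh);
 */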

/**
 * ib_query_port - Query IB port attributes
 * @device: Device to query
 * @port_num: Port number to query
 * @port_attr: Port attributes
 *
 * ib_query_port() returns the attributes of a port through the
 * @port_attr pointer.
 */
int ib_query_port(struct ib_device *device,
		  u8 port_num,
		  struct ib_port_attr *port_attr)
{
	union ib_gid gid;
	int err;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	memset(port_attr, 0, sizeof(*port_attr));
	err = device->ops.query_port(device, port_num, port_attr);
	if (err || port_attr->subnet_prefix)
		return err;

	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
		return 0;

	err = device->ops.query_gid(device, port_num, 0, &gid);
	if (err)
		return err;

	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
	return 0;
}
EXPORT_SYMBOL(ib_query_port);

static void add_ndev_hash(struct ib_port_data *pdata)
{
	unsigned long flags;

	might_sleep();

	spin_lock_irqsave(&ndev_hash_lock, flags);
	if (hash_hashed(&pdata->ndev_hash_link)) {
		hash_del_rcu(&pdata->ndev_hash_link);
		spin_unlock_irqrestore(&ndev_hash_lock, flags);
		/*
		 * We cannot do hash_add_rcu after a hash_del_rcu until the
		 * grace period
		 */
		synchronize_rcu();
		spin_lock_irqsave(&ndev_hash_lock, flags);
	}
	if (pdata->netdev)
		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
			     (uintptr_t)pdata->netdev);
	spin_unlock_irqrestore(&ndev_hash_lock, flags);
}

/**
 * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
 * @ib_dev: Device to modify
 * @ndev: net_device to affiliate, may be NULL
 * @port: IB port the net_device is connected to
 *
 * Drivers should use this to link the ib_device to a netdev so the netdev
 * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
 * affiliated with any port.
 *
 * The caller must ensure that the given ndev is not unregistered or
 * unregistering, and that either the ib_device is unregistered or
 * ib_device_set_netdev() is called with NULL when the ndev sends a
 * NETDEV_UNREGISTER event.
 */
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
			 unsigned int port)
{
	struct net_device *old_ndev;
	struct ib_port_data *pdata;
	unsigned long flags;
	int ret;

	/*
	 * Drivers wish to call this before ib_register_device(), so we have
	 * to setup the port data early.
	 */
	ret = alloc_port_data(ib_dev);
	if (ret)
		return ret;

	if (!rdma_is_port_valid(ib_dev, port))
		return -EINVAL;

	pdata = &ib_dev->port_data[port];
	spin_lock_irqsave(&pdata->netdev_lock, flags);
	old_ndev = rcu_dereference_protected(
		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
	if (old_ndev == ndev) {
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
		return 0;
	}

	if (ndev)
		dev_hold(ndev);
	rcu_assign_pointer(pdata->netdev, ndev);
	spin_unlock_irqrestore(&pdata->netdev_lock, flags);

	add_ndev_hash(pdata);
	if (old_ndev)
		dev_put(old_ndev);

	return 0;
}
EXPORT_SYMBOL(ib_device_set_netdev);
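
/*
 * Illustrative sketch (hypothetical RoCE driver): associate the netdev when
 * it is created and clear the association when it goes away:
 *
 *	ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);	// bind port 1
 *	...
 *	ib_device_set_netdev(&dev->ibdev, NULL, 1);	// on NETDEV_UNREGISTER
 */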

static void free_netdevs(struct ib_device *ib_dev)
{
	unsigned long flags;
	unsigned int port;

	if (!ib_dev->port_data)
		return;

	rdma_for_each_port (ib_dev, port) {
		struct ib_port_data *pdata = &ib_dev->port_data[port];
		struct net_device *ndev;

		spin_lock_irqsave(&pdata->netdev_lock, flags);
		ndev = rcu_dereference_protected(
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (ndev) {
			spin_lock(&ndev_hash_lock);
			hash_del_rcu(&pdata->ndev_hash_link);
			spin_unlock(&ndev_hash_lock);

			/*
			 * If this is the last dev_put there is still a
			 * synchronize_rcu before the netdev is kfreed, so we
			 * can continue to rely on unlocked pointer
			 * comparisons after the put
			 */
			rcu_assign_pointer(pdata->netdev, NULL);
			dev_put(ndev);
		}
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
	}
}

struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
					unsigned int port)
{
	struct ib_port_data *pdata;
	struct net_device *res;

	if (!rdma_is_port_valid(ib_dev, port))
		return NULL;

	pdata = &ib_dev->port_data[port];

	/*
	 * New drivers should use ib_device_set_netdev() instead of
	 * implementing the legacy get_netdev op.
	 */
	if (ib_dev->ops.get_netdev)
		res = ib_dev->ops.get_netdev(ib_dev, port);
	else {
		spin_lock(&pdata->netdev_lock);
		res = rcu_dereference_protected(
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (res)
			dev_hold(res);
		spin_unlock(&pdata->netdev_lock);
	}

	/*
	 * If we are starting to unregister expedite things by preventing
	 * propagation of an unregistering netdev.
	 */
	if (res && res->reg_state != NETREG_REGISTERED) {
		dev_put(res);
		return NULL;
	}

	return res;
}

/**
 * ib_device_get_by_netdev - Find an IB device associated with a netdev
 * @ndev: netdev to locate
 * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
 *
 * Find and hold an ib_device that is associated with a netdev via
 * ib_device_set_netdev(). The caller must call ib_device_put() on the
 * returned pointer.
 */
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
					  enum rdma_driver_id driver_id)
{
	struct ib_device *res = NULL;
	struct ib_port_data *cur;

	rcu_read_lock();
	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
				    (uintptr_t)ndev) {
		if (rcu_access_pointer(cur->netdev) == ndev &&
		    (driver_id == RDMA_DRIVER_UNKNOWN ||
		     cur->ib_dev->ops.driver_id == driver_id) &&
		    ib_device_try_get(cur->ib_dev)) {
			res = cur->ib_dev;
			break;
		}
	}
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(ib_device_get_by_netdev);
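
/*
 * Illustrative lookup (hypothetical caller): resolve a netdev to its RDMA
 * device, accepting any driver, and drop the reference when done:
 *
 *	struct ib_device *ibdev;
 *
 *	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
 *	if (ibdev) {
 *		// ... use ibdev ...
 *		ib_device_put(ibdev);
 *	}
 */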

/**
 * ib_enum_roce_netdev - enumerate all RoCE ports
 * @ib_dev: IB device we want to query
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each found RoCE port
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates all of the physical RoCE ports of ib_dev which are related to a
 * netdevice and calls the callback on each port for which filter() returns
 * non-zero.
 */
void ib_enum_roce_netdev(struct ib_device *ib_dev,
			 roce_netdev_filter filter,
			 void *filter_cookie,
			 roce_netdev_callback cb,
			 void *cookie)
{
	unsigned int port;

	rdma_for_each_port (ib_dev, port)
		if (rdma_protocol_roce(ib_dev, port)) {
			struct net_device *idev =
				ib_device_get_netdev(ib_dev, port);

			if (filter(ib_dev, port, idev, filter_cookie))
				cb(ib_dev, port, idev, cookie);

			if (idev)
				dev_put(idev);
		}
}

/**
 * ib_enum_all_roce_netdevs - enumerate all RoCE devices
 * @filter: Should we call the callback?
 * @filter_cookie: Cookie passed to filter
 * @cb: Callback to call for each found RoCE port
 * @cookie: Cookie passed back to the callback
 *
 * Enumerates the physical RoCE ports of all registered devices which are
 * related to a netdevice and calls the callback on each port for which
 * filter() returns non-zero.
 */
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
			      void *filter_cookie,
			      roce_netdev_callback cb,
			      void *cookie)
{
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
	up_read(&devices_rwsem);
}

/**
 * ib_enum_all_devs - enumerate all ib_devices
 * @nldev_cb: Callback to call for each found ib_device
 * @skb: netlink message being filled
 * @cb: netlink callback state
 *
 * Enumerates all registered ib_devices that are visible in the net
 * namespace of @skb and calls @nldev_cb on each of them.
 */
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
		     struct netlink_callback *cb)
{
	unsigned long index;
	struct ib_device *dev;
	unsigned int idx = 0;
	int ret = 0;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		if (!rdma_dev_access_netns(dev, sock_net(skb->sk)))
			continue;

		ret = nldev_cb(dev, skb, cb, idx);
		if (ret)
			break;
		idx++;
	}
	up_read(&devices_rwsem);
	return ret;
}

/**
 * ib_query_pkey - Get P_Key table entry
 * @device: Device to query
 * @port_num: Port number to query
 * @index: P_Key table index to query
 * @pkey: Returned P_Key
 *
 * ib_query_pkey() fetches the specified P_Key table entry.
 */
int ib_query_pkey(struct ib_device *device,
		  u8 port_num, u16 index, u16 *pkey)
{
	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);

/**
 * ib_modify_device - Change IB device attributes
 * @device: Device to modify
 * @device_modify_mask: Mask of attributes to change
 * @device_modify: New attribute values
 *
 * ib_modify_device() changes a device's attributes as specified by
 * the @device_modify_mask and @device_modify structure.
 */
int ib_modify_device(struct ib_device *device,
		     int device_modify_mask,
		     struct ib_device_modify *device_modify)
{
	if (!device->ops.modify_device)
		return -ENOSYS;

	return device->ops.modify_device(device, device_modify_mask,
					 device_modify);
}
EXPORT_SYMBOL(ib_modify_device);

/**
 * ib_modify_port - Modifies the attributes for the specified port.
 * @device: The device to modify.
 * @port_num: The number of the port to modify.
 * @port_modify_mask: Mask used to specify which attributes of the port
 *   to change.
 * @port_modify: New attribute values for the port.
 *
 * ib_modify_port() changes a port's attributes as specified by the
 * @port_modify_mask and @port_modify structure.
 */
int ib_modify_port(struct ib_device *device,
		   u8 port_num, int port_modify_mask,
		   struct ib_port_modify *port_modify)
{
	int rc;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	if (device->ops.modify_port)
		rc = device->ops.modify_port(device, port_num,
					     port_modify_mask,
					     port_modify);
	else
		rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
	return rc;
}
EXPORT_SYMBOL(ib_modify_port);

/**
 * ib_find_gid - Returns the port number and GID table index where
 *   a specified GID value occurs. It searches only the IB link layer.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the GID table where the GID was found. This
 *   parameter may be NULL.
 */
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
		u8 *port_num, u16 *index)
{
	union ib_gid tmp_gid;
	unsigned int port;
	int ret, i;

	rdma_for_each_port (device, port) {
		if (!rdma_protocol_ib(device, port))
			continue;

		for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
		     ++i) {
			ret = rdma_query_gid(device, port, i, &tmp_gid);
			if (ret)
				return ret;
			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
				*port_num = port;
				if (index)
					*index = i;
				return 0;
			}
		}
	}

	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);

/**
 * ib_find_pkey - Returns the PKey table index where a specified
 *   PKey value occurs.
 * @device: The device to query.
 * @port_num: The port number of the device to search for the PKey.
 * @pkey: The PKey value to search for.
 * @index: The index into the PKey table where the PKey was found.
 */
int ib_find_pkey(struct ib_device *device,
		 u8 port_num, u16 pkey, u16 *index)
{
	int ret, i;
	u16 tmp_pkey;
	int partial_ix = -1;

	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
	     ++i) {
		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
		if (ret)
			return ret;
		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
			/* if there is a full-member pkey take it */
			if (tmp_pkey & 0x8000) {
				*index = i;
				return 0;
			}
			if (partial_ix < 0)
				partial_ix = i;
		}
	}

	/* no full-member, if one exists take the limited one */
	if (partial_ix >= 0) {
		*index = partial_ix;
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
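
/*
 * Worked example for the membership bit: P_Keys 0x8001 and 0x0001 name the
 * same partition (low 15 bits 0x0001); bit 15 set means full membership.
 * With a port table of { 0x0001, 0x8001 }, ib_find_pkey(dev, port, 0x0001,
 * &idx) sets idx to 1, preferring the full-member entry; only when no
 * full-member entry exists is the limited-member index returned.
 */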

/**
 * ib_get_net_dev_by_params() - Return the appropriate net_dev
 * for a received CM request
 * @dev:	An RDMA device on which the request has been received.
 * @port:	Port number on the RDMA device.
 * @pkey:	The Pkey the request came on.
 * @gid:	A GID that the net_dev uses to communicate.
 * @addr:	Contains the IP address that the request specified as its
 *		destination.
 */
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
					    u8 port,
					    u16 pkey,
					    const union ib_gid *gid,
					    const struct sockaddr *addr)
{
	struct net_device *net_dev = NULL;
	unsigned long index;
	void *client_data;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	/*
	 * Holding the read side guarantees that the client will not become
	 * unregistered while we are calling get_net_dev_by_params()
	 */
	down_read(&dev->client_data_rwsem);
	xan_for_each_marked (&dev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || !client->get_net_dev_by_params)
			continue;

		net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
							addr, client_data);
		if (net_dev)
			break;
	}
	up_read(&dev->client_data_rwsem);

	return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);

void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
{
	struct ib_device_ops *dev_ops = &dev->ops;
#define SET_DEVICE_OP(ptr, name)                                               \
	do {                                                                   \
		if (ops->name)                                                 \
			if (!((ptr)->name))                                    \
				(ptr)->name = ops->name;                       \
	} while (0)

#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)

	if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
		WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
			dev_ops->driver_id != ops->driver_id);
		dev_ops->driver_id = ops->driver_id;
	}
	if (ops->owner) {
		WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
		dev_ops->owner = ops->owner;
	}
	if (ops->uverbs_abi_ver)
		dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;

	dev_ops->uverbs_no_driver_id_binding |=
		ops->uverbs_no_driver_id_binding;

	SET_DEVICE_OP(dev_ops, add_gid);
	SET_DEVICE_OP(dev_ops, advise_mr);
	SET_DEVICE_OP(dev_ops, alloc_dm);
	SET_DEVICE_OP(dev_ops, alloc_fmr);
	SET_DEVICE_OP(dev_ops, alloc_hw_stats);
	SET_DEVICE_OP(dev_ops, alloc_mr);
	SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
	SET_DEVICE_OP(dev_ops, alloc_mw);
	SET_DEVICE_OP(dev_ops, alloc_pd);
	SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
	SET_DEVICE_OP(dev_ops, alloc_ucontext);
	SET_DEVICE_OP(dev_ops, alloc_xrcd);
	SET_DEVICE_OP(dev_ops, attach_mcast);
	SET_DEVICE_OP(dev_ops, check_mr_status);
	SET_DEVICE_OP(dev_ops, counter_alloc_stats);
	SET_DEVICE_OP(dev_ops, counter_bind_qp);
	SET_DEVICE_OP(dev_ops, counter_dealloc);
	SET_DEVICE_OP(dev_ops, counter_unbind_qp);
	SET_DEVICE_OP(dev_ops, counter_update_stats);
	SET_DEVICE_OP(dev_ops, create_ah);
	SET_DEVICE_OP(dev_ops, create_counters);
	SET_DEVICE_OP(dev_ops, create_cq);
	SET_DEVICE_OP(dev_ops, create_flow);
	SET_DEVICE_OP(dev_ops, create_flow_action_esp);
	SET_DEVICE_OP(dev_ops, create_qp);
	SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
	SET_DEVICE_OP(dev_ops, create_srq);
	SET_DEVICE_OP(dev_ops, create_wq);
	SET_DEVICE_OP(dev_ops, dealloc_dm);
	SET_DEVICE_OP(dev_ops, dealloc_driver);
	SET_DEVICE_OP(dev_ops, dealloc_fmr);
	SET_DEVICE_OP(dev_ops, dealloc_mw);
	SET_DEVICE_OP(dev_ops, dealloc_pd);
	SET_DEVICE_OP(dev_ops, dealloc_ucontext);
	SET_DEVICE_OP(dev_ops, dealloc_xrcd);
	SET_DEVICE_OP(dev_ops, del_gid);
	SET_DEVICE_OP(dev_ops, dereg_mr);
	SET_DEVICE_OP(dev_ops, destroy_ah);
	SET_DEVICE_OP(dev_ops, destroy_counters);
	SET_DEVICE_OP(dev_ops, destroy_cq);
	SET_DEVICE_OP(dev_ops, destroy_flow);
	SET_DEVICE_OP(dev_ops, destroy_flow_action);
	SET_DEVICE_OP(dev_ops, destroy_qp);
	SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
	SET_DEVICE_OP(dev_ops, destroy_srq);
	SET_DEVICE_OP(dev_ops, destroy_wq);
	SET_DEVICE_OP(dev_ops, detach_mcast);
	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
	SET_DEVICE_OP(dev_ops, drain_rq);
	SET_DEVICE_OP(dev_ops, drain_sq);
	SET_DEVICE_OP(dev_ops, enable_driver);
	SET_DEVICE_OP(dev_ops, fill_res_entry);
	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
	SET_DEVICE_OP(dev_ops, get_dma_mr);
	SET_DEVICE_OP(dev_ops, get_hw_stats);
	SET_DEVICE_OP(dev_ops, get_link_layer);
	SET_DEVICE_OP(dev_ops, get_netdev);
	SET_DEVICE_OP(dev_ops, get_port_immutable);
	SET_DEVICE_OP(dev_ops, get_vector_affinity);
	SET_DEVICE_OP(dev_ops, get_vf_config);
	SET_DEVICE_OP(dev_ops, get_vf_stats);
	SET_DEVICE_OP(dev_ops, init_port);
	SET_DEVICE_OP(dev_ops, iw_accept);
	SET_DEVICE_OP(dev_ops, iw_add_ref);
	SET_DEVICE_OP(dev_ops, iw_connect);
	SET_DEVICE_OP(dev_ops, iw_create_listen);
	SET_DEVICE_OP(dev_ops, iw_destroy_listen);
	SET_DEVICE_OP(dev_ops, iw_get_qp);
	SET_DEVICE_OP(dev_ops, iw_reject);
	SET_DEVICE_OP(dev_ops, iw_rem_ref);
	SET_DEVICE_OP(dev_ops, map_mr_sg);
	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
	SET_DEVICE_OP(dev_ops, map_phys_fmr);
	SET_DEVICE_OP(dev_ops, mmap);
	SET_DEVICE_OP(dev_ops, modify_ah);
	SET_DEVICE_OP(dev_ops, modify_cq);
	SET_DEVICE_OP(dev_ops, modify_device);
	SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
	SET_DEVICE_OP(dev_ops, modify_port);
	SET_DEVICE_OP(dev_ops, modify_qp);
	SET_DEVICE_OP(dev_ops, modify_srq);
	SET_DEVICE_OP(dev_ops, modify_wq);
	SET_DEVICE_OP(dev_ops, peek_cq);
	SET_DEVICE_OP(dev_ops, poll_cq);
	SET_DEVICE_OP(dev_ops, post_recv);
	SET_DEVICE_OP(dev_ops, post_send);
	SET_DEVICE_OP(dev_ops, post_srq_recv);
	SET_DEVICE_OP(dev_ops, process_mad);
	SET_DEVICE_OP(dev_ops, query_ah);
	SET_DEVICE_OP(dev_ops, query_device);
	SET_DEVICE_OP(dev_ops, query_gid);
	SET_DEVICE_OP(dev_ops, query_pkey);
	SET_DEVICE_OP(dev_ops, query_port);
	SET_DEVICE_OP(dev_ops, query_qp);
	SET_DEVICE_OP(dev_ops, query_srq);
	SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
	SET_DEVICE_OP(dev_ops, read_counters);
	SET_DEVICE_OP(dev_ops, reg_dm_mr);
	SET_DEVICE_OP(dev_ops, reg_user_mr);
	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
	SET_DEVICE_OP(dev_ops, req_notify_cq);
	SET_DEVICE_OP(dev_ops, rereg_user_mr);
	SET_DEVICE_OP(dev_ops, resize_cq);
	SET_DEVICE_OP(dev_ops, set_vf_guid);
	SET_DEVICE_OP(dev_ops, set_vf_link_state);
	SET_DEVICE_OP(dev_ops, unmap_fmr);

	SET_OBJ_SIZE(dev_ops, ib_ah);
	SET_OBJ_SIZE(dev_ops, ib_cq);
	SET_OBJ_SIZE(dev_ops, ib_pd);
	SET_OBJ_SIZE(dev_ops, ib_srq);
	SET_OBJ_SIZE(dev_ops, ib_ucontext);
}
EXPORT_SYMBOL(ib_set_device_ops);
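
/*
 * Illustrative ops table (hypothetical driver): ib_set_device_ops() copies
 * only the non-NULL callbacks that the destination has not already set, so
 * core components can layer defaults on top of driver ops:
 *
 *	static const struct ib_device_ops hypo_dev_ops = {
 *		.owner = THIS_MODULE,
 *		.driver_id = RDMA_DRIVER_UNKNOWN, // real drivers use their id
 *		.uverbs_abi_ver = 1,
 *		.query_device = hypo_query_device,
 *		.query_port = hypo_query_port,
 *		.get_port_immutable = hypo_get_port_immutable,
 *		// ... remaining mandatory ops, see ib_device_check_mandatory()
 *	};
 *
 *	ib_set_device_ops(&dev->ibdev, &hypo_dev_ops);
 */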

static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
	[RDMA_NL_LS_OP_RESOLVE] = {
		.doit = ib_nl_handle_resolve_resp,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NL_LS_OP_SET_TIMEOUT] = {
		.doit = ib_nl_handle_set_timeout,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NL_LS_OP_IP_RESOLVE] = {
		.doit = ib_nl_handle_ip_res_resp,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};

static int __init ib_core_init(void)
{
	int ret;

	ib_wq = alloc_workqueue("infiniband", 0, 0);
	if (!ib_wq)
		return -ENOMEM;

	ib_comp_wq = alloc_workqueue("ib-comp-wq",
			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
	if (!ib_comp_wq) {
		ret = -ENOMEM;
		goto err;
	}

	ib_comp_unbound_wq =
		alloc_workqueue("ib-comp-unb-wq",
				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
	if (!ib_comp_unbound_wq) {
		ret = -ENOMEM;
		goto err_comp;
	}

	ret = class_register(&ib_class);
	if (ret) {
		pr_warn("Couldn't create InfiniBand device class\n");
		goto err_comp_unbound;
	}

	ret = rdma_nl_init();
	if (ret) {
		pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
		goto err_sysfs;
	}

	ret = addr_init();
	if (ret) {
		pr_warn("Couldn't init IB address resolution\n");
		goto err_ibnl;
	}

	ret = ib_mad_init();
	if (ret) {
		pr_warn("Couldn't init IB MAD\n");
		goto err_addr;
	}

	ret = ib_sa_init();
	if (ret) {
		pr_warn("Couldn't init SA\n");
		goto err_mad;
	}

	ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
	if (ret) {
		pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
		goto err_sa;
	}

	ret = register_pernet_device(&rdma_dev_net_ops);
	if (ret) {
		pr_warn("Couldn't init compat dev. ret %d\n", ret);
		goto err_compat;
	}

	nldev_init();
	rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
	roce_gid_mgmt_init();

	return 0;

err_compat:
	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
	ib_sa_cleanup();
err_mad:
	ib_mad_cleanup();
err_addr:
	addr_cleanup();
err_ibnl:
	rdma_nl_exit();
err_sysfs:
	class_unregister(&ib_class);
err_comp_unbound:
	destroy_workqueue(ib_comp_unbound_wq);
err_comp:
	destroy_workqueue(ib_comp_wq);
err:
	destroy_workqueue(ib_wq);
	return ret;
}

static void __exit ib_core_cleanup(void)
{
	roce_gid_mgmt_cleanup();
	nldev_exit();
	rdma_nl_unregister(RDMA_NL_LS);
	unregister_pernet_device(&rdma_dev_net_ops);
	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
	ib_sa_cleanup();
	ib_mad_cleanup();
	addr_cleanup();
	rdma_nl_exit();
	class_unregister(&ib_class);
	destroy_workqueue(ib_comp_unbound_wq);
	destroy_workqueue(ib_comp_wq);
	/* Make sure that any pending umem accounting work is done. */
	destroy_workqueue(ib_wq);
	flush_workqueue(system_unbound_wq);
	WARN_ON(!xa_empty(&clients));
	WARN_ON(!xa_empty(&devices));
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);

/*
 * ib_core relies on the netdev stack having registered the
 * net_ns_type_operations kobject type (used by ib_class.ns_type) before
 * ib_core initializes, hence fs_initcall rather than module_init.
 */
fs_initcall(ib_core_init);
module_exit(ib_core_cleanup);