/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License.
 */

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME "vmw_pvrdma"
#define DRV_VERSION "1.0.1.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);

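/*
 * Device attributes exposed under the IB device's sysfs directory
 * (e.g. /sys/class/infiniband/vmw_pvrdma0/). The group is attached via
 * the device_group field of pvrdma_dev_ops below.
 */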
static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t board_id_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID);
}
static DEVICE_ATTR_RO(board_id);

static struct attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	&dev_attr_board_id.attr,
	NULL,
};

static const struct attribute_group pvrdma_attr_group = {
	.attrs = pvrdma_class_attributes,
};

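/*
 * The device reports its firmware version as a packed 64-bit value:
 * major in the upper 32 bits, then 16-bit minor and patch fields,
 * which is what the shifts below unpack.
 */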
static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize some device related stuff */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_srqs, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
				 struct ib_port_immutable *immutable)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct ib_port_attr attr;
	int err;

	if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE;
	else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

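/*
 * Verb entry points. The SRQ ops live in a separate table that
 * pvrdma_register_device() only wires up when the backend reports a
 * non-zero max_srq capability.
 */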
static const struct ib_device_ops pvrdma_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_VMW_PVRDMA,
	.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION,

	.add_gid = pvrdma_add_gid,
	.alloc_mr = pvrdma_alloc_mr,
	.alloc_pd = pvrdma_alloc_pd,
	.alloc_ucontext = pvrdma_alloc_ucontext,
	.create_ah = pvrdma_create_ah,
	.create_cq = pvrdma_create_cq,
	.create_qp = pvrdma_create_qp,
	.dealloc_pd = pvrdma_dealloc_pd,
	.dealloc_ucontext = pvrdma_dealloc_ucontext,
	.del_gid = pvrdma_del_gid,
	.dereg_mr = pvrdma_dereg_mr,
	.destroy_ah = pvrdma_destroy_ah,
	.destroy_cq = pvrdma_destroy_cq,
	.destroy_qp = pvrdma_destroy_qp,
	.device_group = &pvrdma_attr_group,
	.get_dev_fw_str = pvrdma_get_fw_ver_str,
	.get_dma_mr = pvrdma_get_dma_mr,
	.get_link_layer = pvrdma_port_link_layer,
	.get_port_immutable = pvrdma_port_immutable,
	.map_mr_sg = pvrdma_map_mr_sg,
	.mmap = pvrdma_mmap,
	.modify_port = pvrdma_modify_port,
	.modify_qp = pvrdma_modify_qp,
	.poll_cq = pvrdma_poll_cq,
	.post_recv = pvrdma_post_recv,
	.post_send = pvrdma_post_send,
	.query_device = pvrdma_query_device,
	.query_gid = pvrdma_query_gid,
	.query_pkey = pvrdma_query_pkey,
	.query_port = pvrdma_query_port,
	.query_qp = pvrdma_query_qp,
	.reg_user_mr = pvrdma_reg_user_mr,
	.req_notify_cq = pvrdma_req_notify_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
};

static const struct ib_device_ops pvrdma_dev_srq_ops = {
	.create_srq = pvrdma_create_srq,
	.destroy_srq = pvrdma_destroy_srq,
	.modify_srq = pvrdma_modify_srq,
	.query_srq = pvrdma_query_srq,

	INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq),
};

static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -ENOMEM;

	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->flags = 0;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dev.parent = &dev->pdev->dev;

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	/* Check if SRQ is supported by backend */
	if (dev->dsr->caps.max_srq) {
		ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);

		dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
				       sizeof(struct pvrdma_srq *),
				       GFP_KERNEL);
		if (!dev->srq_tbl)
			goto err_qp_free;
	}
	ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
	if (ret)
		goto err_srq_free;
	spin_lock_init(&dev->srq_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev);
	if (ret)
		goto err_srq_free;

	dev->ib_active = true;

	return 0;

err_srq_free:
	kfree(dev->srq_tbl);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}

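/*
 * Interrupt layout: vector 0 signals command responses, vector 1 async
 * events, and the remaining vectors CQ notifications. When MSI-X is
 * unavailable a single shared vector is used and the interrupt cause
 * register (ICR) is read to recognize response interrupts.
 */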
static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}

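/*
 * Async events carry the object number of the affected QP/CQ/SRQ. The
 * object is looked up under its table spinlock and pinned with a
 * refcount across the event_handler callback; dropping the last
 * reference completes ->free, so a concurrent destroy can wait for the
 * handler to finish.
 */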
static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		refcount_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for currently supported types. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		if (refcount_dec_and_test(&qp->refcnt))
			complete(&qp->free);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		refcount_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for currently supported types. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		if (refcount_dec_and_test(&cq->refcnt))
			complete(&cq->free);
	}
}

static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
{
	struct pvrdma_srq *srq;
	unsigned long flags;

	spin_lock_irqsave(&dev->srq_tbl_lock, flags);
	if (dev->srq_tbl)
		srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq];
	else
		srq = NULL;
	if (srq)
		refcount_inc(&srq->refcnt);
	spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);

	if (srq && srq->ibsrq.event_handler) {
		struct ib_srq *ibsrq = &srq->ibsrq;
		struct ib_event e;

		e.device = ibsrq->device;
		e.element.srq = ibsrq;
		e.event = type; /* 1:1 mapping for currently supported types. */
		ibsrq->event_handler(&e, ibsrq->srq_context);
	}
	if (srq) {
		if (refcount_dec_and_test(&srq->refcnt))
			complete(&srq->free);
	}
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on invalid port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}

static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}

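/*
 * Both event rings are laid out the same way: the first page of the
 * page directory holds the pvrdma_ring producer/consumer state, and
 * the entries start at the PAGE_SIZE offset, as get_eqe() above and
 * get_cqne() below compute.
 */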
static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device has been registered;
	 * until then there is nobody to deliver them to.
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			pvrdma_srq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock(&dev->cq_tbl_lock);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			refcount_inc(&cq->refcnt);
		spin_unlock(&dev->cq_tbl_lock);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			if (refcount_dec_and_test(&cq->refcnt))
				complete(&cq->free);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

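/*
 * Interrupt masking: the driver only ever writes all-zeroes (unmask
 * every cause) or all-ones (mask every cause) to the IMR register.
 */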
static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}

static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}

static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}

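/*
 * GID table management. add_gid/del_gid post CREATE_BIND/DESTROY_BIND
 * commands to the device and mirror the result in dev->sgid_tbl so a
 * binding can later be destroyed by index alone.
 */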
static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   u8 gid_type,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = gid_type;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	return pvrdma_add_gid_at_index(dev, &attr->gid,
				       ib_gid_type_to_pvrdma(attr->gid_type),
				       attr->index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s\n",
		attr->index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, attr->index);
}

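/*
 * Netdevice events land here after being bounced through the ordered
 * event workqueue by pvrdma_netdevice_event(). The paired vmxnet3
 * function shares this device's PCI bus and slot, which is how a
 * NETDEV_REGISTER event is matched back to the RDMA device.
 */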
static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  struct net_device *ndev,
					  unsigned long event)
{
	struct pci_dev *pdev_net;
	unsigned int slot;

	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
				 PVRDMA_DEVICE_CTL_UNQUIESCE);

		/* Make sure the unquiesce write reaches the device first. */
		mb();

		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
			dev_err(&dev->pdev->dev,
				"failed to activate device during link up\n");
		else
			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	case NETDEV_UNREGISTER:
		ib_device_set_netdev(&dev->ib_dev, NULL, 1);
		dev_put(dev->netdev);
		dev->netdev = NULL;
		break;
	case NETDEV_REGISTER:
		/* vmxnet3 will have same bus, slot. But func will be 0 */
		slot = PCI_SLOT(dev->pdev->devfn);
		pdev_net = pci_get_slot(dev->pdev->bus,
					PCI_DEVFN(slot, 0));
		if ((dev->netdev == NULL) &&
		    (pci_get_drvdata(pdev_net) == ndev)) {
			/* this is our netdev */
			ib_device_set_netdev(&dev->ib_dev, ndev, 1);
			dev->netdev = ndev;
			dev_hold(ndev);
		}
		pci_dev_put(pdev_net);
		break;

	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev_name(&dev->ib_dev.dev));
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if ((netdev_work->event == NETDEV_REGISTER) ||
		    (dev->netdev == netdev_work->event_netdev)) {
			pvrdma_netdevice_event_handle(dev,
						      netdev_work->event_netdev,
						      netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}

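/*
 * Probe sequence: enable the PCI device and map its BARs, allocate the
 * driver shared region (DSR) plus command/response slots and the two
 * event rings, pair up with the vmxnet3 netdev in the same PCI slot,
 * set up interrupts and the UAR/GID tables, activate the device, and
 * finally register the IB device and a netdevice notifier.
 */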
static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate zero-out device */
	dev = ib_alloc_device(pvrdma_dev, ib_dev);
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_disable_pdev;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-bit DMA if possible, otherwise fall back to 32-bit. */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (ret) {
			dev_err(&pdev->dev, "dma_set_mask failed\n");
			goto err_free_resource;
		}
	}
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 dev->dsr_version, PVRDMA_VERSION);

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
					PVRDMA_GOS_BITS_32 :
					PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;

	if (dev->dsr_version < PVRDMA_PPN64_VERSION)
		dev->dsr->uar_pfn = dev->driver_uar.pfn;
	else
		dev->dsr->uar_pfn64 = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must
	 * be ordered such that the high bits are written last. When the
	 * writes complete, the device will have filled out the capabilities.
	 */
	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* The driver supports RoCE V1 and V2. */
	if (!PVRDMA_SUPPORTED(dev)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}
	dev_hold(dev->netdev);

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}

	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	/* Free resources */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);

	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->srq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static const struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name = DRV_NAME,
	.id_table = pvrdma_pci_table,
	.probe = pvrdma_pci_probe,
	.remove = pvrdma_pci_remove,
};

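/*
 * The ordered event workqueue must exist before the PCI driver is
 * registered: registering a netdevice notifier during probe replays
 * NETDEV_REGISTER events for existing devices, which queues work on it.
 */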
static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_LICENSE("Dual BSD/GPL");