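/*
 * NOTE(review): the original file header comment (presumably the
 * copyright/license block) is missing from this copy; only stray line
 * numbers were left in its place.  Restore it from the upstream source.
 */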
35#ifndef _IPOIB_H
36#define _IPOIB_H
37
38#include <linux/list.h>
39#include <linux/skbuff.h>
40#include <linux/netdevice.h>
41#include <linux/workqueue.h>
42#include <linux/kref.h>
43#include <linux/if_infiniband.h>
44#include <linux/mutex.h>
45
46#include <net/neighbour.h>
47
48#include <asm/atomic.h>
49
50#include <rdma/ib_verbs.h>
51#include <rdma/ib_pack.h>
52#include <rdma/ib_sa.h>
53#include <linux/sched.h>
54
55
56
/*
 * Flush levels.  The values are the default enumerator values spelled out
 * explicitly (0, 1, 2); the three levels match the flush_light,
 * flush_normal and flush_heavy work items declared in this header.
 */
enum ipoib_flush_level {
	IPOIB_FLUSH_LIGHT  = 0,
	IPOIB_FLUSH_NORMAL = 1,
	IPOIB_FLUSH_HEAVY  = 2
};
62
63enum {
64 IPOIB_ENCAP_LEN = 4,
65
66 IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
67 IPOIB_UD_RX_SG = 2,
68
69 IPOIB_CM_MTU = 0x10000 - 0x10,
70 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
71 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
72 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
73 IPOIB_RX_RING_SIZE = 256,
74 IPOIB_TX_RING_SIZE = 128,
75 IPOIB_MAX_QUEUE_SIZE = 8192,
76 IPOIB_MIN_QUEUE_SIZE = 2,
77 IPOIB_CM_MAX_CONN_QP = 4096,
78
79 IPOIB_NUM_WC = 4,
80
81 IPOIB_MAX_PATH_REC_QUEUE = 3,
82 IPOIB_MAX_MCAST_QUEUE = 3,
83
84 IPOIB_FLAG_OPER_UP = 0,
85 IPOIB_FLAG_INITIALIZED = 1,
86 IPOIB_FLAG_ADMIN_UP = 2,
87 IPOIB_PKEY_ASSIGNED = 3,
88 IPOIB_PKEY_STOP = 4,
89 IPOIB_FLAG_SUBINTERFACE = 5,
90 IPOIB_MCAST_RUN = 6,
91 IPOIB_STOP_REAPER = 7,
92 IPOIB_FLAG_ADMIN_CM = 9,
93 IPOIB_FLAG_UMCAST = 10,
94 IPOIB_FLAG_CSUM = 11,
95
96 IPOIB_MAX_BACKOFF_SECONDS = 16,
97
98 IPOIB_MCAST_FLAG_FOUND = 0,
99 IPOIB_MCAST_FLAG_SENDONLY = 1,
100 IPOIB_MCAST_FLAG_BUSY = 2,
101 IPOIB_MCAST_FLAG_ATTACHED = 3,
102
103 MAX_SEND_CQE = 16,
104 IPOIB_CM_COPYBREAK = 256,
105};
106
/*
 * Operation marker bits: bit 31 for IPOIB_OP_RECV and, only when
 * connected mode is configured, bit 30 for IPOIB_OP_CM.  Without
 * CONFIG_INFINIBAND_IPOIB_CM the CM marker is 0, so masking with it never
 * matches.
 */
#define IPOIB_OP_RECV   (1ul << 31)
#ifndef CONFIG_INFINIBAND_IPOIB_CM
#define IPOIB_OP_CM     (0)
#else
#define IPOIB_OP_CM     (1ul << 30)
#endif
113
114
115
116struct ipoib_header {
117 __be16 proto;
118 u16 reserved;
119};
120
121struct ipoib_pseudoheader {
122 u8 hwaddr[INFINIBAND_ALEN];
123};
124
125
126struct ipoib_mcast {
127 struct ib_sa_mcmember_rec mcmember;
128 struct ib_sa_multicast *mc;
129 struct ipoib_ah *ah;
130
131 struct rb_node rb_node;
132 struct list_head list;
133
134 unsigned long created;
135 unsigned long backoff;
136
137 unsigned long flags;
138 unsigned char logcount;
139
140 struct list_head neigh_list;
141
142 struct sk_buff_head pkt_queue;
143
144 struct net_device *dev;
145};
146
147struct ipoib_rx_buf {
148 struct sk_buff *skb;
149 u64 mapping[IPOIB_UD_RX_SG];
150};
151
152struct ipoib_tx_buf {
153 struct sk_buff *skb;
154 u64 mapping[MAX_SKB_FRAGS + 1];
155};
156
157struct ipoib_cm_tx_buf {
158 struct sk_buff *skb;
159 u64 mapping;
160};
161
162struct ib_cm_id;
163
164struct ipoib_cm_data {
165 __be32 qpn;
166 __be32 mtu;
167};
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*
 * State of a connected-mode receive context (see struct ipoib_cm_rx.state).
 * The values are the default enumerator values spelled out explicitly.
 */
enum ipoib_cm_state {
	IPOIB_CM_RX_LIVE  = 0,
	IPOIB_CM_RX_ERROR = 1,
	IPOIB_CM_RX_FLUSH = 2
};
201
202struct ipoib_cm_rx {
203 struct ib_cm_id *id;
204 struct ib_qp *qp;
205 struct ipoib_cm_rx_buf *rx_ring;
206 struct list_head list;
207 struct net_device *dev;
208 unsigned long jiffies;
209 enum ipoib_cm_state state;
210 int recv_count;
211};
212
213struct ipoib_cm_tx {
214 struct ib_cm_id *id;
215 struct ib_qp *qp;
216 struct list_head list;
217 struct net_device *dev;
218 struct ipoib_neigh *neigh;
219 struct ipoib_path *path;
220 struct ipoib_cm_tx_buf *tx_ring;
221 unsigned tx_head;
222 unsigned tx_tail;
223 unsigned long flags;
224 u32 mtu;
225};
226
227struct ipoib_cm_rx_buf {
228 struct sk_buff *skb;
229 u64 mapping[IPOIB_CM_RX_SG];
230};
231
232struct ipoib_cm_dev_priv {
233 struct ib_srq *srq;
234 struct ipoib_cm_rx_buf *srq_ring;
235 struct ib_cm_id *id;
236 struct list_head passive_ids;
237 struct list_head rx_error_list;
238 struct list_head rx_flush_list;
239 struct list_head rx_drain_list;
240 struct list_head rx_reap_list;
241 struct work_struct start_task;
242 struct work_struct reap_task;
243 struct work_struct skb_task;
244 struct work_struct rx_reap_task;
245 struct delayed_work stale_task;
246 struct sk_buff_head skb_queue;
247 struct list_head start_list;
248 struct list_head reap_list;
249 struct ib_wc ibwc[IPOIB_NUM_WC];
250 struct ib_sge rx_sge[IPOIB_CM_RX_SG];
251 struct ib_recv_wr rx_wr;
252 int nonsrq_conn_qp;
253 int max_cm_mtu;
254 int num_frags;
255};
256
257struct ipoib_ethtool_st {
258 u16 coalesce_usecs;
259 u16 max_coalesced_frames;
260};
261
262
263
264
265
266
267struct ipoib_dev_priv {
268 spinlock_t lock;
269
270 struct net_device *dev;
271
272 struct napi_struct napi;
273
274 unsigned long flags;
275
276 struct mutex vlan_mutex;
277
278 struct rb_root path_tree;
279 struct list_head path_list;
280
281 struct ipoib_mcast *broadcast;
282 struct list_head multicast_list;
283 struct rb_root multicast_tree;
284
285 struct delayed_work pkey_poll_task;
286 struct delayed_work mcast_task;
287 struct work_struct carrier_on_task;
288 struct work_struct flush_light;
289 struct work_struct flush_normal;
290 struct work_struct flush_heavy;
291 struct work_struct restart_task;
292 struct delayed_work ah_reap_task;
293
294 struct ib_device *ca;
295 u8 port;
296 u16 pkey;
297 u16 pkey_index;
298 struct ib_pd *pd;
299 struct ib_mr *mr;
300 struct ib_cq *recv_cq;
301 struct ib_cq *send_cq;
302 struct ib_qp *qp;
303 u32 qkey;
304
305 union ib_gid local_gid;
306 u16 local_lid;
307
308 unsigned int admin_mtu;
309 unsigned int mcast_mtu;
310 unsigned int max_ib_mtu;
311
312 struct ipoib_rx_buf *rx_ring;
313
314 struct ipoib_tx_buf *tx_ring;
315 unsigned tx_head;
316 unsigned tx_tail;
317 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
318 struct ib_send_wr tx_wr;
319 unsigned tx_outstanding;
320 struct ib_wc send_wc[MAX_SEND_CQE];
321
322 struct ib_recv_wr rx_wr;
323 struct ib_sge rx_sge[IPOIB_UD_RX_SG];
324
325 struct ib_wc ibwc[IPOIB_NUM_WC];
326
327 struct list_head dead_ahs;
328
329 struct ib_event_handler event_handler;
330
331 struct net_device *parent;
332 struct list_head child_intfs;
333 struct list_head list;
334
335#ifdef CONFIG_INFINIBAND_IPOIB_CM
336 struct ipoib_cm_dev_priv cm;
337#endif
338
339#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
340 struct list_head fs_list;
341 struct dentry *mcg_dentry;
342 struct dentry *path_dentry;
343#endif
344 int hca_caps;
345 struct ipoib_ethtool_st ethtool;
346 struct timer_list poll_timer;
347};
348
349struct ipoib_ah {
350 struct net_device *dev;
351 struct ib_ah *ah;
352 struct list_head list;
353 struct kref ref;
354 unsigned last_send;
355};
356
357struct ipoib_path {
358 struct net_device *dev;
359 struct ib_sa_path_rec pathrec;
360 struct ipoib_ah *ah;
361 struct sk_buff_head queue;
362
363 struct list_head neigh_list;
364
365 int query_id;
366 struct ib_sa_query *query;
367 struct completion done;
368
369 struct rb_node rb_node;
370 struct list_head list;
371 int valid;
372};
373
374struct ipoib_neigh {
375 struct ipoib_ah *ah;
376#ifdef CONFIG_INFINIBAND_IPOIB_CM
377 struct ipoib_cm_tx *cm;
378#endif
379 union ib_gid dgid;
380 struct sk_buff_head queue;
381
382 struct neighbour *neighbour;
383 struct net_device *dev;
384
385 struct list_head list;
386};
387
/*
 * UD sizing helpers derived from an IB MTU value:
 *   IPOIB_UD_MTU(ib_mtu)      - ib_mtu minus IPOIB_ENCAP_LEN
 *   IPOIB_UD_BUF_SIZE(ib_mtu) - ib_mtu plus IB_GRH_BYTES
 *
 * The argument is parenthesized so that expression arguments (shifts,
 * conditionals, ...) expand with the intended precedence.
 */
#define IPOIB_UD_MTU(ib_mtu)		((ib_mtu) - IPOIB_ENCAP_LEN)
#define IPOIB_UD_BUF_SIZE(ib_mtu)	((ib_mtu) + IB_GRH_BYTES)
390
391static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
392{
393 return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
394}
395
396
397
398
399
400
401
402static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
403{
404 return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
405 INFINIBAND_ALEN, sizeof(void *));
406}
407
/* Allocate / release per-neighbour state.  Implemented outside this header. */
struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
				      struct net_device *dev);
void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);

/* Driver workqueue; the object itself is defined in another file. */
extern struct workqueue_struct *ipoib_workqueue;
413
414
415
/* NAPI poll routine and completion-queue callbacks (signatures only). */
int ipoib_poll(struct napi_struct *napi, int budget);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);

/*
 * Address-handle creation, and the kref release function that
 * ipoib_put_ah() passes to kref_put().
 */
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
				 struct ib_pd *pd,
				 struct ib_ah_attr *attr);
void ipoib_free_ah(struct kref *kref);
423static inline void ipoib_put_ah(struct ipoib_ah *ah)
424{
425 kref_put(&ah->ref, ipoib_free_ah);
426}
427
428int ipoib_open(struct net_device *dev);
429int ipoib_add_pkey_attr(struct net_device *dev);
430int ipoib_add_umcast_attr(struct net_device *dev);
431
432void ipoib_send(struct net_device *dev, struct sk_buff *skb,
433 struct ipoib_ah *address, u32 qpn);
434void ipoib_reap_ah(struct work_struct *work);
435
436void ipoib_mark_paths_invalid(struct net_device *dev);
437void ipoib_flush_paths(struct net_device *dev);
438struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
439
440int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
441void ipoib_ib_dev_flush_light(struct work_struct *work);
442void ipoib_ib_dev_flush_normal(struct work_struct *work);
443void ipoib_ib_dev_flush_heavy(struct work_struct *work);
444void ipoib_pkey_event(struct work_struct *work);
445void ipoib_ib_dev_cleanup(struct net_device *dev);
446
447int ipoib_ib_dev_open(struct net_device *dev);
448int ipoib_ib_dev_up(struct net_device *dev);
449int ipoib_ib_dev_down(struct net_device *dev, int flush);
450int ipoib_ib_dev_stop(struct net_device *dev, int flush);
451
452int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
453void ipoib_dev_cleanup(struct net_device *dev);
454
/* Multicast work functions and send entry point (signatures only). */
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, void *mgid,
		      struct sk_buff *skb);

/* Multicast restart work function and thread start/stop. */
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_stop_thread(struct net_device *dev, int flush);

/* Multicast teardown helpers. */
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
465
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/*
 * Iterators over multicast groups and paths, declared only for debug
 * builds.  The iterator types are opaque in this header.
 */
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter);
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
			   union ib_gid *gid,
			   unsigned long *created,
			   unsigned int *queuelen,
			   unsigned int *complete,
			   unsigned int *send_only);

struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev);
int ipoib_path_iter_next(struct ipoib_path_iter *iter);
void ipoib_path_iter_read(struct ipoib_path_iter *iter,
			  struct ipoib_path *path);
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
481
482int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
483 union ib_gid *mgid, int set_qkey);
484
485int ipoib_init_qp(struct net_device *dev);
486int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
487void ipoib_transport_dev_cleanup(struct net_device *dev);
488
489void ipoib_event(struct ib_event_handler *handler,
490 struct ib_event *record);
491
492int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
493int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
494
495void ipoib_pkey_poll(struct work_struct *work);
496int ipoib_pkey_dev_delay_open(struct net_device *dev);
497void ipoib_drain_cq(struct net_device *dev);
498
499void ipoib_set_ethtool_ops(struct net_device *dev);
500int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
501
502#ifdef CONFIG_INFINIBAND_IPOIB_CM
503
/* Flag bits carried in the first byte of a hardware address. */
#define IPOIB_FLAGS_RC		0x80
#define IPOIB_FLAGS_UC		0x40

/*
 * Non-zero when the RC flag is set in the first byte of hardware address
 * @ha.  @ha is parenthesized so that pointer expressions such as "p + 1"
 * are indexed as a whole.
 */
#define IPOIB_CM_SUPPORTED(ha)	((ha)[0] & (IPOIB_FLAGS_RC))

extern int ipoib_max_conn_qp;
511
512static inline int ipoib_cm_admin_enabled(struct net_device *dev)
513{
514 struct ipoib_dev_priv *priv = netdev_priv(dev);
515 return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
516 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
517}
518
519static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
520{
521 struct ipoib_dev_priv *priv = netdev_priv(dev);
522 return IPOIB_CM_SUPPORTED(n->ha) &&
523 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
524}
525
526static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
527
528{
529 return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
530}
531
532static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
533{
534 return neigh->cm;
535}
536
537static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
538{
539 neigh->cm = tx;
540}
541
542static inline int ipoib_cm_has_srq(struct net_device *dev)
543{
544 struct ipoib_dev_priv *priv = netdev_priv(dev);
545 return !!priv->cm.srq;
546}
547
548static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
549{
550 struct ipoib_dev_priv *priv = netdev_priv(dev);
551 return priv->cm.max_cm_mtu;
552}
553
/*
 * Connected-mode entry points, implemented elsewhere when
 * CONFIG_INFINIBAND_IPOIB_CM is set.  The #else branch of this
 * conditional provides inline stubs with the same signatures.
 */
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb,
		   struct ipoib_cm_tx *tx);
int ipoib_cm_dev_open(struct net_device *dev);
void ipoib_cm_dev_stop(struct net_device *dev);
int ipoib_cm_dev_init(struct net_device *dev);
int ipoib_cm_add_mode_attr(struct net_device *dev);
void ipoib_cm_dev_cleanup(struct net_device *dev);
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev,
				       struct ipoib_path *path,
				       struct ipoib_neigh *neigh);
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
			   unsigned int mtu);
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
567#else
568
/* Without CM support the type is only ever used through pointers. */
struct ipoib_cm_tx;

/* Without CM support the connection-QP limit is the constant 0. */
#define ipoib_max_conn_qp	0
572
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
	return 0;
}
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
{
	return 0;
}
582
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
{
	return 0;
}
588
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns NULL. */
static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
{
	return NULL;
}
593
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing. */
static inline void ipoib_cm_set(struct ipoib_neigh *neigh,
				struct ipoib_cm_tx *tx)
{
}
597
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
	return 0;
}
602
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
	return 0;
}
607
/*
 * Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing.  Note
 * that @skb is neither sent nor freed here.
 */
static inline void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb,
				 struct ipoib_cm_tx *tx)
{
}
613
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline int ipoib_cm_dev_open(struct net_device *dev)
{
	return 0;
}
619
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing. */
static inline void ipoib_cm_dev_stop(struct net_device *dev)
{
}
625
/*
 * Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns
 * -ENOSYS.  Unlike the other int-returning stubs in this branch, this one
 * reports an error rather than 0.
 */
static inline int ipoib_cm_dev_init(struct net_device *dev)
{
	return -ENOSYS;
}
631
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing. */
static inline void ipoib_cm_dev_cleanup(struct net_device *dev)
{
}
637
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns NULL. */
static inline struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev,
						     struct ipoib_path *path,
						     struct ipoib_neigh *neigh)
{
	return NULL;
}
644
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing. */
static inline void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
}
650
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: always returns 0. */
static inline int ipoib_cm_add_mode_attr(struct net_device *dev)
{
	return 0;
}
656
/*
 * Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: releases @skb with
 * dev_kfree_skb_any() and does nothing else; @dev and @mtu are unused.
 */
static inline void ipoib_cm_skb_too_long(struct net_device *dev,
					 struct sk_buff *skb,
					 unsigned int mtu)
{
	dev_kfree_skb_any(skb);
}
662
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing. */
static inline void ipoib_cm_handle_rx_wc(struct net_device *dev,
					 struct ib_wc *wc)
{
}
666
/* Stub for builds without CONFIG_INFINIBAND_IPOIB_CM: does nothing. */
static inline void ipoib_cm_handle_tx_wc(struct net_device *dev,
					 struct ib_wc *wc)
{
}
670#endif
671
/*
 * debugfs hooks.  With CONFIG_INFINIBAND_IPOIB_DEBUG they are real
 * functions implemented elsewhere; otherwise they are empty inline stubs
 * and ipoib_register_debugfs() returns 0.
 */
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
void ipoib_create_debug_files(struct net_device *dev);
void ipoib_delete_debug_files(struct net_device *dev);
int ipoib_register_debugfs(void);
void ipoib_unregister_debugfs(void);
#else
static inline void ipoib_create_debug_files(struct net_device *dev)
{
}

static inline void ipoib_delete_debug_files(struct net_device *dev)
{
}

static inline int ipoib_register_debugfs(void)
{
	return 0;
}

static inline void ipoib_unregister_debugfs(void)
{
}
#endif
683
/*
 * ipoib_printk - printk() with the interface name prepended.
 * @level:  printk level string (KERN_*), pasted in front of the format
 * @priv:   pointer to the device private data; it is cast to
 *          struct ipoib_dev_priv * and ->dev->name is printed
 * @format: string-literal format, followed by optional arguments
 *
 * @priv is parenthesized before the cast so that the cast applies to the
 * whole argument expression rather than to its first operand only.
 *
 * ipoib_warn() is the same at KERN_WARNING level.
 */
#define ipoib_printk(level, priv, format, arg...)	\
	printk(level "%s: " format, ((struct ipoib_dev_priv *) (priv))->dev->name , ## arg)
#define ipoib_warn(priv, format, arg...)		\
	ipoib_printk(KERN_WARNING, priv, format , ## arg)
688
/* Send and receive queue sizes; the variables are defined in another file. */
extern int ipoib_sendq_size;
extern int ipoib_recvq_size;

/* SA client object, defined in another file. */
extern struct ib_sa_client ipoib_sa_client;
693
/*
 * Debug printing.
 *
 * With CONFIG_INFINIBAND_IPOIB_DEBUG, ipoib_dbg() prints at KERN_DEBUG
 * when ipoib_debug_level > 0, and ipoib_dbg_mcast() does the same when
 * mcast_debug_level > 0.  mcast_debug_level is not declared in this
 * header, so it must be visible wherever ipoib_dbg_mcast() is used.
 *
 * Without the option both macros only evaluate (void)(priv); the format
 * and its arguments are discarded and never evaluated.
 */
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
extern int ipoib_debug_level;

#define ipoib_dbg(priv, fmt, args...)					\
	do {								\
		if (ipoib_debug_level > 0)				\
			ipoib_printk(KERN_DEBUG, priv, fmt , ## args);	\
	} while (0)
#define ipoib_dbg_mcast(priv, fmt, args...)				\
	do {								\
		if (mcast_debug_level > 0)				\
			ipoib_printk(KERN_DEBUG, priv, fmt , ## args);	\
	} while (0)
#else /* !CONFIG_INFINIBAND_IPOIB_DEBUG */
#define ipoib_dbg(priv, fmt, args...)					\
	do { (void) (priv); } while (0)
#define ipoib_dbg_mcast(priv, fmt, args...)				\
	do { (void) (priv); } while (0)
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
713
/*
 * Debug printing gated by CONFIG_INFINIBAND_IPOIB_DEBUG_DATA.  When the
 * option is set, ipoib_dbg_data() prints at KERN_DEBUG if
 * data_debug_level > 0; data_debug_level is not declared in this header,
 * so it must be visible at the point of use.  Otherwise the macro only
 * evaluates (void)(priv).
 */
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
#define ipoib_dbg_data(priv, fmt, args...)				\
	do {								\
		if (data_debug_level > 0)				\
			ipoib_printk(KERN_DEBUG, priv, fmt , ## args);	\
	} while (0)
#else /* !CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
#define ipoib_dbg_data(priv, fmt, args...)				\
	do { (void) (priv); } while (0)
#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
724
/*
 * Read the first four bytes at @ha as a big-endian 32-bit value and keep
 * the low 24 bits.  @ha is parenthesized so that the cast applies to the
 * whole argument expression.
 */
#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) (ha)) & 0xffffff)
726
727#endif
728