1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Debug configuration for this LND.  Both macros are defined ahead of the
 * #include lines below.
 * NOTE(review): presumably the included libcfs headers key their debug
 * and allocation tracing off these two macros -- confirm.
 */
#define DEBUG_PORTAL_ALLOC
#define DEBUG_SUBSYSTEM		S_LND
30
31#include "socklnd_lib-linux.h"
32
33#include "../../../include/linux/libcfs/libcfs.h"
34#include "../../../include/linux/lnet/lnet.h"
35#include "../../../include/linux/lnet/lib-lnet.h"
36#include "../../../include/linux/lnet/socklnd.h"
37#include "../../../include/linux/lnet/lnet-sysctl.h"
38
/*
 * Compile-time constants for the socket LND.
 * NOTE(review): the per-macro notes are inferred from the names; confirm
 * against the users in the .c files.
 */
#define SOCKNAL_PEER_HASH_SIZE		101	/* presumably # peer hash buckets */
#define SOCKNAL_RESCHED			100	/* presumably a scheduler loop budget */
#define SOCKNAL_INSANITY_RECONN		5000	/* presumably a reconnect-count limit */
#define SOCKNAL_ENOMEM_RETRY		CFS_TICK /* presumably the ENOMEM retry delay */

/* Single-fragment I/O switches; both are off (0). */
#define SOCKNAL_SINGLE_FRAG_TX		0
#define SOCKNAL_SINGLE_FRAG_RX		0

/* Protocol-version debugging switch; off (0). */
#define SOCKNAL_VERSION_DEBUG		0
48
49
50
/*
 * SOCKNAL_RISK_KMAP_DEADLOCK is 0 on CONFIG_HIGHMEM kernels and 1
 * everywhere else.
 * NOTE(review): presumably 0 makes multi-fragment I/O back off so that
 * kmap() cannot deadlock on highmem systems -- confirm in the users.
 */
#if defined(CONFIG_HIGHMEM)
# define SOCKNAL_RISK_KMAP_DEADLOCK	0
#else
# define SOCKNAL_RISK_KMAP_DEADLOCK	1
#endif
56
57struct ksock_sched_info;
58
59typedef struct
60{
61 spinlock_t kss_lock;
62 struct list_head kss_rx_conns;
63
64 struct list_head kss_tx_conns;
65
66 struct list_head kss_zombie_noop_txs;
67 wait_queue_head_t kss_waitq;
68
69 int kss_nconns;
70 struct ksock_sched_info *kss_info;
71 struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
72 struct kvec kss_scratch_iov[LNET_MAX_IOV];
73} ksock_sched_t;
74
75struct ksock_sched_info {
76 int ksi_nthreads_max;
77 int ksi_nthreads;
78 int ksi_cpt;
79 ksock_sched_t *ksi_scheds;
80};
81
/*
 * A thread id packs two numbers into one integer: the "cpt" part sits at
 * and above bit KSOCK_CPT_SHIFT, the "sid" part in the bits below it.
 *
 *   KSOCK_THREAD_ID(cpt, sid)  builds an id,
 *   KSOCK_THREAD_CPT(id)       extracts the upper (cpt) part,
 *   KSOCK_THREAD_SID(id)       extracts the lower (sid) part.
 */
#define KSOCK_CPT_SHIFT			16
#define KSOCK_THREAD_ID(cpt, sid)	(((cpt) << KSOCK_CPT_SHIFT) | (sid))
#define KSOCK_THREAD_CPT(id)		((id) >> KSOCK_CPT_SHIFT)
#define KSOCK_THREAD_SID(id)		((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
86
87typedef struct
88{
89 __u32 ksni_ipaddr;
90 __u32 ksni_netmask;
91 int ksni_nroutes;
92 int ksni_npeers;
93 char ksni_name[IFNAMSIZ];
94} ksock_interface_t;
95
/*
 * Table of tunables.  Every member is a pointer to the storage that holds
 * the actual value, so readers dereference it (see ksocknal_route_mask(),
 * which reads *ksnd_typed_conns).
 * NOTE(review): the group headings are inferred from the member names;
 * confirm the meaning of each tunable where it is set.
 */
typedef struct {
	/* timeouts and thread counts */
	int		*ksnd_timeout;
	int		*ksnd_nscheds;
	int		*ksnd_nconnds;
	int		*ksnd_nconnds_max;
	int		*ksnd_min_reconnectms;
	int		*ksnd_max_reconnectms;

	/* connection behaviour */
	int		*ksnd_eager_ack;
	int		*ksnd_typed_conns;	/* 0: ksocknal_route_mask() yields ANY only */
	int		*ksnd_min_bulk;
	int		*ksnd_tx_buffer_size;
	int		*ksnd_rx_buffer_size;
	int		*ksnd_nagle;
	int		*ksnd_round_robin;

	/* keepalive */
	int		*ksnd_keepalive;
	int		*ksnd_keepalive_idle;
	int		*ksnd_keepalive_count;
	int		*ksnd_keepalive_intvl;

	/* credits and peer health */
	int		*ksnd_credits;
	int		*ksnd_peertxcredits;
	int		*ksnd_peerrtrcredits;
	int		*ksnd_peertimeout;

	/* checksums */
	int		*ksnd_enable_csum;
	int		*ksnd_inject_csum_error;

	/* zero-copy */
	int		*ksnd_nonblk_zcack;
	unsigned int	*ksnd_zc_min_payload;
	int		*ksnd_zc_recv;
	int		*ksnd_zc_recv_min_nfrags;
} ksock_tunables_t;
127
128typedef struct {
129 __u64 ksnn_incarnation;
130 spinlock_t ksnn_lock;
131 struct list_head ksnn_list;
132 int ksnn_npeers;
133 int ksnn_shutdown;
134 int ksnn_ninterfaces;
135 ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES];
136} ksock_net_t;
137
138
/*
 * Connection-daemon constants.
 * NOTE(review): presumably a timeout in seconds and the number of daemons
 * held in reserve -- confirm against the connd code.
 */
#define SOCKNAL_CONND_TIMEOUT	120

#define SOCKNAL_CONND_RESV	1
142
143typedef struct {
144 int ksnd_init;
145 int ksnd_nnets;
146 struct list_head ksnd_nets;
147
148 rwlock_t ksnd_global_lock;
149
150 struct list_head *ksnd_peers;
151 int ksnd_peer_hash_size;
152
153 int ksnd_nthreads;
154 int ksnd_shuttingdown;
155
156 struct ksock_sched_info **ksnd_sched_info;
157
158 atomic_t ksnd_nactive_txs;
159
160 struct list_head ksnd_deathrow_conns;
161 struct list_head ksnd_zombie_conns;
162 struct list_head ksnd_enomem_conns;
163 wait_queue_head_t ksnd_reaper_waitq;
164 unsigned long ksnd_reaper_waketime;
165 spinlock_t ksnd_reaper_lock;
166
167 int ksnd_enomem_tx;
168 int ksnd_stall_tx;
169 int ksnd_stall_rx;
170
171 struct list_head ksnd_connd_connreqs;
172 struct list_head ksnd_connd_routes;
173 wait_queue_head_t ksnd_connd_waitq;
174 int ksnd_connd_connecting;
175
176 long ksnd_connd_failed_stamp;
177
178 unsigned ksnd_connd_starting;
179
180 long ksnd_connd_starting_stamp;
181
182 unsigned ksnd_connd_running;
183 spinlock_t ksnd_connd_lock;
184
185 struct list_head ksnd_idle_noop_txs;
186 spinlock_t ksnd_tx_lock;
187
188} ksock_nal_data_t;
189
/*
 * Initialisation stages, in increasing order.
 * NOTE(review): presumably the values stored in ksock_nal_data_t::ksnd_init
 * -- confirm.
 */
#define SOCKNAL_INIT_NOTHING	0
#define SOCKNAL_INIT_DATA	1
#define SOCKNAL_INIT_ALL	2
193
194
195
196
197
198
199
200
201
202
/*
 * Forward declarations: the structures below point at one another, so the
 * tags must be known before any of them is defined.
 */
struct ksock_conn;
struct ksock_peer;
struct ksock_proto;
struct ksock_route;
207
208typedef struct
209{
210 struct list_head tx_list;
211 struct list_head tx_zc_list;
212 atomic_t tx_refcount;
213 int tx_nob;
214 int tx_resid;
215 int tx_niov;
216 struct kvec *tx_iov;
217 int tx_nkiov;
218 unsigned short tx_zc_aborted;
219 unsigned short tx_zc_capable:1;
220 unsigned short tx_zc_checked:1;
221 unsigned short tx_nonblk:1;
222 lnet_kiov_t *tx_kiov;
223 struct ksock_conn *tx_conn;
224 lnet_msg_t *tx_lnetmsg;
225 unsigned long tx_deadline;
226 ksock_msg_t tx_msg;
227 int tx_desc_size;
228 union {
229 struct {
230 struct kvec iov;
231 lnet_kiov_t kiov[0];
232 } paged;
233 struct {
234 struct kvec iov[1];
235 } virt;
236 } tx_frags;
237} ksock_tx_t;
238
/*
 * Size in bytes of a ksock_tx_t up to, but not including, the trailing
 * tx_frags.paged.kiov[] array (i.e. a descriptor with no page fragments).
 */
#define KSOCK_NOOP_TX_SIZE \
	((int)offsetof(ksock_tx_t, tx_frags.paged.kiov[0]))
240
241
242
243
244
245typedef union {
246 struct kvec iov[LNET_MAX_IOV];
247 lnet_kiov_t kiov[LNET_MAX_IOV];
248} ksock_rxiovspace_t;
249
/*
 * Receive states.
 * NOTE(review): presumably the values held in ksock_conn_t::ksnc_rx_state
 * -- confirm against the receive path.
 */
#define SOCKNAL_RX_KSM_HEADER	1
#define SOCKNAL_RX_LNET_HEADER	2
#define SOCKNAL_RX_PARSE	3
#define SOCKNAL_RX_PARSE_WAIT	4
#define SOCKNAL_RX_LNET_PAYLOAD	5
#define SOCKNAL_RX_SLOP		6
256
257typedef struct ksock_conn {
258 struct ksock_peer *ksnc_peer;
259 struct ksock_route *ksnc_route;
260 struct list_head ksnc_list;
261 struct socket *ksnc_sock;
262 void *ksnc_saved_data_ready;
263 void *ksnc_saved_write_space;
264 atomic_t ksnc_conn_refcount;
265 atomic_t ksnc_sock_refcount;
266 ksock_sched_t *ksnc_scheduler;
267 __u32 ksnc_myipaddr;
268 __u32 ksnc_ipaddr;
269 int ksnc_port;
270 signed int ksnc_type:3;
271
272 unsigned int ksnc_closing:1;
273 unsigned int ksnc_flip:1;
274 unsigned int ksnc_zc_capable:1;
275 struct ksock_proto *ksnc_proto;
276
277
278 struct list_head ksnc_rx_list;
279 unsigned long ksnc_rx_deadline;
280 __u8 ksnc_rx_started;
281 __u8 ksnc_rx_ready;
282 __u8 ksnc_rx_scheduled;
283 __u8 ksnc_rx_state;
284 int ksnc_rx_nob_left;
285 int ksnc_rx_nob_wanted;
286 int ksnc_rx_niov;
287 struct kvec *ksnc_rx_iov;
288 int ksnc_rx_nkiov;
289 lnet_kiov_t *ksnc_rx_kiov;
290 ksock_rxiovspace_t ksnc_rx_iov_space;
291 __u32 ksnc_rx_csum;
292 void *ksnc_cookie;
293 ksock_msg_t ksnc_msg;
294
295
296
297
298
299
300
301 struct list_head ksnc_tx_list;
302 struct list_head ksnc_tx_queue;
303 ksock_tx_t *ksnc_tx_carrier;
304 unsigned long ksnc_tx_deadline;
305 int ksnc_tx_bufnob;
306 atomic_t ksnc_tx_nob;
307 int ksnc_tx_ready;
308 int ksnc_tx_scheduled;
309 unsigned long ksnc_tx_last_post;
310} ksock_conn_t;
311
312typedef struct ksock_route {
313 struct list_head ksnr_list;
314 struct list_head ksnr_connd_list;
315 struct ksock_peer *ksnr_peer;
316 atomic_t ksnr_refcount;
317 unsigned long ksnr_timeout;
318 long ksnr_retry_interval;
319 __u32 ksnr_myipaddr;
320 __u32 ksnr_ipaddr;
321 int ksnr_port;
322 unsigned int ksnr_scheduled:1;
323 unsigned int ksnr_connecting:1;
324 unsigned int ksnr_connected:4;
325 unsigned int ksnr_deleted:1;
326 unsigned int ksnr_share_count;
327 int ksnr_conn_count;
328} ksock_route_t;
329
/* NOTE(review): presumably the cookie value marking a keepalive ping -- confirm. */
#define SOCKNAL_KEEPALIVE_PING	1
331
332typedef struct ksock_peer {
333 struct list_head ksnp_list;
334 unsigned long ksnp_last_alive;
335 lnet_process_id_t ksnp_id;
336 atomic_t ksnp_refcount;
337 int ksnp_sharecount;
338 int ksnp_closing;
339 int ksnp_accepting;
340 int ksnp_error;
341 __u64 ksnp_zc_next_cookie;
342 __u64 ksnp_incarnation;
343 struct ksock_proto *ksnp_proto;
344 struct list_head ksnp_conns;
345 struct list_head ksnp_routes;
346 struct list_head ksnp_tx_queue;
347 spinlock_t ksnp_lock;
348 struct list_head ksnp_zc_req_list;
349 unsigned long ksnp_send_keepalive;
350 lnet_ni_t *ksnp_ni;
351 int ksnp_n_passive_ips;
352 __u32 ksnp_passive_ips[LNET_MAX_INTERFACES];
353} ksock_peer_t;
354
355typedef struct ksock_connreq {
356 struct list_head ksncr_list;
357 lnet_ni_t *ksncr_ni;
358 struct socket *ksncr_sock;
359} ksock_connreq_t;
360
361extern ksock_nal_data_t ksocknal_data;
362extern ksock_tunables_t ksocknal_tunables;
363
/*
 * Three-valued match result.
 * NOTE(review): presumably what ksock_proto_t::pro_match_tx returns --
 * confirm.
 */
#define SOCKNAL_MATCH_NO	0
#define SOCKNAL_MATCH_YES	1
#define SOCKNAL_MATCH_MAY	2
367
368typedef struct ksock_proto {
369 int pro_version;
370 int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *);
371 int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);
372 void (*pro_pack)(ksock_tx_t *);
373 void (*pro_unpack)(ksock_msg_t *);
374 ksock_tx_t *(*pro_queue_tx_msg)(ksock_conn_t *, ksock_tx_t *);
375 int (*pro_queue_tx_zcack)(ksock_conn_t *, ksock_tx_t *, __u64);
376 int (*pro_handle_zcreq)(ksock_conn_t *, __u64, int);
377 int (*pro_handle_zcack)(ksock_conn_t *, __u64, __u64);
378 int (*pro_match_tx)(ksock_conn_t *, ksock_tx_t *, int);
379
380
381
382
383} ksock_proto_t;
384
385extern ksock_proto_t ksocknal_protocol_v1x;
386extern ksock_proto_t ksocknal_protocol_v2x;
387extern ksock_proto_t ksocknal_protocol_v3x;
388
/*
 * Version 1 of the protocol is identified by the LNet TCP protocol
 * major/minor numbers; KSOCK_PROTO_V1 itself is just the major number.
 */
#define KSOCK_PROTO_V1_MAJOR	LNET_PROTO_TCP_VERSION_MAJOR
#define KSOCK_PROTO_V1_MINOR	LNET_PROTO_TCP_VERSION_MINOR
#define KSOCK_PROTO_V1		KSOCK_PROTO_V1_MAJOR
392
/* Fallback: supply an empty CPU mask value if no header defined one. */
#if !defined(CPU_MASK_NONE)
# define CPU_MASK_NONE	0UL
#endif
396
397static inline int
398ksocknal_route_mask(void)
399{
400 if (!*ksocknal_tunables.ksnd_typed_conns)
401 return (1 << SOCKLND_CONN_ANY);
402
403 return ((1 << SOCKLND_CONN_CONTROL) |
404 (1 << SOCKLND_CONN_BULK_IN) |
405 (1 << SOCKLND_CONN_BULK_OUT));
406}
407
408static inline struct list_head *
409ksocknal_nid2peerlist(lnet_nid_t nid)
410{
411 unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
412
413 return &ksocknal_data.ksnd_peers[hash];
414}
415
416static inline void
417ksocknal_conn_addref(ksock_conn_t *conn)
418{
419 LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
420 atomic_inc(&conn->ksnc_conn_refcount);
421}
422
423extern void ksocknal_queue_zombie_conn(ksock_conn_t *conn);
424extern void ksocknal_finalize_zcreq(ksock_conn_t *conn);
425
426static inline void
427ksocknal_conn_decref(ksock_conn_t *conn)
428{
429 LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
430 if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
431 ksocknal_queue_zombie_conn(conn);
432}
433
434static inline int
435ksocknal_connsock_addref(ksock_conn_t *conn)
436{
437 int rc = -ESHUTDOWN;
438
439 read_lock(&ksocknal_data.ksnd_global_lock);
440 if (!conn->ksnc_closing) {
441 LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
442 atomic_inc(&conn->ksnc_sock_refcount);
443 rc = 0;
444 }
445 read_unlock(&ksocknal_data.ksnd_global_lock);
446
447 return rc;
448}
449
450static inline void
451ksocknal_connsock_decref(ksock_conn_t *conn)
452{
453 LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
454 if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
455 LASSERT(conn->ksnc_closing);
456 libcfs_sock_release(conn->ksnc_sock);
457 conn->ksnc_sock = NULL;
458 ksocknal_finalize_zcreq(conn);
459 }
460}
461
462static inline void
463ksocknal_tx_addref(ksock_tx_t *tx)
464{
465 LASSERT(atomic_read(&tx->tx_refcount) > 0);
466 atomic_inc(&tx->tx_refcount);
467}
468
469extern void ksocknal_tx_prep(ksock_conn_t *, ksock_tx_t *tx);
470extern void ksocknal_tx_done(lnet_ni_t *ni, ksock_tx_t *tx);
471
472static inline void
473ksocknal_tx_decref(ksock_tx_t *tx)
474{
475 LASSERT(atomic_read(&tx->tx_refcount) > 0);
476 if (atomic_dec_and_test(&tx->tx_refcount))
477 ksocknal_tx_done(NULL, tx);
478}
479
480static inline void
481ksocknal_route_addref(ksock_route_t *route)
482{
483 LASSERT(atomic_read(&route->ksnr_refcount) > 0);
484 atomic_inc(&route->ksnr_refcount);
485}
486
487extern void ksocknal_destroy_route(ksock_route_t *route);
488
489static inline void
490ksocknal_route_decref(ksock_route_t *route)
491{
492 LASSERT(atomic_read(&route->ksnr_refcount) > 0);
493 if (atomic_dec_and_test(&route->ksnr_refcount))
494 ksocknal_destroy_route(route);
495}
496
497static inline void
498ksocknal_peer_addref(ksock_peer_t *peer)
499{
500 LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
501 atomic_inc(&peer->ksnp_refcount);
502}
503
504extern void ksocknal_destroy_peer(ksock_peer_t *peer);
505
506static inline void
507ksocknal_peer_decref(ksock_peer_t *peer)
508{
509 LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
510 if (atomic_dec_and_test(&peer->ksnp_refcount))
511 ksocknal_destroy_peer(peer);
512}
513
514int ksocknal_startup(lnet_ni_t *ni);
515void ksocknal_shutdown(lnet_ni_t *ni);
516int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
517int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
518int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
519 int delayed, unsigned int niov,
520 struct kvec *iov, lnet_kiov_t *kiov,
521 unsigned int offset, unsigned int mlen, unsigned int rlen);
522int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
523
524extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
525extern ksock_peer_t *ksocknal_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id);
526extern ksock_peer_t *ksocknal_find_peer(lnet_ni_t *ni, lnet_process_id_t id);
527extern void ksocknal_peer_failed(ksock_peer_t *peer);
528extern int ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route,
529 struct socket *sock, int type);
530extern void ksocknal_close_conn_locked(ksock_conn_t *conn, int why);
531extern void ksocknal_terminate_conn(ksock_conn_t *conn);
532extern void ksocknal_destroy_conn(ksock_conn_t *conn);
533extern int ksocknal_close_peer_conns_locked(ksock_peer_t *peer,
534 __u32 ipaddr, int why);
535extern int ksocknal_close_conn_and_siblings(ksock_conn_t *conn, int why);
536extern int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
537extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_t *peer,
538 ksock_tx_t *tx, int nonblk);
539
540extern int ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx,
541 lnet_process_id_t id);
542extern ksock_tx_t *ksocknal_alloc_tx(int type, int size);
543extern void ksocknal_free_tx(ksock_tx_t *tx);
544extern ksock_tx_t *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
545extern void ksocknal_next_tx_carrier(ksock_conn_t *conn);
546extern void ksocknal_queue_tx_locked(ksock_tx_t *tx, ksock_conn_t *conn);
547extern void ksocknal_txlist_done(lnet_ni_t *ni, struct list_head *txlist,
548 int error);
549extern void ksocknal_notify(lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
550extern void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
551extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
552extern void ksocknal_thread_fini(void);
553extern void ksocknal_launch_all_connections_locked(ksock_peer_t *peer);
554extern ksock_route_t *ksocknal_find_connectable_route_locked(ksock_peer_t *peer);
555extern ksock_route_t *ksocknal_find_connecting_route_locked(ksock_peer_t *peer);
556extern int ksocknal_new_packet(ksock_conn_t *conn, int skip);
557extern int ksocknal_scheduler(void *arg);
558extern int ksocknal_connd(void *arg);
559extern int ksocknal_reaper(void *arg);
560extern int ksocknal_send_hello(lnet_ni_t *ni, ksock_conn_t *conn,
561 lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
562extern int ksocknal_recv_hello(lnet_ni_t *ni, ksock_conn_t *conn,
563 ksock_hello_msg_t *hello, lnet_process_id_t *id,
564 __u64 *incarnation);
565extern void ksocknal_read_callback(ksock_conn_t *conn);
566extern void ksocknal_write_callback(ksock_conn_t *conn);
567
568extern int ksocknal_lib_zc_capable(ksock_conn_t *conn);
569extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
570extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn);
571extern void ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn);
572extern void ksocknal_lib_push_conn(ksock_conn_t *conn);
573extern int ksocknal_lib_get_conn_addrs(ksock_conn_t *conn);
574extern int ksocknal_lib_setup_sock(struct socket *so);
575extern int ksocknal_lib_send_iov(ksock_conn_t *conn, ksock_tx_t *tx);
576extern int ksocknal_lib_send_kiov(ksock_conn_t *conn, ksock_tx_t *tx);
577extern void ksocknal_lib_eager_ack(ksock_conn_t *conn);
578extern int ksocknal_lib_recv_iov(ksock_conn_t *conn);
579extern int ksocknal_lib_recv_kiov(ksock_conn_t *conn);
580extern int ksocknal_lib_get_conn_tunables(ksock_conn_t *conn, int *txmem,
581 int *rxmem, int *nagle);
582
583extern int ksocknal_tunables_init(void);
584
585extern void ksocknal_lib_csum_tx(ksock_tx_t *tx);
586
587extern int ksocknal_lib_memory_pressure(ksock_conn_t *conn);
588extern int ksocknal_lib_bind_thread_to_cpu(int id);
589