1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#ifndef _SOCKLND_SOCKLND_H_
25#define _SOCKLND_SOCKLND_H_
26
/*
 * Debug switches, defined ahead of the #include block that follows.
 * NOTE(review): presumably consumed by the libcfs debug/alloc macros, which
 * would make the ordering significant - confirm before moving these.
 */
#define DEBUG_PORTAL_ALLOC
#define DEBUG_SUBSYSTEM	S_LND
29
30#include <linux/crc32.h>
31#include <linux/errno.h>
32#include <linux/if.h>
33#include <linux/init.h>
34#include <linux/kernel.h>
35#include <linux/kmod.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/module.h>
39#include <linux/stat.h>
40#include <linux/string.h>
41#include <linux/syscalls.h>
42#include <linux/sysctl.h>
43#include <linux/uio.h>
44#include <linux/unistd.h>
45#include <asm/irq.h>
46#include <net/sock.h>
47#include <net/tcp.h>
48
49#include "../../../include/linux/libcfs/libcfs.h"
50#include "../../../include/linux/lnet/lnet.h"
51#include "../../../include/linux/lnet/lib-lnet.h"
52#include "../../../include/linux/lnet/socklnd.h"
53
54
/*
 * Compile-time constants for the socket LND.
 * NOTE(review): units (ticks, ms, counts) are not visible in this header;
 * check the users before changing any value.
 */
#define SOCKNAL_NSCHEDS		3
#define SOCKNAL_NSCHEDS_HIGH	(SOCKNAL_NSCHEDS << 1)	/* 2 * SOCKNAL_NSCHEDS */

#define SOCKNAL_PEER_HASH_SIZE	101
#define SOCKNAL_RESCHED		100
#define SOCKNAL_INSANITY_RECONN	5000
#define SOCKNAL_ENOMEM_RETRY	CFS_TICK

/* Both single-fragment switches are disabled. */
#define SOCKNAL_SINGLE_FRAG_TX	0
#define SOCKNAL_SINGLE_FRAG_RX	0

#define SOCKNAL_VERSION_DEBUG	0
67
68
69
70
71
/* SOCKNAL_RISK_KMAP_DEADLOCK: 0 on CONFIG_HIGHMEM builds, 1 everywhere else. */
#ifdef CONFIG_HIGHMEM
# define SOCKNAL_RISK_KMAP_DEADLOCK	0
#else
# define SOCKNAL_RISK_KMAP_DEADLOCK	1
#endif
77
78struct ksock_sched_info;
79
80struct ksock_sched {
81 spinlock_t kss_lock;
82 struct list_head kss_rx_conns;
83 struct list_head kss_tx_conns;
84 struct list_head kss_zombie_noop_txs;
85 wait_queue_head_t kss_waitq;
86 int kss_nconns;
87
88 struct ksock_sched_info *kss_info;
89};
90
/*
 * Scheduler bookkeeping: thread counts, a CPT number and a pointer to
 * struct ksock_sched.
 * NOTE(review): ksi_scheds presumably points at an array sized by
 * ksi_nthreads_max - confirm at the allocation site.
 */
struct ksock_sched_info {
	int			ksi_nthreads_max;
	int			ksi_nthreads;
	int			ksi_cpt;
	struct ksock_sched	*ksi_scheds;
};
97
/*
 * Thread id packing: the CPT number occupies the bits above KSOCK_CPT_SHIFT
 * and the scheduler id (sid) the low KSOCK_CPT_SHIFT bits.
 * KSOCK_THREAD_SID() yields an unsigned long because of the 1UL mask.
 */
#define KSOCK_CPT_SHIFT			16
#define KSOCK_THREAD_ID(cpt, sid)	(((cpt) << KSOCK_CPT_SHIFT) | (sid))
#define KSOCK_THREAD_CPT(id)		((id) >> KSOCK_CPT_SHIFT)
#define KSOCK_THREAD_SID(id)		((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
102
103struct ksock_interface {
104 __u32 ksni_ipaddr;
105 __u32 ksni_netmask;
106 int ksni_nroutes;
107 int ksni_npeers;
108 char ksni_name[IFNAMSIZ];
109};
110
/*
 * Tunables table.  Every member is a POINTER to the tunable's storage, not
 * the value itself; readers dereference it (see ksocknal_route_mask(), which
 * reads *ksnd_typed_conns).
 * NOTE(review): presumably the pointers are wired up by
 * ksocknal_tunables_init() - confirm in the tunables source.
 */
struct ksock_tunables {
	int		*ksnd_timeout;
	int		*ksnd_nscheds;
	int		*ksnd_nconnds;
	int		*ksnd_nconnds_max;
	int		*ksnd_min_reconnectms;
	int		*ksnd_max_reconnectms;
	int		*ksnd_eager_ack;
	int		*ksnd_typed_conns;	/* non-zero selects the typed-connection mask in ksocknal_route_mask() */
	int		*ksnd_min_bulk;
	int		*ksnd_tx_buffer_size;
	int		*ksnd_rx_buffer_size;
	int		*ksnd_nagle;
	int		*ksnd_round_robin;
	int		*ksnd_keepalive;
	int		*ksnd_keepalive_idle;
	int		*ksnd_keepalive_count;
	int		*ksnd_keepalive_intvl;
	int		*ksnd_credits;
	int		*ksnd_peertxcredits;
	int		*ksnd_peerrtrcredits;
	int		*ksnd_peertimeout;
	int		*ksnd_enable_csum;
	int		*ksnd_inject_csum_error;
	int		*ksnd_nonblk_zcack;
	unsigned int	*ksnd_zc_min_payload;	/* the only unsigned tunable */
	int		*ksnd_zc_recv;
	int		*ksnd_zc_recv_min_nfrags;
};
155
156struct ksock_net {
157 __u64 ksnn_incarnation;
158 spinlock_t ksnn_lock;
159 struct list_head ksnn_list;
160 int ksnn_npeers;
161 int ksnn_shutdown;
162 int ksnn_ninterfaces;
163 struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES];
164};
165
166
/*
 * Connection-daemon constants.
 * NOTE(review): SOCKNAL_CONND_TIMEOUT is presumably in seconds and
 * SOCKNAL_CONND_RESV a count of reserved daemons - confirm in the connd code.
 */
#define SOCKNAL_CONND_TIMEOUT	120
#define SOCKNAL_CONND_RESV	1
170
171struct ksock_nal_data {
172 int ksnd_init;
173
174 int ksnd_nnets;
175 struct list_head ksnd_nets;
176 rwlock_t ksnd_global_lock;
177
178 struct list_head *ksnd_peers;
179
180 int ksnd_peer_hash_size;
181
182 int ksnd_nthreads;
183 int ksnd_shuttingdown;
184
185 struct ksock_sched_info **ksnd_sched_info;
186
187 atomic_t ksnd_nactive_txs;
188
189 struct list_head ksnd_deathrow_conns;
190
191 struct list_head ksnd_zombie_conns;
192
193 struct list_head ksnd_enomem_conns;
194
195 wait_queue_head_t ksnd_reaper_waitq;
196 unsigned long ksnd_reaper_waketime;
197
198 spinlock_t ksnd_reaper_lock;
199
200 int ksnd_enomem_tx;
201 int ksnd_stall_tx;
202
203 int ksnd_stall_rx;
204
205
206 struct list_head ksnd_connd_connreqs;
207
208 struct list_head ksnd_connd_routes;
209
210 wait_queue_head_t ksnd_connd_waitq;
211 int ksnd_connd_connecting;
212
213 time64_t ksnd_connd_failed_stamp;
214
215
216 time64_t ksnd_connd_starting_stamp;
217
218
219 unsigned ksnd_connd_starting;
220 unsigned ksnd_connd_running;
221 spinlock_t ksnd_connd_lock;
222
223 struct list_head ksnd_idle_noop_txs;
224
225 spinlock_t ksnd_tx_lock;
226
227
228};
229
/*
 * Initialisation stages, in increasing order.
 * NOTE(review): presumably the values stored in ksock_nal_data.ksnd_init.
 */
#define SOCKNAL_INIT_NOTHING	0
#define SOCKNAL_INIT_DATA	1
#define SOCKNAL_INIT_ALL	2
233
234
235
236
237
238
239
240
241
242
243
/* Forward declarations: these types reference each other by pointer below. */
struct ksock_conn;
struct ksock_peer;
struct ksock_route;
struct ksock_proto;
248
249struct ksock_tx {
250 struct list_head tx_list;
251
252 struct list_head tx_zc_list;
253 atomic_t tx_refcount;
254 int tx_nob;
255 int tx_resid;
256 int tx_niov;
257 struct kvec *tx_iov;
258 int tx_nkiov;
259 unsigned short tx_zc_aborted;
260 unsigned short tx_zc_capable:1;
261 unsigned short tx_zc_checked:1;
262 unsigned short tx_nonblk:1;
263 lnet_kiov_t *tx_kiov;
264 struct ksock_conn *tx_conn;
265 lnet_msg_t *tx_lnetmsg;
266
267 unsigned long tx_deadline;
268 ksock_msg_t tx_msg;
269 int tx_desc_size;
270 union {
271 struct {
272 struct kvec iov;
273 lnet_kiov_t kiov[0];
274 } paged;
275 struct {
276 struct kvec iov[1];
277 } virt;
278 } tx_frags;
279};
280
/* Size of a struct ksock_tx up to, but excluding, the paged kiov[] tail. */
#define KSOCK_NOOP_TX_SIZE	(offsetof(struct ksock_tx, tx_frags.paged.kiov[0]))
282
283
284
285
286
287
288
289union ksock_rxiovspace {
290 struct kvec iov[LNET_MAX_IOV];
291 lnet_kiov_t kiov[LNET_MAX_IOV];
292};
293
/*
 * Receive states, numbered 1..6.
 * NOTE(review): presumably the values held in ksock_conn.ksnc_rx_state.
 */
#define SOCKNAL_RX_KSM_HEADER	1
#define SOCKNAL_RX_LNET_HEADER	2
#define SOCKNAL_RX_PARSE	3
#define SOCKNAL_RX_PARSE_WAIT	4
#define SOCKNAL_RX_LNET_PAYLOAD	5
#define SOCKNAL_RX_SLOP		6
300
301struct ksock_conn {
302 struct ksock_peer *ksnc_peer;
303 struct ksock_route *ksnc_route;
304 struct list_head ksnc_list;
305 struct socket *ksnc_sock;
306 void *ksnc_saved_data_ready;
307
308 void *ksnc_saved_write_space;
309
310 atomic_t ksnc_conn_refcount;
311 atomic_t ksnc_sock_refcount;
312 struct ksock_sched *ksnc_scheduler;
313
314 __u32 ksnc_myipaddr;
315 __u32 ksnc_ipaddr;
316 int ksnc_port;
317 signed int ksnc_type:3;
318
319 unsigned int ksnc_closing:1;
320 unsigned int ksnc_flip:1;
321 unsigned int ksnc_zc_capable:1;
322 struct ksock_proto *ksnc_proto;
323
324
325 struct list_head ksnc_rx_list;
326
327 unsigned long ksnc_rx_deadline;
328
329 __u8 ksnc_rx_started;
330 __u8 ksnc_rx_ready;
331 __u8 ksnc_rx_scheduled;
332 __u8 ksnc_rx_state;
333 int ksnc_rx_nob_left;
334 int ksnc_rx_nob_wanted;
335 int ksnc_rx_niov;
336 struct kvec *ksnc_rx_iov;
337 int ksnc_rx_nkiov;
338 lnet_kiov_t *ksnc_rx_kiov;
339 union ksock_rxiovspace ksnc_rx_iov_space;
340 __u32 ksnc_rx_csum;
341
342 void *ksnc_cookie;
343
344 ksock_msg_t ksnc_msg;
345
346
347
348
349
350
351
352 struct list_head ksnc_tx_list;
353
354 struct list_head ksnc_tx_queue;
355 struct ksock_tx *ksnc_tx_carrier;
356
357 unsigned long ksnc_tx_deadline;
358
359 int ksnc_tx_bufnob;
360 atomic_t ksnc_tx_nob;
361 int ksnc_tx_ready;
362 int ksnc_tx_scheduled;
363 unsigned long ksnc_tx_last_post;
364
365};
366
367struct ksock_route {
368 struct list_head ksnr_list;
369 struct list_head ksnr_connd_list;
370 struct ksock_peer *ksnr_peer;
371 atomic_t ksnr_refcount;
372 unsigned long ksnr_timeout;
373
374 long ksnr_retry_interval;
375 __u32 ksnr_myipaddr;
376 __u32 ksnr_ipaddr;
377 int ksnr_port;
378 unsigned int ksnr_scheduled:1;
379 unsigned int ksnr_connecting:1;
380
381 unsigned int ksnr_connected:4;
382
383 unsigned int ksnr_deleted:1;
384 unsigned int ksnr_share_count;
385 int ksnr_conn_count;
386
387};
388
/* NOTE(review): presumably a cookie/marker value for keepalive pings - confirm at the users. */
#define SOCKNAL_KEEPALIVE_PING	1
390
391struct ksock_peer {
392 struct list_head ksnp_list;
393 unsigned long ksnp_last_alive;
394
395 lnet_process_id_t ksnp_id;
396 atomic_t ksnp_refcount;
397 int ksnp_sharecount;
398 int ksnp_closing;
399 int ksnp_accepting;
400
401 int ksnp_error;
402 __u64 ksnp_zc_next_cookie;
403 __u64 ksnp_incarnation;
404
405 struct ksock_proto *ksnp_proto;
406 struct list_head ksnp_conns;
407 struct list_head ksnp_routes;
408 struct list_head ksnp_tx_queue;
409 spinlock_t ksnp_lock;
410 struct list_head ksnp_zc_req_list;
411
412 unsigned long ksnp_send_keepalive;
413 lnet_ni_t *ksnp_ni;
414 int ksnp_n_passive_ips;
415
416
417 __u32 ksnp_passive_ips[LNET_MAX_INTERFACES];
418};
419
420struct ksock_connreq {
421 struct list_head ksncr_list;
422 lnet_ni_t *ksncr_ni;
423 struct socket *ksncr_sock;
424};
425
/* Global instances, declared here only; used by the inline helpers in this header. */
extern struct ksock_nal_data	ksocknal_data;
extern struct ksock_tunables	ksocknal_tunables;
428
/*
 * Three-valued match result.
 * NOTE(review): presumably what ksock_proto.pro_match_tx returns - confirm.
 */
#define SOCKNAL_MATCH_NO	0
#define SOCKNAL_MATCH_YES	1
#define SOCKNAL_MATCH_MAY	2
432
433
434struct ksock_proto {
435
436 int pro_version;
437
438
439 int (*pro_send_hello)(struct ksock_conn *, ksock_hello_msg_t *);
440
441
442 int (*pro_recv_hello)(struct ksock_conn *, ksock_hello_msg_t *, int);
443
444
445 void (*pro_pack)(struct ksock_tx *);
446
447
448 void (*pro_unpack)(ksock_msg_t *);
449
450
451 struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *);
452
453
454 int (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64);
455
456
457 int (*pro_handle_zcreq)(struct ksock_conn *, __u64, int);
458
459
460 int (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64);
461
462
463
464
465
466
467
468
469 int (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int);
470};
471
/* Protocol operation tables, one per supported version family. */
extern struct ksock_proto	ksocknal_protocol_v1x;
extern struct ksock_proto	ksocknal_protocol_v2x;
extern struct ksock_proto	ksocknal_protocol_v3x;
475
/* Version 1 protocol numbers are aliases of the LNet TCP protocol version; V1 itself is the major number. */
#define KSOCK_PROTO_V1_MAJOR	LNET_PROTO_TCP_VERSION_MAJOR
#define KSOCK_PROTO_V1_MINOR	LNET_PROTO_TCP_VERSION_MINOR
#define KSOCK_PROTO_V1		KSOCK_PROTO_V1_MAJOR
479
/* Fallback for builds where CPU_MASK_NONE is not already defined. */
#ifndef CPU_MASK_NONE
#define CPU_MASK_NONE	0UL
#endif
483
484static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len)
485{
486#if 1
487 return crc32_le(crc, p, len);
488#else
489 while (len-- > 0)
490 crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ;
491 return crc;
492#endif
493}
494
495static inline int
496ksocknal_route_mask(void)
497{
498 if (!*ksocknal_tunables.ksnd_typed_conns)
499 return (1 << SOCKLND_CONN_ANY);
500
501 return ((1 << SOCKLND_CONN_CONTROL) |
502 (1 << SOCKLND_CONN_BULK_IN) |
503 (1 << SOCKLND_CONN_BULK_OUT));
504}
505
506static inline struct list_head *
507ksocknal_nid2peerlist(lnet_nid_t nid)
508{
509 unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
510
511 return &ksocknal_data.ksnd_peers[hash];
512}
513
514static inline void
515ksocknal_conn_addref(struct ksock_conn *conn)
516{
517 LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
518 atomic_inc(&conn->ksnc_conn_refcount);
519}
520
521void ksocknal_queue_zombie_conn(struct ksock_conn *conn);
522void ksocknal_finalize_zcreq(struct ksock_conn *conn);
523
524static inline void
525ksocknal_conn_decref(struct ksock_conn *conn)
526{
527 LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0);
528 if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
529 ksocknal_queue_zombie_conn(conn);
530}
531
532static inline int
533ksocknal_connsock_addref(struct ksock_conn *conn)
534{
535 int rc = -ESHUTDOWN;
536
537 read_lock(&ksocknal_data.ksnd_global_lock);
538 if (!conn->ksnc_closing) {
539 LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
540 atomic_inc(&conn->ksnc_sock_refcount);
541 rc = 0;
542 }
543 read_unlock(&ksocknal_data.ksnd_global_lock);
544
545 return rc;
546}
547
548static inline void
549ksocknal_connsock_decref(struct ksock_conn *conn)
550{
551 LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
552 if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
553 LASSERT(conn->ksnc_closing);
554 sock_release(conn->ksnc_sock);
555 conn->ksnc_sock = NULL;
556 ksocknal_finalize_zcreq(conn);
557 }
558}
559
560static inline void
561ksocknal_tx_addref(struct ksock_tx *tx)
562{
563 LASSERT(atomic_read(&tx->tx_refcount) > 0);
564 atomic_inc(&tx->tx_refcount);
565}
566
567void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx);
568void ksocknal_tx_done(lnet_ni_t *ni, struct ksock_tx *tx);
569
570static inline void
571ksocknal_tx_decref(struct ksock_tx *tx)
572{
573 LASSERT(atomic_read(&tx->tx_refcount) > 0);
574 if (atomic_dec_and_test(&tx->tx_refcount))
575 ksocknal_tx_done(NULL, tx);
576}
577
578static inline void
579ksocknal_route_addref(struct ksock_route *route)
580{
581 LASSERT(atomic_read(&route->ksnr_refcount) > 0);
582 atomic_inc(&route->ksnr_refcount);
583}
584
585void ksocknal_destroy_route(struct ksock_route *route);
586
587static inline void
588ksocknal_route_decref(struct ksock_route *route)
589{
590 LASSERT(atomic_read(&route->ksnr_refcount) > 0);
591 if (atomic_dec_and_test(&route->ksnr_refcount))
592 ksocknal_destroy_route(route);
593}
594
595static inline void
596ksocknal_peer_addref(struct ksock_peer *peer)
597{
598 LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
599 atomic_inc(&peer->ksnp_refcount);
600}
601
602void ksocknal_destroy_peer(struct ksock_peer *peer);
603
604static inline void
605ksocknal_peer_decref(struct ksock_peer *peer)
606{
607 LASSERT(atomic_read(&peer->ksnp_refcount) > 0);
608 if (atomic_dec_and_test(&peer->ksnp_refcount))
609 ksocknal_destroy_peer(peer);
610}
611
612int ksocknal_startup(lnet_ni_t *ni);
613void ksocknal_shutdown(lnet_ni_t *ni);
614int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
615int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
616int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
617 int delayed, struct iov_iter *to, unsigned int rlen);
618int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
619
620int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
621struct ksock_peer *ksocknal_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id);
622struct ksock_peer *ksocknal_find_peer(lnet_ni_t *ni, lnet_process_id_t id);
623void ksocknal_peer_failed(struct ksock_peer *peer);
624int ksocknal_create_conn(lnet_ni_t *ni, struct ksock_route *route,
625 struct socket *sock, int type);
626void ksocknal_close_conn_locked(struct ksock_conn *conn, int why);
627void ksocknal_terminate_conn(struct ksock_conn *conn);
628void ksocknal_destroy_conn(struct ksock_conn *conn);
629int ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
630 __u32 ipaddr, int why);
631int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
632int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
633struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
634 struct ksock_tx *tx, int nonblk);
635
636int ksocknal_launch_packet(lnet_ni_t *ni, struct ksock_tx *tx,
637 lnet_process_id_t id);
638struct ksock_tx *ksocknal_alloc_tx(int type, int size);
639void ksocknal_free_tx(struct ksock_tx *tx);
640struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
641void ksocknal_next_tx_carrier(struct ksock_conn *conn);
642void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
643void ksocknal_txlist_done(lnet_ni_t *ni, struct list_head *txlist, int error);
644void ksocknal_notify(lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
645void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);
646int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
647void ksocknal_thread_fini(void);
648void ksocknal_launch_all_connections_locked(struct ksock_peer *peer);
649struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer);
650struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer);
651int ksocknal_new_packet(struct ksock_conn *conn, int skip);
652int ksocknal_scheduler(void *arg);
653int ksocknal_connd(void *arg);
654int ksocknal_reaper(void *arg);
655int ksocknal_send_hello(lnet_ni_t *ni, struct ksock_conn *conn,
656 lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
657int ksocknal_recv_hello(lnet_ni_t *ni, struct ksock_conn *conn,
658 ksock_hello_msg_t *hello, lnet_process_id_t *id,
659 __u64 *incarnation);
660void ksocknal_read_callback(struct ksock_conn *conn);
661void ksocknal_write_callback(struct ksock_conn *conn);
662
/*
 * ksocknal_lib_*: socket-level helpers.
 * NOTE(review): presumably implemented in the platform-specific "lib" source
 * file - confirm.
 */
int ksocknal_lib_zc_capable(struct ksock_conn *conn);
void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn);
void ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn);
void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn);
void ksocknal_lib_push_conn(struct ksock_conn *conn);
int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn);
int ksocknal_lib_setup_sock(struct socket *so);
int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
void ksocknal_lib_eager_ack(struct ksock_conn *conn);
int ksocknal_lib_recv_iov(struct ksock_conn *conn);
int ksocknal_lib_recv_kiov(struct ksock_conn *conn);
int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
				   int *rxmem, int *nagle);
677
/*
 * ksocknal_read_callback() and ksocknal_write_callback() are already declared
 * earlier in this header, so their redundant second declarations were
 * removed from this spot.
 */
int ksocknal_tunables_init(void);

void ksocknal_lib_csum_tx(struct ksock_tx *tx);

int ksocknal_lib_memory_pressure(struct ksock_conn *conn);
int ksocknal_lib_bind_thread_to_cpu(int id);
687
688#endif
689