1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include "socklnd.h"
34
35int
36ksocknal_lib_get_conn_addrs(struct ksock_conn *conn)
37{
38 int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr,
39 &conn->ksnc_port);
40
41
42 LASSERT(!conn->ksnc_closing);
43
44 if (rc) {
45 CERROR("Error %d getting sock peer IP\n", rc);
46 return rc;
47 }
48
49 rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL);
50 if (rc) {
51 CERROR("Error %d getting sock local IP\n", rc);
52 return rc;
53 }
54
55 return 0;
56}
57
58int
59ksocknal_lib_zc_capable(struct ksock_conn *conn)
60{
61 int caps = conn->ksnc_sock->sk->sk_route_caps;
62
63 if (conn->ksnc_proto == &ksocknal_protocol_v1x)
64 return 0;
65
66
67
68
69
70 return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK));
71}
72
73int
74ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
75{
76 struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
77 struct socket *sock = conn->ksnc_sock;
78 int nob, i;
79
80 if (*ksocknal_tunables.ksnd_enable_csum &&
81 conn->ksnc_proto == &ksocknal_protocol_v2x &&
82 tx->tx_nob == tx->tx_resid &&
83 !tx->tx_msg.ksm_csum)
84 ksocknal_lib_csum_tx(tx);
85
86 for (nob = i = 0; i < tx->tx_niov; i++)
87 nob += tx->tx_iov[i].iov_len;
88
89 if (!list_empty(&conn->ksnc_tx_queue) ||
90 nob < tx->tx_resid)
91 msg.msg_flags |= MSG_MORE;
92
93 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
94 tx->tx_iov, tx->tx_niov, nob);
95 return sock_sendmsg(sock, &msg);
96}
97
98int
99ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
100{
101 struct socket *sock = conn->ksnc_sock;
102 lnet_kiov_t *kiov = tx->tx_kiov;
103 int rc;
104 int nob;
105
106
107 LASSERT(tx->tx_lnetmsg);
108
109 if (tx->tx_msg.ksm_zc_cookies[0]) {
110
111 struct sock *sk = sock->sk;
112 struct page *page = kiov->bv_page;
113 int offset = kiov->bv_offset;
114 int fragsize = kiov->bv_len;
115 int msgflg = MSG_DONTWAIT;
116
117 CDEBUG(D_NET, "page %p + offset %x for %d\n",
118 page, offset, kiov->bv_len);
119
120 if (!list_empty(&conn->ksnc_tx_queue) ||
121 fragsize < tx->tx_resid)
122 msgflg |= MSG_MORE;
123
124 if (sk->sk_prot->sendpage) {
125 rc = sk->sk_prot->sendpage(sk, page,
126 offset, fragsize, msgflg);
127 } else {
128 rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
129 }
130 } else {
131 struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
132 int i;
133
134 for (nob = i = 0; i < tx->tx_nkiov; i++)
135 nob += kiov[i].bv_len;
136
137 if (!list_empty(&conn->ksnc_tx_queue) ||
138 nob < tx->tx_resid)
139 msg.msg_flags |= MSG_MORE;
140
141 iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
142 kiov, tx->tx_nkiov, nob);
143 rc = sock_sendmsg(sock, &msg);
144 }
145 return rc;
146}
147
148void
149ksocknal_lib_eager_ack(struct ksock_conn *conn)
150{
151 int opt = 1;
152 struct socket *sock = conn->ksnc_sock;
153
154
155
156
157
158
159
160 kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt,
161 sizeof(opt));
162}
163
164int
165ksocknal_lib_recv_iov(struct ksock_conn *conn)
166{
167 unsigned int niov = conn->ksnc_rx_niov;
168 struct kvec *iov = conn->ksnc_rx_iov;
169 struct msghdr msg = {
170 .msg_flags = 0
171 };
172 int nob;
173 int i;
174 int rc;
175 int fragnob;
176 int sum;
177 __u32 saved_csum;
178
179 LASSERT(niov > 0);
180
181 for (nob = i = 0; i < niov; i++)
182 nob += iov[i].iov_len;
183
184 LASSERT(nob <= conn->ksnc_rx_nob_wanted);
185
186 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
187 rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
188
189 saved_csum = 0;
190 if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
191 saved_csum = conn->ksnc_msg.ksm_csum;
192 conn->ksnc_msg.ksm_csum = 0;
193 }
194
195 if (saved_csum) {
196
197 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
198 LASSERT(i < niov);
199
200 fragnob = iov[i].iov_len;
201 if (fragnob > sum)
202 fragnob = sum;
203
204 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
205 iov[i].iov_base, fragnob);
206 }
207 conn->ksnc_msg.ksm_csum = saved_csum;
208 }
209
210 return rc;
211}
212
/*
 * Receive into the connection's page (kiov) fragments with a
 * non-blocking recvmsg.  When the incoming message carries a checksum
 * (ksm_csum != 0), fold the received bytes into the running rx checksum,
 * mapping each page just long enough to sum it.
 * Returns bytes received or a negative errno from sock_recvmsg().
 */
int
ksocknal_lib_recv_kiov(struct ksock_conn *conn)
{
	unsigned int niov = conn->ksnc_rx_nkiov;
	lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
	struct msghdr msg = {
		.msg_flags = 0
	};
	int nob;
	int i;
	int rc;
	void *base;
	int sum;
	int fragnob;

	/* total capacity of the fragment list */
	for (nob = i = 0; i < niov; i++)
		nob += kiov[i].bv_len;

	LASSERT(nob <= conn->ksnc_rx_nob_wanted);

	iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);

	if (conn->ksnc_msg.ksm_csum) {
		/* walk only the bytes actually received (sum starts at rc) */
		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
			LASSERT(i < niov);

			/* kmap pins the page; must be matched by kunmap */
			base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
			fragnob = kiov[i].bv_len;
			if (fragnob > sum)
				fragnob = sum;	/* last, partial fragment */

			conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
							   base, fragnob);

			kunmap(kiov[i].bv_page);
		}
	}
	return rc;
}
253
254void
255ksocknal_lib_csum_tx(struct ksock_tx *tx)
256{
257 int i;
258 __u32 csum;
259 void *base;
260
261 LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg);
262 LASSERT(tx->tx_conn);
263 LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
264
265 tx->tx_msg.ksm_csum = 0;
266
267 csum = ksocknal_csum(~0, tx->tx_iov[0].iov_base,
268 tx->tx_iov[0].iov_len);
269
270 if (tx->tx_kiov) {
271 for (i = 0; i < tx->tx_nkiov; i++) {
272 base = kmap(tx->tx_kiov[i].bv_page) +
273 tx->tx_kiov[i].bv_offset;
274
275 csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len);
276
277 kunmap(tx->tx_kiov[i].bv_page);
278 }
279 } else {
280 for (i = 1; i < tx->tx_niov; i++)
281 csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
282 tx->tx_iov[i].iov_len);
283 }
284
285 if (*ksocknal_tunables.ksnd_inject_csum_error) {
286 csum++;
287 *ksocknal_tunables.ksnd_inject_csum_error = 0;
288 }
289
290 tx->tx_msg.ksm_csum = csum;
291}
292
293int
294ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle)
295{
296 struct socket *sock = conn->ksnc_sock;
297 int len;
298 int rc;
299
300 rc = ksocknal_connsock_addref(conn);
301 if (rc) {
302 LASSERT(conn->ksnc_closing);
303 *txmem = *rxmem = *nagle = 0;
304 return -ESHUTDOWN;
305 }
306
307 rc = lnet_sock_getbuf(sock, txmem, rxmem);
308 if (!rc) {
309 len = sizeof(*nagle);
310 rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY,
311 (char *)nagle, &len);
312 }
313
314 ksocknal_connsock_decref(conn);
315
316 if (!rc)
317 *nagle = !*nagle;
318 else
319 *txmem = *rxmem = *nagle = 0;
320
321 return rc;
322}
323
324int
325ksocknal_lib_setup_sock(struct socket *sock)
326{
327 int rc;
328 int option;
329 int keep_idle;
330 int keep_intvl;
331 int keep_count;
332 int do_keepalive;
333 struct linger linger;
334
335 sock->sk->sk_allocation = GFP_NOFS;
336
337
338
339
340
341 linger.l_onoff = 0;
342 linger.l_linger = 0;
343
344 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger,
345 sizeof(linger));
346 if (rc) {
347 CERROR("Can't set SO_LINGER: %d\n", rc);
348 return rc;
349 }
350
351 option = -1;
352 rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option,
353 sizeof(option));
354 if (rc) {
355 CERROR("Can't set SO_LINGER2: %d\n", rc);
356 return rc;
357 }
358
359 if (!*ksocknal_tunables.ksnd_nagle) {
360 option = 1;
361
362 rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
363 (char *)&option, sizeof(option));
364 if (rc) {
365 CERROR("Can't disable nagle: %d\n", rc);
366 return rc;
367 }
368 }
369
370 rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size,
371 *ksocknal_tunables.ksnd_rx_buffer_size);
372 if (rc) {
373 CERROR("Can't set buffer tx %d, rx %d buffers: %d\n",
374 *ksocknal_tunables.ksnd_tx_buffer_size,
375 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
376 return rc;
377 }
378
379
380
381
382 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
383 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
384 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
385
386 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
387
388 option = (do_keepalive ? 1 : 0);
389 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option,
390 sizeof(option));
391 if (rc) {
392 CERROR("Can't set SO_KEEPALIVE: %d\n", rc);
393 return rc;
394 }
395
396 if (!do_keepalive)
397 return 0;
398
399 rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle,
400 sizeof(keep_idle));
401 if (rc) {
402 CERROR("Can't set TCP_KEEPIDLE: %d\n", rc);
403 return rc;
404 }
405
406 rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
407 (char *)&keep_intvl, sizeof(keep_intvl));
408 if (rc) {
409 CERROR("Can't set TCP_KEEPINTVL: %d\n", rc);
410 return rc;
411 }
412
413 rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count,
414 sizeof(keep_count));
415 if (rc) {
416 CERROR("Can't set TCP_KEEPCNT: %d\n", rc);
417 return rc;
418 }
419
420 return 0;
421}
422
/*
 * Push any queued data out immediately by setting TCP_NODELAY, while
 * preserving the socket's configured Nagle state.  Silently does
 * nothing if the connection is already shutting down.
 */
void
ksocknal_lib_push_conn(struct ksock_conn *conn)
{
	struct sock *sk;
	struct tcp_sock *tp;
	int nonagle;
	int val = 1;
	int rc;

	rc = ksocknal_connsock_addref(conn);
	if (rc)			/* conn is closing; nothing to push */
		return;

	sk = conn->ksnc_sock->sk;
	tp = tcp_sk(sk);

	/* remember the current nonagle setting and force Nagle off */
	lock_sock(sk);
	nonagle = tp->nonagle;
	tp->nonagle = 1;
	release_sock(sk);

	/* setting TCP_NODELAY flushes pending output */
	rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY,
			       (char *)&val, sizeof(val));
	LASSERT(!rc);

	/* restore whatever nonagle mode was configured before */
	lock_sock(sk);
	tp->nonagle = nonagle;
	release_sock(sk);

	ksocknal_connsock_decref(conn);
}
454
455
456
457
458static void
459ksocknal_data_ready(struct sock *sk)
460{
461 struct ksock_conn *conn;
462
463
464 LASSERT(!in_irq());
465 read_lock(&ksocknal_data.ksnd_global_lock);
466
467 conn = sk->sk_user_data;
468 if (!conn) {
469 LASSERT(sk->sk_data_ready != &ksocknal_data_ready);
470 sk->sk_data_ready(sk);
471 } else {
472 ksocknal_read_callback(conn);
473 }
474
475 read_unlock(&ksocknal_data.ksnd_global_lock);
476}
477
478static void
479ksocknal_write_space(struct sock *sk)
480{
481 struct ksock_conn *conn;
482 int wspace;
483 int min_wpace;
484
485
486 LASSERT(!in_irq());
487 read_lock(&ksocknal_data.ksnd_global_lock);
488
489 conn = sk->sk_user_data;
490 wspace = sk_stream_wspace(sk);
491 min_wpace = sk_stream_min_wspace(sk);
492
493 CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
494 sk, wspace, min_wpace, conn,
495 !conn ? "" : (conn->ksnc_tx_ready ?
496 " ready" : " blocked"),
497 !conn ? "" : (conn->ksnc_tx_scheduled ?
498 " scheduled" : " idle"),
499 !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ?
500 " empty" : " queued"));
501
502 if (!conn) {
503 LASSERT(sk->sk_write_space != &ksocknal_write_space);
504 sk->sk_write_space(sk);
505
506 read_unlock(&ksocknal_data.ksnd_global_lock);
507 return;
508 }
509
510 if (wspace >= min_wpace) {
511 ksocknal_write_callback(conn);
512
513
514
515
516
517
518 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
519 }
520
521 read_unlock(&ksocknal_data.ksnd_global_lock);
522}
523
524void
525ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn)
526{
527 conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
528 conn->ksnc_saved_write_space = sock->sk->sk_write_space;
529}
530
531void
532ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn)
533{
534 sock->sk->sk_user_data = conn;
535 sock->sk->sk_data_ready = ksocknal_data_ready;
536 sock->sk->sk_write_space = ksocknal_write_space;
537}
538
539void
540ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn)
541{
542
543
544
545
546
547 sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
548 sock->sk->sk_write_space = conn->ksnc_saved_write_space;
549
550
551
552
553
554
555 sock->sk->sk_user_data = NULL;
556}
557
558int
559ksocknal_lib_memory_pressure(struct ksock_conn *conn)
560{
561 int rc = 0;
562 struct ksock_sched *sched;
563
564 sched = conn->ksnc_scheduler;
565 spin_lock_bh(&sched->kss_lock);
566
567 if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
568 !conn->ksnc_tx_ready) {
569
570
571
572
573
574
575
576
577
578
579 rc = -ENOMEM;
580 }
581
582 spin_unlock_bh(&sched->kss_lock);
583
584 return rc;
585}
586