1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#include "socklnd.h"
38
39# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
40
41
/*
 * Binary sysctl identifiers used as .ctl_name in the socklnd sysctl table.
 * Values start at 1 and increase by one in declaration order, so the order
 * of the enumerators below must not be changed.
 */
enum {
        /* timeouts and credits */
        SOCKLND_TIMEOUT = 1,
        SOCKLND_CREDITS,
        SOCKLND_PEER_TXCREDITS,
        SOCKLND_PEER_RTRCREDITS,
        SOCKLND_PEER_TIMEOUT,

        /* connection management */
        SOCKLND_NCONNDS,
        SOCKLND_RECONNECTS_MIN,
        SOCKLND_RECONNECTS_MAX,

        /* data path */
        SOCKLND_EAGER_ACK,
        SOCKLND_ZERO_COPY,
        SOCKLND_TYPED,
        SOCKLND_BULK_MIN,
        SOCKLND_RX_BUFFER_SIZE,
        SOCKLND_TX_BUFFER_SIZE,
        SOCKLND_NAGLE,
        SOCKLND_IRQ_AFFINITY,
        SOCKLND_ROUND_ROBIN,

        /* keepalive */
        SOCKLND_KEEPALIVE,
        SOCKLND_KEEPALIVE_IDLE,
        SOCKLND_KEEPALIVE_COUNT,
        SOCKLND_KEEPALIVE_INTVL,

        /* misc */
        SOCKLND_BACKOFF_INIT,
        SOCKLND_BACKOFF_MAX,
        SOCKLND_PROTOCOL,
        SOCKLND_ZERO_COPY_RECV,
        SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
};
70
71static ctl_table_t ksocknal_ctl_table[] = {
72 {
73 .ctl_name = SOCKLND_TIMEOUT,
74 .procname = "timeout",
75 .data = &ksocknal_tunables.ksnd_timeout,
76 .maxlen = sizeof (int),
77 .mode = 0644,
78 .proc_handler = &proc_dointvec,
79 .strategy = &sysctl_intvec,
80 },
81 {
82 .ctl_name = SOCKLND_CREDITS,
83 .procname = "credits",
84 .data = &ksocknal_tunables.ksnd_credits,
85 .maxlen = sizeof (int),
86 .mode = 0444,
87 .proc_handler = &proc_dointvec,
88 .strategy = &sysctl_intvec,
89 },
90 {
91 .ctl_name = SOCKLND_PEER_TXCREDITS,
92 .procname = "peer_credits",
93 .data = &ksocknal_tunables.ksnd_peertxcredits,
94 .maxlen = sizeof (int),
95 .mode = 0444,
96 .proc_handler = &proc_dointvec,
97 .strategy = &sysctl_intvec,
98 },
99 {
100 .ctl_name = SOCKLND_PEER_RTRCREDITS,
101 .procname = "peer_buffer_credits",
102 .data = &ksocknal_tunables.ksnd_peerrtrcredits,
103 .maxlen = sizeof (int),
104 .mode = 0444,
105 .proc_handler = &proc_dointvec,
106 .strategy = &sysctl_intvec,
107 },
108 {
109 .ctl_name = SOCKLND_PEER_TIMEOUT,
110 .procname = "peer_timeout",
111 .data = &ksocknal_tunables.ksnd_peertimeout,
112 .maxlen = sizeof (int),
113 .mode = 0444,
114 .proc_handler = &proc_dointvec
115 .strategy = &sysctl_intvec,
116 },
117 {
118 .ctl_name = SOCKLND_NCONNDS,
119 .procname = "nconnds",
120 .data = &ksocknal_tunables.ksnd_nconnds,
121 .maxlen = sizeof (int),
122 .mode = 0444,
123 .proc_handler = &proc_dointvec,
124 .strategy = &sysctl_intvec,
125 },
126 {
127 .ctl_name = SOCKLND_RECONNECTS_MIN,
128 .procname = "min_reconnectms",
129 .data = &ksocknal_tunables.ksnd_min_reconnectms,
130 .maxlen = sizeof (int),
131 .mode = 0444,
132 .proc_handler = &proc_dointvec,
133 .strategy = &sysctl_intvec,
134 },
135 {
136 .ctl_name = SOCKLND_RECONNECTS_MAX,
137 .procname = "max_reconnectms",
138 .data = &ksocknal_tunables.ksnd_max_reconnectms,
139 .maxlen = sizeof (int),
140 .mode = 0444,
141 .proc_handler = &proc_dointvec,
142 .strategy = &sysctl_intvec,
143 },
144 {
145 .ctl_name = SOCKLND_EAGER_ACK,
146 .procname = "eager_ack",
147 .data = &ksocknal_tunables.ksnd_eager_ack,
148 .maxlen = sizeof (int),
149 .mode = 0644,
150 .proc_handler = &proc_dointvec,
151 .strategy = &sysctl_intvec,
152 },
153 {
154 .ctl_name = SOCKLND_ZERO_COPY,
155 .procname = "zero_copy",
156 .data = &ksocknal_tunables.ksnd_zc_min_payload,
157 .maxlen = sizeof (int),
158 .mode = 0644,
159 .proc_handler = &proc_dointvec,
160 .strategy = &sysctl_intvec,
161 },
162 {
163 .ctl_name = SOCKLND_ZERO_COPY_RECV,
164 .procname = "zero_copy_recv",
165 .data = &ksocknal_tunables.ksnd_zc_recv,
166 .maxlen = sizeof (int),
167 .mode = 0644,
168 .proc_handler = &proc_dointvec,
169 .strategy = &sysctl_intvec,
170 },
171
172 {
173 .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
174 .procname = "zero_copy_recv",
175 .data = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
176 .maxlen = sizeof (int),
177 .mode = 0644,
178 .proc_handler = &proc_dointvec,
179 .strategy = &sysctl_intvec,
180 },
181 {
182 .ctl_name = SOCKLND_TYPED,
183 .procname = "typed",
184 .data = &ksocknal_tunables.ksnd_typed_conns,
185 .maxlen = sizeof (int),
186 .mode = 0444,
187 .proc_handler = &proc_dointvec,
188 .strategy = &sysctl_intvec,
189 },
190 {
191 .ctl_name = SOCKLND_BULK_MIN,
192 .procname = "min_bulk",
193 .data = &ksocknal_tunables.ksnd_min_bulk,
194 .maxlen = sizeof (int),
195 .mode = 0644,
196 .proc_handler = &proc_dointvec,
197 .strategy = &sysctl_intvec,
198 },
199 {
200 .ctl_name = SOCKLND_RX_BUFFER_SIZE,
201 .procname = "rx_buffer_size",
202 .data = &ksocknal_tunables.ksnd_rx_buffer_size,
203 .maxlen = sizeof(int),
204 .mode = 0644,
205 .proc_handler = &proc_dointvec,
206 .strategy = &sysctl_intvec,
207 },
208 {
209 .ctl_name = SOCKLND_TX_BUFFER_SIZE,
210 .procname = "tx_buffer_size",
211 .data = &ksocknal_tunables.ksnd_tx_buffer_size,
212 .maxlen = sizeof(int),
213 .mode = 0644,
214 .proc_handler = &proc_dointvec,
215 .strategy = &sysctl_intvec,
216 },
217 {
218 .ctl_name = SOCKLND_NAGLE,
219 .procname = "nagle",
220 .data = &ksocknal_tunables.ksnd_nagle,
221 .maxlen = sizeof(int),
222 .mode = 0644,
223 .proc_handler = &proc_dointvec,
224 .strategy = &sysctl_intvec,
225 },
226 {
227 .ctl_name = SOCKLND_ROUND_ROBIN,
228 .procname = "round_robin",
229 .data = &ksocknal_tunables.ksnd_round_robin,
230 .maxlen = sizeof(int),
231 .mode = 0644,
232 .proc_handler = &proc_dointvec,
233 .strategy = &sysctl_intvec,
234 },
235 {
236 .ctl_name = SOCKLND_KEEPALIVE,
237 .procname = "keepalive",
238 .data = &ksocknal_tunables.ksnd_keepalive,
239 .maxlen = sizeof(int),
240 .mode = 0644,
241 .proc_handler = &proc_dointvec,
242 .strategy = &sysctl_intvec,
243 },
244 {
245 .ctl_name = SOCKLND_KEEPALIVE_IDLE,
246 .procname = "keepalive_idle",
247 .data = &ksocknal_tunables.ksnd_keepalive_idle,
248 .maxlen = sizeof(int),
249 .mode = 0644,
250 .proc_handler = &proc_dointvec,
251 .strategy = &sysctl_intvec,
252 },
253 {
254 .ctl_name = SOCKLND_KEEPALIVE_COUNT,
255 .procname = "keepalive_count",
256 .data = &ksocknal_tunables.ksnd_keepalive_count,
257 .maxlen = sizeof(int),
258 .mode = 0644,
259 .proc_handler = &proc_dointvec,
260 .strategy = &sysctl_intvec,
261 },
262 {
263 .ctl_name = SOCKLND_KEEPALIVE_INTVL,
264 .procname = "keepalive_intvl",
265 .data = &ksocknal_tunables.ksnd_keepalive_intvl,
266 .maxlen = sizeof(int),
267 .mode = 0644,
268 .proc_handler = &proc_dointvec,
269 .strategy = &sysctl_intvec,
270 },
271#if SOCKNAL_VERSION_DEBUG
272 {
273 .ctl_name = SOCKLND_PROTOCOL,
274 .procname = "protocol",
275 .data = &ksocknal_tunables.ksnd_protocol,
276 .maxlen = sizeof(int),
277 .mode = 0644,
278 .proc_handler = &proc_dointvec,
279 .strategy = &sysctl_intvec,
280 },
281#endif
282 {0}
283};
284
285
286ctl_table_t ksocknal_top_ctl_table[] = {
287 {
288 .ctl_name = CTL_SOCKLND,
289 .procname = "socknal",
290 .data = NULL,
291 .maxlen = 0,
292 .mode = 0555,
293 .child = ksocknal_ctl_table
294 },
295 { 0 }
296};
297
298int
299ksocknal_lib_tunables_init ()
300{
301 if (!*ksocknal_tunables.ksnd_typed_conns) {
302 int rc = -EINVAL;
303#if SOCKNAL_VERSION_DEBUG
304 if (*ksocknal_tunables.ksnd_protocol < 3)
305 rc = 0;
306#endif
307 if (rc != 0) {
308 CERROR("Protocol V3.x MUST have typed connections\n");
309 return rc;
310 }
311 }
312
313 if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
314 *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
315 if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
316 *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
317
318 ksocknal_tunables.ksnd_sysctl =
319 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
320
321 if (ksocknal_tunables.ksnd_sysctl == NULL)
322 CWARN("Can't setup /proc tunables\n");
323
324 return 0;
325}
326
327void
328ksocknal_lib_tunables_fini ()
329{
330 if (ksocknal_tunables.ksnd_sysctl != NULL)
331 unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
332}
333#else
/*
 * Stub used when the sysctl interface is compiled out: nothing to validate
 * or register here, always succeeds.
 */
int
ksocknal_lib_tunables_init ()
{
        /* no sysctl table in this configuration */
        return 0;
}
339
/*
 * Stub used when the sysctl interface is compiled out: there is nothing
 * to unregister.
 */
void
ksocknal_lib_tunables_fini ()
{
        return;
}
344#endif
345
346int
347ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
348{
349 int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
350 &conn->ksnc_ipaddr,
351 &conn->ksnc_port);
352
353
354 LASSERT (!conn->ksnc_closing);
355
356 if (rc != 0) {
357 CERROR ("Error %d getting sock peer IP\n", rc);
358 return rc;
359 }
360
361 rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
362 &conn->ksnc_myipaddr, NULL);
363 if (rc != 0) {
364 CERROR ("Error %d getting sock local IP\n", rc);
365 return rc;
366 }
367
368 return 0;
369}
370
371int
372ksocknal_lib_zc_capable(ksock_conn_t *conn)
373{
374 int caps = conn->ksnc_sock->sk->sk_route_caps;
375
376 if (conn->ksnc_proto == &ksocknal_protocol_v1x)
377 return 0;
378
379
380
381 return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
382}
383
384int
385ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
386{
387 struct socket *sock = conn->ksnc_sock;
388 int nob;
389 int rc;
390
391 if (*ksocknal_tunables.ksnd_enable_csum &&
392 conn->ksnc_proto == &ksocknal_protocol_v2x &&
393 tx->tx_nob == tx->tx_resid &&
394 tx->tx_msg.ksm_csum == 0)
395 ksocknal_lib_csum_tx(tx);
396
397
398
399
400 {
401#if SOCKNAL_SINGLE_FRAG_TX
402 struct iovec scratch;
403 struct iovec *scratchiov = &scratch;
404 unsigned int niov = 1;
405#else
406 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
407 unsigned int niov = tx->tx_niov;
408#endif
409 struct msghdr msg = {
410 .msg_name = NULL,
411 .msg_namelen = 0,
412 .msg_iov = scratchiov,
413 .msg_iovlen = niov,
414 .msg_control = NULL,
415 .msg_controllen = 0,
416 .msg_flags = MSG_DONTWAIT
417 };
418 mm_segment_t oldmm = get_fs();
419 int i;
420
421 for (nob = i = 0; i < niov; i++) {
422 scratchiov[i] = tx->tx_iov[i];
423 nob += scratchiov[i].iov_len;
424 }
425
426 if (!list_empty(&conn->ksnc_tx_queue) ||
427 nob < tx->tx_resid)
428 msg.msg_flags |= MSG_MORE;
429
430 set_fs (KERNEL_DS);
431 rc = sock_sendmsg(sock, &msg, nob);
432 set_fs (oldmm);
433 }
434 return rc;
435}
436
437int
438ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
439{
440 struct socket *sock = conn->ksnc_sock;
441 lnet_kiov_t *kiov = tx->tx_kiov;
442 int rc;
443 int nob;
444
445
446 LASSERT (tx->tx_lnetmsg != NULL);
447
448
449
450 if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
451
452 struct sock *sk = sock->sk;
453 struct page *page = kiov->kiov_page;
454 int offset = kiov->kiov_offset;
455 int fragsize = kiov->kiov_len;
456 int msgflg = MSG_DONTWAIT;
457
458 CDEBUG(D_NET, "page %p + offset %x for %d\n",
459 page, offset, kiov->kiov_len);
460
461 if (!list_empty(&conn->ksnc_tx_queue) ||
462 fragsize < tx->tx_resid)
463 msgflg |= MSG_MORE;
464
465 if (sk->sk_prot->sendpage != NULL) {
466 rc = sk->sk_prot->sendpage(sk, page,
467 offset, fragsize, msgflg);
468 } else {
469 rc = cfs_tcp_sendpage(sk, page, offset, fragsize,
470 msgflg);
471 }
472 } else {
473#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
474 struct iovec scratch;
475 struct iovec *scratchiov = &scratch;
476 unsigned int niov = 1;
477#else
478#ifdef CONFIG_HIGHMEM
479#warning "XXX risk of kmap deadlock on multiple frags..."
480#endif
481 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
482 unsigned int niov = tx->tx_nkiov;
483#endif
484 struct msghdr msg = {
485 .msg_name = NULL,
486 .msg_namelen = 0,
487 .msg_iov = scratchiov,
488 .msg_iovlen = niov,
489 .msg_control = NULL,
490 .msg_controllen = 0,
491 .msg_flags = MSG_DONTWAIT
492 };
493 mm_segment_t oldmm = get_fs();
494 int i;
495
496 for (nob = i = 0; i < niov; i++) {
497 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
498 kiov[i].kiov_offset;
499 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
500 }
501
502 if (!list_empty(&conn->ksnc_tx_queue) ||
503 nob < tx->tx_resid)
504 msg.msg_flags |= MSG_MORE;
505
506 set_fs (KERNEL_DS);
507 rc = sock_sendmsg(sock, &msg, nob);
508 set_fs (oldmm);
509
510 for (i = 0; i < niov; i++)
511 kunmap(kiov[i].kiov_page);
512 }
513 return rc;
514}
515
516void
517ksocknal_lib_eager_ack (ksock_conn_t *conn)
518{
519 int opt = 1;
520 mm_segment_t oldmm = get_fs();
521 struct socket *sock = conn->ksnc_sock;
522
523
524
525
526
527
528 set_fs(KERNEL_DS);
529 sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
530 (char *)&opt, sizeof (opt));
531 set_fs(oldmm);
532}
533
534int
535ksocknal_lib_recv_iov (ksock_conn_t *conn)
536{
537#if SOCKNAL_SINGLE_FRAG_RX
538 struct iovec scratch;
539 struct iovec *scratchiov = &scratch;
540 unsigned int niov = 1;
541#else
542 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
543 unsigned int niov = conn->ksnc_rx_niov;
544#endif
545 struct iovec *iov = conn->ksnc_rx_iov;
546 struct msghdr msg = {
547 .msg_name = NULL,
548 .msg_namelen = 0,
549 .msg_iov = scratchiov,
550 .msg_iovlen = niov,
551 .msg_control = NULL,
552 .msg_controllen = 0,
553 .msg_flags = 0
554 };
555 mm_segment_t oldmm = get_fs();
556 int nob;
557 int i;
558 int rc;
559 int fragnob;
560 int sum;
561 __u32 saved_csum;
562
563
564
565 LASSERT (niov > 0);
566
567 for (nob = i = 0; i < niov; i++) {
568 scratchiov[i] = iov[i];
569 nob += scratchiov[i].iov_len;
570 }
571 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
572
573 set_fs (KERNEL_DS);
574 rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
575
576 set_fs (oldmm);
577
578 saved_csum = 0;
579 if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
580 saved_csum = conn->ksnc_msg.ksm_csum;
581 conn->ksnc_msg.ksm_csum = 0;
582 }
583
584 if (saved_csum != 0) {
585
586 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
587 LASSERT (i < niov);
588
589 fragnob = iov[i].iov_len;
590 if (fragnob > sum)
591 fragnob = sum;
592
593 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
594 iov[i].iov_base, fragnob);
595 }
596 conn->ksnc_msg.ksm_csum = saved_csum;
597 }
598
599 return rc;
600}
601
602static void
603ksocknal_lib_kiov_vunmap(void *addr)
604{
605 if (addr == NULL)
606 return;
607
608 vunmap(addr);
609}
610
611static void *
612ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
613 struct iovec *iov, struct page **pages)
614{
615 void *addr;
616 int nob;
617 int i;
618
619 if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
620 return NULL;
621
622 LASSERT (niov <= LNET_MAX_IOV);
623
624 if (niov < 2 ||
625 niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
626 return NULL;
627
628 for (nob = i = 0; i < niov; i++) {
629 if ((kiov[i].kiov_offset != 0 && i > 0) ||
630 (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_CACHE_SIZE && i < niov - 1))
631 return NULL;
632
633 pages[i] = kiov[i].kiov_page;
634 nob += kiov[i].kiov_len;
635 }
636
637 addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
638 if (addr == NULL)
639 return NULL;
640
641 iov->iov_base = addr + kiov[0].kiov_offset;
642 iov->iov_len = nob;
643
644 return addr;
645}
646
647int
648ksocknal_lib_recv_kiov (ksock_conn_t *conn)
649{
650#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
651 struct iovec scratch;
652 struct iovec *scratchiov = &scratch;
653 struct page **pages = NULL;
654 unsigned int niov = 1;
655#else
656#ifdef CONFIG_HIGHMEM
657#warning "XXX risk of kmap deadlock on multiple frags..."
658#endif
659 struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
660 struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
661 unsigned int niov = conn->ksnc_rx_nkiov;
662#endif
663 lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
664 struct msghdr msg = {
665 .msg_name = NULL,
666 .msg_namelen = 0,
667 .msg_iov = scratchiov,
668 .msg_control = NULL,
669 .msg_controllen = 0,
670 .msg_flags = 0
671 };
672 mm_segment_t oldmm = get_fs();
673 int nob;
674 int i;
675 int rc;
676 void *base;
677 void *addr;
678 int sum;
679 int fragnob;
680
681
682
683 if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
684 nob = scratchiov[0].iov_len;
685 msg.msg_iovlen = 1;
686
687 } else {
688 for (nob = i = 0; i < niov; i++) {
689 nob += scratchiov[i].iov_len = kiov[i].kiov_len;
690 scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
691 kiov[i].kiov_offset;
692 }
693 msg.msg_iovlen = niov;
694 }
695
696 LASSERT (nob <= conn->ksnc_rx_nob_wanted);
697
698 set_fs (KERNEL_DS);
699 rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
700
701 set_fs (oldmm);
702
703 if (conn->ksnc_msg.ksm_csum != 0) {
704 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
705 LASSERT (i < niov);
706
707
708
709
710
711 base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
712 fragnob = kiov[i].kiov_len;
713 if (fragnob > sum)
714 fragnob = sum;
715
716 conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
717 base, fragnob);
718
719 kunmap(kiov[i].kiov_page);
720 }
721 }
722
723 if (addr != NULL) {
724 ksocknal_lib_kiov_vunmap(addr);
725 } else {
726 for (i = 0; i < niov; i++)
727 kunmap(kiov[i].kiov_page);
728 }
729
730 return (rc);
731}
732
733void
734ksocknal_lib_csum_tx(ksock_tx_t *tx)
735{
736 int i;
737 __u32 csum;
738 void *base;
739
740 LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg);
741 LASSERT(tx->tx_conn != NULL);
742 LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x);
743
744 tx->tx_msg.ksm_csum = 0;
745
746 csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base,
747 tx->tx_iov[0].iov_len);
748
749 if (tx->tx_kiov != NULL) {
750 for (i = 0; i < tx->tx_nkiov; i++) {
751 base = kmap(tx->tx_kiov[i].kiov_page) +
752 tx->tx_kiov[i].kiov_offset;
753
754 csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
755
756 kunmap(tx->tx_kiov[i].kiov_page);
757 }
758 } else {
759 for (i = 1; i < tx->tx_niov; i++)
760 csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base,
761 tx->tx_iov[i].iov_len);
762 }
763
764 if (*ksocknal_tunables.ksnd_inject_csum_error) {
765 csum++;
766 *ksocknal_tunables.ksnd_inject_csum_error = 0;
767 }
768
769 tx->tx_msg.ksm_csum = csum;
770}
771
772int
773ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
774{
775 mm_segment_t oldmm = get_fs ();
776 struct socket *sock = conn->ksnc_sock;
777 int len;
778 int rc;
779
780 rc = ksocknal_connsock_addref(conn);
781 if (rc != 0) {
782 LASSERT (conn->ksnc_closing);
783 *txmem = *rxmem = *nagle = 0;
784 return (-ESHUTDOWN);
785 }
786
787 rc = libcfs_sock_getbuf(sock, txmem, rxmem);
788 if (rc == 0) {
789 len = sizeof(*nagle);
790 set_fs(KERNEL_DS);
791 rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
792 (char *)nagle, &len);
793 set_fs(oldmm);
794 }
795
796 ksocknal_connsock_decref(conn);
797
798 if (rc == 0)
799 *nagle = !*nagle;
800 else
801 *txmem = *rxmem = *nagle = 0;
802
803 return (rc);
804}
805
806int
807ksocknal_lib_setup_sock (struct socket *sock)
808{
809 mm_segment_t oldmm = get_fs ();
810 int rc;
811 int option;
812 int keep_idle;
813 int keep_intvl;
814 int keep_count;
815 int do_keepalive;
816 struct linger linger;
817
818 sock->sk->sk_allocation = GFP_NOFS;
819
820
821
822
823 linger.l_onoff = 0;
824 linger.l_linger = 0;
825
826 set_fs (KERNEL_DS);
827 rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
828 (char *)&linger, sizeof (linger));
829 set_fs (oldmm);
830 if (rc != 0) {
831 CERROR ("Can't set SO_LINGER: %d\n", rc);
832 return (rc);
833 }
834
835 option = -1;
836 set_fs (KERNEL_DS);
837 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
838 (char *)&option, sizeof (option));
839 set_fs (oldmm);
840 if (rc != 0) {
841 CERROR ("Can't set SO_LINGER2: %d\n", rc);
842 return (rc);
843 }
844
845 if (!*ksocknal_tunables.ksnd_nagle) {
846 option = 1;
847
848 set_fs (KERNEL_DS);
849 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
850 (char *)&option, sizeof (option));
851 set_fs (oldmm);
852 if (rc != 0) {
853 CERROR ("Can't disable nagle: %d\n", rc);
854 return (rc);
855 }
856 }
857
858 rc = libcfs_sock_setbuf(sock,
859 *ksocknal_tunables.ksnd_tx_buffer_size,
860 *ksocknal_tunables.ksnd_rx_buffer_size);
861 if (rc != 0) {
862 CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
863 *ksocknal_tunables.ksnd_tx_buffer_size,
864 *ksocknal_tunables.ksnd_rx_buffer_size, rc);
865 return (rc);
866 }
867
868
869
870
871 keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
872 keep_count = *ksocknal_tunables.ksnd_keepalive_count;
873 keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
874
875 do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
876
877 option = (do_keepalive ? 1 : 0);
878 set_fs (KERNEL_DS);
879 rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
880 (char *)&option, sizeof (option));
881 set_fs (oldmm);
882 if (rc != 0) {
883 CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
884 return (rc);
885 }
886
887 if (!do_keepalive)
888 return (0);
889
890 set_fs (KERNEL_DS);
891 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
892 (char *)&keep_idle, sizeof (keep_idle));
893 set_fs (oldmm);
894 if (rc != 0) {
895 CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
896 return (rc);
897 }
898
899 set_fs (KERNEL_DS);
900 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
901 (char *)&keep_intvl, sizeof (keep_intvl));
902 set_fs (oldmm);
903 if (rc != 0) {
904 CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
905 return (rc);
906 }
907
908 set_fs (KERNEL_DS);
909 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
910 (char *)&keep_count, sizeof (keep_count));
911 set_fs (oldmm);
912 if (rc != 0) {
913 CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
914 return (rc);
915 }
916
917 return (0);
918}
919
920void
921ksocknal_lib_push_conn (ksock_conn_t *conn)
922{
923 struct sock *sk;
924 struct tcp_sock *tp;
925 int nonagle;
926 int val = 1;
927 int rc;
928 mm_segment_t oldmm;
929
930 rc = ksocknal_connsock_addref(conn);
931 if (rc != 0)
932 return;
933
934 sk = conn->ksnc_sock->sk;
935 tp = tcp_sk(sk);
936
937 lock_sock (sk);
938 nonagle = tp->nonagle;
939 tp->nonagle = 1;
940 release_sock (sk);
941
942 oldmm = get_fs ();
943 set_fs (KERNEL_DS);
944
945 rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
946 (char *)&val, sizeof (val));
947 LASSERT (rc == 0);
948
949 set_fs (oldmm);
950
951 lock_sock (sk);
952 tp->nonagle = nonagle;
953 release_sock (sk);
954
955 ksocknal_connsock_decref(conn);
956}
957
958extern void ksocknal_read_callback (ksock_conn_t *conn);
959extern void ksocknal_write_callback (ksock_conn_t *conn);
960
961
962
963static void
964ksocknal_data_ready (struct sock *sk, int n)
965{
966 ksock_conn_t *conn;
967 ENTRY;
968
969
970 LASSERT(!in_irq());
971 read_lock(&ksocknal_data.ksnd_global_lock);
972
973 conn = sk->sk_user_data;
974 if (conn == NULL) {
975 LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
976 sk->sk_data_ready (sk, n);
977 } else
978 ksocknal_read_callback(conn);
979
980 read_unlock(&ksocknal_data.ksnd_global_lock);
981
982 EXIT;
983}
984
985static void
986ksocknal_write_space (struct sock *sk)
987{
988 ksock_conn_t *conn;
989 int wspace;
990 int min_wpace;
991
992
993 LASSERT(!in_irq());
994 read_lock(&ksocknal_data.ksnd_global_lock);
995
996 conn = sk->sk_user_data;
997 wspace = SOCKNAL_WSPACE(sk);
998 min_wpace = SOCKNAL_MIN_WSPACE(sk);
999
1000 CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
1001 sk, wspace, min_wpace, conn,
1002 (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
1003 " ready" : " blocked"),
1004 (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
1005 " scheduled" : " idle"),
1006 (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
1007 " empty" : " queued"));
1008
1009 if (conn == NULL) {
1010 LASSERT (sk->sk_write_space != &ksocknal_write_space);
1011 sk->sk_write_space (sk);
1012
1013 read_unlock(&ksocknal_data.ksnd_global_lock);
1014 return;
1015 }
1016
1017 if (wspace >= min_wpace) {
1018 ksocknal_write_callback(conn);
1019
1020
1021
1022
1023
1024 clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
1025 }
1026
1027 read_unlock(&ksocknal_data.ksnd_global_lock);
1028}
1029
1030void
1031ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
1032{
1033 conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
1034 conn->ksnc_saved_write_space = sock->sk->sk_write_space;
1035}
1036
1037void
1038ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
1039{
1040 sock->sk->sk_user_data = conn;
1041 sock->sk->sk_data_ready = ksocknal_data_ready;
1042 sock->sk->sk_write_space = ksocknal_write_space;
1043 return;
1044}
1045
1046void
1047ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
1048{
1049
1050
1051
1052 sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
1053 sock->sk->sk_write_space = conn->ksnc_saved_write_space;
1054
1055
1056
1057
1058 sock->sk->sk_user_data = NULL;
1059
1060 return ;
1061}
1062
1063int
1064ksocknal_lib_memory_pressure(ksock_conn_t *conn)
1065{
1066 int rc = 0;
1067 ksock_sched_t *sched;
1068
1069 sched = conn->ksnc_scheduler;
1070 spin_lock_bh(&sched->kss_lock);
1071
1072 if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
1073 !conn->ksnc_tx_ready) {
1074
1075
1076
1077
1078
1079
1080
1081
1082 rc = -ENOMEM;
1083 }
1084
1085 spin_unlock_bh(&sched->kss_lock);
1086
1087 return rc;
1088}
1089