linux/net/rds/tcp.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2006 Oracle.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 *
  32 */
  33#include <linux/kernel.h>
  34#include <linux/slab.h>
  35#include <linux/in.h>
  36#include <linux/module.h>
  37#include <net/tcp.h>
  38#include <net/net_namespace.h>
  39#include <net/netns/generic.h>
  40
  41#include "rds.h"
  42#include "tcp.h"
  43
  44/* only for info exporting */
  45static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
  46static LIST_HEAD(rds_tcp_tc_list);
  47static unsigned int rds_tcp_tc_count;
  48
  49/* Track rds_tcp_connection structs so they can be cleaned up */
  50static DEFINE_SPINLOCK(rds_tcp_conn_lock);
  51static LIST_HEAD(rds_tcp_conn_list);
  52
  53static struct kmem_cache *rds_tcp_conn_slab;
  54
  55static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
  56                                 void __user *buffer, size_t *lenp,
  57                                 loff_t *fpos);
  58
  59int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
  60int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
  61
  62static struct ctl_table rds_tcp_sysctl_table[] = {
  63#define RDS_TCP_SNDBUF  0
  64        {
  65                .procname       = "rds_tcp_sndbuf",
  66                /* data is per-net pointer */
  67                .maxlen         = sizeof(int),
  68                .mode           = 0644,
  69                .proc_handler   = rds_tcp_skbuf_handler,
  70                .extra1         = &rds_tcp_min_sndbuf,
  71        },
  72#define RDS_TCP_RCVBUF  1
  73        {
  74                .procname       = "rds_tcp_rcvbuf",
  75                /* data is per-net pointer */
  76                .maxlen         = sizeof(int),
  77                .mode           = 0644,
  78                .proc_handler   = rds_tcp_skbuf_handler,
  79                .extra1         = &rds_tcp_min_rcvbuf,
  80        },
  81        { }
  82};
  83
  84/* doing it this way avoids calling tcp_sk() */
  85void rds_tcp_nonagle(struct socket *sock)
  86{
  87        mm_segment_t oldfs = get_fs();
  88        int val = 1;
  89
  90        set_fs(KERNEL_DS);
  91        sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, (char __user *)&val,
  92                              sizeof(val));
  93        set_fs(oldfs);
  94}
  95
  96u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
  97{
  98        return tcp_sk(tc->t_sock->sk)->snd_nxt;
  99}
 100
 101u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
 102{
 103        return tcp_sk(tc->t_sock->sk)->snd_una;
 104}
 105
 106void rds_tcp_restore_callbacks(struct socket *sock,
 107                               struct rds_tcp_connection *tc)
 108{
 109        rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc);
 110        write_lock_bh(&sock->sk->sk_callback_lock);
 111
 112        /* done under the callback_lock to serialize with write_space */
 113        spin_lock(&rds_tcp_tc_list_lock);
 114        list_del_init(&tc->t_list_item);
 115        rds_tcp_tc_count--;
 116        spin_unlock(&rds_tcp_tc_list_lock);
 117
 118        tc->t_sock = NULL;
 119
 120        sock->sk->sk_write_space = tc->t_orig_write_space;
 121        sock->sk->sk_data_ready = tc->t_orig_data_ready;
 122        sock->sk->sk_state_change = tc->t_orig_state_change;
 123        sock->sk->sk_user_data = NULL;
 124
 125        write_unlock_bh(&sock->sk->sk_callback_lock);
 126}
 127
 128/*
 129 * rds_tcp_reset_callbacks() switches the to the new sock and
 130 * returns the existing tc->t_sock.
 131 *
 132 * The only functions that set tc->t_sock are rds_tcp_set_callbacks
 133 * and rds_tcp_reset_callbacks.  Send and receive trust that
 134 * it is set.  The absence of RDS_CONN_UP bit protects those paths
 135 * from being called while it isn't set.
 136 */
 137void rds_tcp_reset_callbacks(struct socket *sock,
 138                             struct rds_connection *conn)
 139{
 140        struct rds_tcp_connection *tc = conn->c_transport_data;
 141        struct socket *osock = tc->t_sock;
 142
 143        if (!osock)
 144                goto newsock;
 145
 146        /* Need to resolve a duelling SYN between peers.
 147         * We have an outstanding SYN to this peer, which may
 148         * potentially have transitioned to the RDS_CONN_UP state,
 149         * so we must quiesce any send threads before resetting
 150         * c_transport_data. We quiesce these threads by setting
 151         * c_state to something other than RDS_CONN_UP, and then
 152         * waiting for any existing threads in rds_send_xmit to
 153         * complete release_in_xmit(). (Subsequent threads entering
 154         * rds_send_xmit() will bail on !rds_conn_up().
 155         *
 156         * However an incoming syn-ack at this point would end up
 157         * marking the conn as RDS_CONN_UP, and would again permit
 158         * rds_send_xmi() threads through, so ideally we would
 159         * synchronize on RDS_CONN_UP after lock_sock(), but cannot
 160         * do that: waiting on !RDS_IN_XMIT after lock_sock() may
 161         * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
 162         * would not get set. As a result, we set c_state to
 163         * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
 164         * cannot mark rds_conn_path_up() in the window before lock_sock()
 165         */
 166        atomic_set(&conn->c_state, RDS_CONN_RESETTING);
 167        wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
 168        lock_sock(osock->sk);
 169        /* reset receive side state for rds_tcp_data_recv() for osock  */
 170        if (tc->t_tinc) {
 171                rds_inc_put(&tc->t_tinc->ti_inc);
 172                tc->t_tinc = NULL;
 173        }
 174        tc->t_tinc_hdr_rem = sizeof(struct rds_header);
 175        tc->t_tinc_data_rem = 0;
 176        tc->t_sock = NULL;
 177
 178        write_lock_bh(&osock->sk->sk_callback_lock);
 179
 180        osock->sk->sk_user_data = NULL;
 181        osock->sk->sk_data_ready = tc->t_orig_data_ready;
 182        osock->sk->sk_write_space = tc->t_orig_write_space;
 183        osock->sk->sk_state_change = tc->t_orig_state_change;
 184        write_unlock_bh(&osock->sk->sk_callback_lock);
 185        release_sock(osock->sk);
 186        sock_release(osock);
 187newsock:
 188        rds_send_reset(conn);
 189        lock_sock(sock->sk);
 190        write_lock_bh(&sock->sk->sk_callback_lock);
 191        tc->t_sock = sock;
 192        sock->sk->sk_user_data = conn;
 193        sock->sk->sk_data_ready = rds_tcp_data_ready;
 194        sock->sk->sk_write_space = rds_tcp_write_space;
 195        sock->sk->sk_state_change = rds_tcp_state_change;
 196
 197        write_unlock_bh(&sock->sk->sk_callback_lock);
 198        release_sock(sock->sk);
 199}
 200
 201/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments
 202 * above rds_tcp_reset_callbacks for notes about synchronization
 203 * with data path
 204 */
 205void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
 206{
 207        struct rds_tcp_connection *tc = conn->c_transport_data;
 208
 209        rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
 210        write_lock_bh(&sock->sk->sk_callback_lock);
 211
 212        /* done under the callback_lock to serialize with write_space */
 213        spin_lock(&rds_tcp_tc_list_lock);
 214        list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
 215        rds_tcp_tc_count++;
 216        spin_unlock(&rds_tcp_tc_list_lock);
 217
 218        /* accepted sockets need our listen data ready undone */
 219        if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
 220                sock->sk->sk_data_ready = sock->sk->sk_user_data;
 221
 222        tc->t_sock = sock;
 223        tc->conn = conn;
 224        tc->t_orig_data_ready = sock->sk->sk_data_ready;
 225        tc->t_orig_write_space = sock->sk->sk_write_space;
 226        tc->t_orig_state_change = sock->sk->sk_state_change;
 227
 228        sock->sk->sk_user_data = conn;
 229        sock->sk->sk_data_ready = rds_tcp_data_ready;
 230        sock->sk->sk_write_space = rds_tcp_write_space;
 231        sock->sk->sk_state_change = rds_tcp_state_change;
 232
 233        write_unlock_bh(&sock->sk->sk_callback_lock);
 234}
 235
 236static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
 237                            struct rds_info_iterator *iter,
 238                            struct rds_info_lengths *lens)
 239{
 240        struct rds_info_tcp_socket tsinfo;
 241        struct rds_tcp_connection *tc;
 242        unsigned long flags;
 243        struct sockaddr_in sin;
 244        int sinlen;
 245
 246        spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 247
 248        if (len / sizeof(tsinfo) < rds_tcp_tc_count)
 249                goto out;
 250
 251        list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
 252
 253                sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0);
 254                tsinfo.local_addr = sin.sin_addr.s_addr;
 255                tsinfo.local_port = sin.sin_port;
 256                sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1);
 257                tsinfo.peer_addr = sin.sin_addr.s_addr;
 258                tsinfo.peer_port = sin.sin_port;
 259
 260                tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
 261                tsinfo.data_rem = tc->t_tinc_data_rem;
 262                tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
 263                tsinfo.last_expected_una = tc->t_last_expected_una;
 264                tsinfo.last_seen_una = tc->t_last_seen_una;
 265
 266                rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
 267        }
 268
 269out:
 270        lens->nr = rds_tcp_tc_count;
 271        lens->each = sizeof(tsinfo);
 272
 273        spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
 274}
 275
 276static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 277{
 278        if (inet_addr_type(net, addr) == RTN_LOCAL)
 279                return 0;
 280        return -EADDRNOTAVAIL;
 281}
 282
 283static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 284{
 285        struct rds_tcp_connection *tc;
 286
 287        tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
 288        if (!tc)
 289                return -ENOMEM;
 290
 291        mutex_init(&tc->t_conn_lock);
 292        tc->t_sock = NULL;
 293        tc->t_tinc = NULL;
 294        tc->t_tinc_hdr_rem = sizeof(struct rds_header);
 295        tc->t_tinc_data_rem = 0;
 296
 297        conn->c_transport_data = tc;
 298
 299        spin_lock_irq(&rds_tcp_conn_lock);
 300        list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
 301        spin_unlock_irq(&rds_tcp_conn_lock);
 302
 303        rdsdebug("alloced tc %p\n", conn->c_transport_data);
 304        return 0;
 305}
 306
 307static void rds_tcp_conn_free(void *arg)
 308{
 309        struct rds_tcp_connection *tc = arg;
 310        unsigned long flags;
 311        rdsdebug("freeing tc %p\n", tc);
 312
 313        spin_lock_irqsave(&rds_tcp_conn_lock, flags);
 314        list_del(&tc->t_tcp_node);
 315        spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
 316
 317        kmem_cache_free(rds_tcp_conn_slab, tc);
 318}
 319
 320static void rds_tcp_destroy_conns(void)
 321{
 322        struct rds_tcp_connection *tc, *_tc;
 323        LIST_HEAD(tmp_list);
 324
 325        /* avoid calling conn_destroy with irqs off */
 326        spin_lock_irq(&rds_tcp_conn_lock);
 327        list_splice(&rds_tcp_conn_list, &tmp_list);
 328        INIT_LIST_HEAD(&rds_tcp_conn_list);
 329        spin_unlock_irq(&rds_tcp_conn_lock);
 330
 331        list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
 332                if (tc->conn->c_passive)
 333                        rds_conn_destroy(tc->conn->c_passive);
 334                rds_conn_destroy(tc->conn);
 335        }
 336}
 337
 338static void rds_tcp_exit(void);
 339
 340struct rds_transport rds_tcp_transport = {
 341        .laddr_check            = rds_tcp_laddr_check,
 342        .xmit_prepare           = rds_tcp_xmit_prepare,
 343        .xmit_complete          = rds_tcp_xmit_complete,
 344        .xmit                   = rds_tcp_xmit,
 345        .recv                   = rds_tcp_recv,
 346        .conn_alloc             = rds_tcp_conn_alloc,
 347        .conn_free              = rds_tcp_conn_free,
 348        .conn_connect           = rds_tcp_conn_connect,
 349        .conn_shutdown          = rds_tcp_conn_shutdown,
 350        .inc_copy_to_user       = rds_tcp_inc_copy_to_user,
 351        .inc_free               = rds_tcp_inc_free,
 352        .stats_info_copy        = rds_tcp_stats_info_copy,
 353        .exit                   = rds_tcp_exit,
 354        .t_owner                = THIS_MODULE,
 355        .t_name                 = "tcp",
 356        .t_type                 = RDS_TRANS_TCP,
 357        .t_prefer_loopback      = 1,
 358};
 359
 360static int rds_tcp_netid;
 361
 362/* per-network namespace private data for this module */
 363struct rds_tcp_net {
 364        struct socket *rds_tcp_listen_sock;
 365        struct work_struct rds_tcp_accept_w;
 366        struct ctl_table_header *rds_tcp_sysctl;
 367        struct ctl_table *ctl_table;
 368        int sndbuf_size;
 369        int rcvbuf_size;
 370};
 371
 372/* All module specific customizations to the RDS-TCP socket should be done in
 373 * rds_tcp_tune() and applied after socket creation.
 374 */
 375void rds_tcp_tune(struct socket *sock)
 376{
 377        struct sock *sk = sock->sk;
 378        struct net *net = sock_net(sk);
 379        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 380
 381        rds_tcp_nonagle(sock);
 382        lock_sock(sk);
 383        if (rtn->sndbuf_size > 0) {
 384                sk->sk_sndbuf = rtn->sndbuf_size;
 385                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 386        }
 387        if (rtn->rcvbuf_size > 0) {
 388                sk->sk_sndbuf = rtn->rcvbuf_size;
 389                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 390        }
 391        release_sock(sk);
 392}
 393
 394static void rds_tcp_accept_worker(struct work_struct *work)
 395{
 396        struct rds_tcp_net *rtn = container_of(work,
 397                                               struct rds_tcp_net,
 398                                               rds_tcp_accept_w);
 399
 400        while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0)
 401                cond_resched();
 402}
 403
 404void rds_tcp_accept_work(struct sock *sk)
 405{
 406        struct net *net = sock_net(sk);
 407        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 408
 409        queue_work(rds_wq, &rtn->rds_tcp_accept_w);
 410}
 411
 412static __net_init int rds_tcp_init_net(struct net *net)
 413{
 414        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 415        struct ctl_table *tbl;
 416        int err = 0;
 417
 418        memset(rtn, 0, sizeof(*rtn));
 419
 420        /* {snd, rcv}buf_size default to 0, which implies we let the
 421         * stack pick the value, and permit auto-tuning of buffer size.
 422         */
 423        if (net == &init_net) {
 424                tbl = rds_tcp_sysctl_table;
 425        } else {
 426                tbl = kmemdup(rds_tcp_sysctl_table,
 427                              sizeof(rds_tcp_sysctl_table), GFP_KERNEL);
 428                if (!tbl) {
 429                        pr_warn("could not set allocate syctl table\n");
 430                        return -ENOMEM;
 431                }
 432                rtn->ctl_table = tbl;
 433        }
 434        tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size;
 435        tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size;
 436        rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl);
 437        if (!rtn->rds_tcp_sysctl) {
 438                pr_warn("could not register sysctl\n");
 439                err = -ENOMEM;
 440                goto fail;
 441        }
 442        rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
 443        if (!rtn->rds_tcp_listen_sock) {
 444                pr_warn("could not set up listen sock\n");
 445                unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
 446                rtn->rds_tcp_sysctl = NULL;
 447                err = -EAFNOSUPPORT;
 448                goto fail;
 449        }
 450        INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
 451        return 0;
 452
 453fail:
 454        if (net != &init_net)
 455                kfree(tbl);
 456        return err;
 457}
 458
 459static void __net_exit rds_tcp_exit_net(struct net *net)
 460{
 461        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 462
 463        if (rtn->rds_tcp_sysctl)
 464                unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
 465
 466        if (net != &init_net && rtn->ctl_table)
 467                kfree(rtn->ctl_table);
 468
 469        /* If rds_tcp_exit_net() is called as a result of netns deletion,
 470         * the rds_tcp_kill_sock() device notifier would already have cleaned
 471         * up the listen socket, thus there is no work to do in this function.
 472         *
 473         * If rds_tcp_exit_net() is called as a result of module unload,
 474         * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
 475         * we do need to clean up the listen socket here.
 476         */
 477        if (rtn->rds_tcp_listen_sock) {
 478                rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
 479                rtn->rds_tcp_listen_sock = NULL;
 480                flush_work(&rtn->rds_tcp_accept_w);
 481        }
 482}
 483
 484static struct pernet_operations rds_tcp_net_ops = {
 485        .init = rds_tcp_init_net,
 486        .exit = rds_tcp_exit_net,
 487        .id = &rds_tcp_netid,
 488        .size = sizeof(struct rds_tcp_net),
 489};
 490
 491static void rds_tcp_kill_sock(struct net *net)
 492{
 493        struct rds_tcp_connection *tc, *_tc;
 494        struct sock *sk;
 495        LIST_HEAD(tmp_list);
 496        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 497
 498        rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
 499        rtn->rds_tcp_listen_sock = NULL;
 500        flush_work(&rtn->rds_tcp_accept_w);
 501        spin_lock_irq(&rds_tcp_conn_lock);
 502        list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 503                struct net *c_net = read_pnet(&tc->conn->c_net);
 504
 505                if (net != c_net || !tc->t_sock)
 506                        continue;
 507                list_move_tail(&tc->t_tcp_node, &tmp_list);
 508        }
 509        spin_unlock_irq(&rds_tcp_conn_lock);
 510        list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
 511                sk = tc->t_sock->sk;
 512                sk->sk_prot->disconnect(sk, 0);
 513                tcp_done(sk);
 514                if (tc->conn->c_passive)
 515                        rds_conn_destroy(tc->conn->c_passive);
 516                rds_conn_destroy(tc->conn);
 517        }
 518}
 519
 520static int rds_tcp_dev_event(struct notifier_block *this,
 521                             unsigned long event, void *ptr)
 522{
 523        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 524
 525        /* rds-tcp registers as a pernet subys, so the ->exit will only
 526         * get invoked after network acitivity has quiesced. We need to
 527         * clean up all sockets  to quiesce network activity, and use
 528         * the unregistration of the per-net loopback device as a trigger
 529         * to start that cleanup.
 530         */
 531        if (event == NETDEV_UNREGISTER_FINAL &&
 532            dev->ifindex == LOOPBACK_IFINDEX)
 533                rds_tcp_kill_sock(dev_net(dev));
 534
 535        return NOTIFY_DONE;
 536}
 537
 538static struct notifier_block rds_tcp_dev_notifier = {
 539        .notifier_call        = rds_tcp_dev_event,
 540        .priority = -10, /* must be called after other network notifiers */
 541};
 542
 543/* when sysctl is used to modify some kernel socket parameters,this
 544 * function  resets the RDS connections in that netns  so that we can
 545 * restart with new parameters.  The assumption is that such reset
 546 * events are few and far-between.
 547 */
 548static void rds_tcp_sysctl_reset(struct net *net)
 549{
 550        struct rds_tcp_connection *tc, *_tc;
 551
 552        spin_lock_irq(&rds_tcp_conn_lock);
 553        list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 554                struct net *c_net = read_pnet(&tc->conn->c_net);
 555
 556                if (net != c_net || !tc->t_sock)
 557                        continue;
 558
 559                rds_conn_drop(tc->conn); /* reconnect with new parameters */
 560        }
 561        spin_unlock_irq(&rds_tcp_conn_lock);
 562}
 563
 564static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
 565                                 void __user *buffer, size_t *lenp,
 566                                 loff_t *fpos)
 567{
 568        struct net *net = current->nsproxy->net_ns;
 569        int err;
 570
 571        err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos);
 572        if (err < 0) {
 573                pr_warn("Invalid input. Must be >= %d\n",
 574                        *(int *)(ctl->extra1));
 575                return err;
 576        }
 577        if (write)
 578                rds_tcp_sysctl_reset(net);
 579        return 0;
 580}
 581
 582static void rds_tcp_exit(void)
 583{
 584        rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 585        unregister_pernet_subsys(&rds_tcp_net_ops);
 586        if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
 587                pr_warn("could not unregister rds_tcp_dev_notifier\n");
 588        rds_tcp_destroy_conns();
 589        rds_trans_unregister(&rds_tcp_transport);
 590        rds_tcp_recv_exit();
 591        kmem_cache_destroy(rds_tcp_conn_slab);
 592}
 593module_exit(rds_tcp_exit);
 594
 595static int rds_tcp_init(void)
 596{
 597        int ret;
 598
 599        rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
 600                                              sizeof(struct rds_tcp_connection),
 601                                              0, 0, NULL);
 602        if (!rds_tcp_conn_slab) {
 603                ret = -ENOMEM;
 604                goto out;
 605        }
 606
 607        ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
 608        if (ret) {
 609                pr_warn("could not register rds_tcp_dev_notifier\n");
 610                goto out;
 611        }
 612
 613        ret = register_pernet_subsys(&rds_tcp_net_ops);
 614        if (ret)
 615                goto out_slab;
 616
 617        ret = rds_tcp_recv_init();
 618        if (ret)
 619                goto out_pernet;
 620
 621        ret = rds_trans_register(&rds_tcp_transport);
 622        if (ret)
 623                goto out_recv;
 624
 625        rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 626
 627        goto out;
 628
 629out_recv:
 630        rds_tcp_recv_exit();
 631out_pernet:
 632        unregister_pernet_subsys(&rds_tcp_net_ops);
 633out_slab:
 634        kmem_cache_destroy(rds_tcp_conn_slab);
 635out:
 636        return ret;
 637}
 638module_init(rds_tcp_init);
 639
 640MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
 641MODULE_DESCRIPTION("RDS: TCP transport");
 642MODULE_LICENSE("Dual BSD/GPL");
 643
 644