linux/net/rds/bind.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2006 Oracle.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 *
  32 */
  33#include <linux/kernel.h>
  34#include <net/sock.h>
  35#include <linux/in.h>
  36#include <linux/if_arp.h>
  37#include "rds.h"
  38
  39/*
  40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't
  41 * particularly zippy.
  42 *
  43 * This is now called for every incoming frame so we arguably care much more
  44 * about it than we used to.
  45 */
  46static DEFINE_SPINLOCK(rds_bind_lock);
  47static struct rb_root rds_bind_tree = RB_ROOT;
  48
  49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
  50                                           struct rds_sock *insert)
  51{
  52        struct rb_node **p = &rds_bind_tree.rb_node;
  53        struct rb_node *parent = NULL;
  54        struct rds_sock *rs;
  55        u64 cmp;
  56        u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
  57
  58        while (*p) {
  59                parent = *p;
  60                rs = rb_entry(parent, struct rds_sock, rs_bound_node);
  61
  62                cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
  63                      be16_to_cpu(rs->rs_bound_port);
  64
  65                if (needle < cmp)
  66                        p = &(*p)->rb_left;
  67                else if (needle > cmp)
  68                        p = &(*p)->rb_right;
  69                else
  70                        return rs;
  71        }
  72
  73        if (insert) {
  74                rb_link_node(&insert->rs_bound_node, parent, p);
  75                rb_insert_color(&insert->rs_bound_node, &rds_bind_tree);
  76        }
  77        return NULL;
  78}
  79
  80/*
  81 * Return the rds_sock bound at the given local address.
  82 *
  83 * The rx path can race with rds_release.  We notice if rds_release() has
  84 * marked this socket and don't return a rs ref to the rx path.
  85 */
  86struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
  87{
  88        struct rds_sock *rs;
  89        unsigned long flags;
  90
  91        spin_lock_irqsave(&rds_bind_lock, flags);
  92        rs = rds_bind_tree_walk(addr, port, NULL);
  93        if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
  94                rds_sock_addref(rs);
  95        else
  96                rs = NULL;
  97        spin_unlock_irqrestore(&rds_bind_lock, flags);
  98
  99        rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
 100                ntohs(port));
 101        return rs;
 102}
 103
 104/* returns -ve errno or +ve port */
 105static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 106{
 107        unsigned long flags;
 108        int ret = -EADDRINUSE;
 109        u16 rover, last;
 110
 111        if (*port != 0) {
 112                rover = be16_to_cpu(*port);
 113                last = rover;
 114        } else {
 115                rover = max_t(u16, net_random(), 2);
 116                last = rover - 1;
 117        }
 118
 119        spin_lock_irqsave(&rds_bind_lock, flags);
 120
 121        do {
 122                if (rover == 0)
 123                        rover++;
 124                if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) {
 125                        *port = cpu_to_be16(rover);
 126                        ret = 0;
 127                        break;
 128                }
 129        } while (rover++ != last);
 130
 131        if (ret == 0)  {
 132                rs->rs_bound_addr = addr;
 133                rs->rs_bound_port = *port;
 134                rds_sock_addref(rs);
 135
 136                rdsdebug("rs %p binding to %pI4:%d\n",
 137                  rs, &addr, (int)ntohs(*port));
 138        }
 139
 140        spin_unlock_irqrestore(&rds_bind_lock, flags);
 141
 142        return ret;
 143}
 144
 145void rds_remove_bound(struct rds_sock *rs)
 146{
 147        unsigned long flags;
 148
 149        spin_lock_irqsave(&rds_bind_lock, flags);
 150
 151        if (rs->rs_bound_addr) {
 152                rdsdebug("rs %p unbinding from %pI4:%d\n",
 153                  rs, &rs->rs_bound_addr,
 154                  ntohs(rs->rs_bound_port));
 155
 156                rb_erase(&rs->rs_bound_node, &rds_bind_tree);
 157                rds_sock_put(rs);
 158                rs->rs_bound_addr = 0;
 159        }
 160
 161        spin_unlock_irqrestore(&rds_bind_lock, flags);
 162}
 163
 164int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 165{
 166        struct sock *sk = sock->sk;
 167        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 168        struct rds_sock *rs = rds_sk_to_rs(sk);
 169        struct rds_transport *trans;
 170        int ret = 0;
 171
 172        lock_sock(sk);
 173
 174        if (addr_len != sizeof(struct sockaddr_in) ||
 175            sin->sin_family != AF_INET ||
 176            rs->rs_bound_addr ||
 177            sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
 178                ret = -EINVAL;
 179                goto out;
 180        }
 181
 182        ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
 183        if (ret)
 184                goto out;
 185
 186        trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
 187        if (trans == NULL) {
 188                ret = -EADDRNOTAVAIL;
 189                rds_remove_bound(rs);
 190                if (printk_ratelimit())
 191                        printk(KERN_INFO "RDS: rds_bind() could not find a transport, "
 192                                "load rds_tcp or rds_rdma?\n");
 193                goto out;
 194        }
 195
 196        rs->rs_transport = trans;
 197        ret = 0;
 198
 199out:
 200        release_sock(sk);
 201        return ret;
 202}
 203