linux/drivers/infiniband/core/addr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/mutex.h>
  37#include <linux/inetdevice.h>
  38#include <linux/slab.h>
  39#include <linux/workqueue.h>
  40#include <linux/module.h>
  41#include <net/arp.h>
  42#include <net/neighbour.h>
  43#include <net/route.h>
  44#include <net/netevent.h>
  45#include <net/addrconf.h>
  46#include <net/ip6_route.h>
  47#include <rdma/ib_addr.h>
  48#include <rdma/ib.h>
  49
  50MODULE_AUTHOR("Sean Hefty");
  51MODULE_DESCRIPTION("IB Address Translation");
  52MODULE_LICENSE("Dual BSD/GPL");
  53
  54struct addr_req {
  55        struct list_head list;
  56        struct sockaddr_storage src_addr;
  57        struct sockaddr_storage dst_addr;
  58        struct rdma_dev_addr *addr;
  59        struct rdma_addr_client *client;
  60        void *context;
  61        void (*callback)(int status, struct sockaddr *src_addr,
  62                         struct rdma_dev_addr *addr, void *context);
  63        unsigned long timeout;
  64        int status;
  65};
  66
  67static void process_req(struct work_struct *work);
  68
  69static DEFINE_MUTEX(lock);
  70static LIST_HEAD(req_list);
  71static DECLARE_DELAYED_WORK(work, process_req);
  72static struct workqueue_struct *addr_wq;
  73
  74int rdma_addr_size(struct sockaddr *addr)
  75{
  76        switch (addr->sa_family) {
  77        case AF_INET:
  78                return sizeof(struct sockaddr_in);
  79        case AF_INET6:
  80                return sizeof(struct sockaddr_in6);
  81        case AF_IB:
  82                return sizeof(struct sockaddr_ib);
  83        default:
  84                return 0;
  85        }
  86}
  87EXPORT_SYMBOL(rdma_addr_size);
  88
  89void rdma_addr_register_client(struct rdma_addr_client *client)
  90{
  91        atomic_set(&client->refcount, 1);
  92        init_completion(&client->comp);
  93}
  94EXPORT_SYMBOL(rdma_addr_register_client);
  95
  96static inline void put_client(struct rdma_addr_client *client)
  97{
  98        if (atomic_dec_and_test(&client->refcount))
  99                complete(&client->comp);
 100}
 101
 102void rdma_addr_unregister_client(struct rdma_addr_client *client)
 103{
 104        put_client(client);
 105        wait_for_completion(&client->comp);
 106}
 107EXPORT_SYMBOL(rdma_addr_unregister_client);
 108
 109int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 110                     const unsigned char *dst_dev_addr)
 111{
 112        dev_addr->dev_type = dev->type;
 113        memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 114        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 115        if (dst_dev_addr)
 116                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 117        dev_addr->bound_dev_if = dev->ifindex;
 118        return 0;
 119}
 120EXPORT_SYMBOL(rdma_copy_addr);
 121
 122int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 123{
 124        struct net_device *dev;
 125        int ret = -EADDRNOTAVAIL;
 126
 127        if (dev_addr->bound_dev_if) {
 128                dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
 129                if (!dev)
 130                        return -ENODEV;
 131                ret = rdma_copy_addr(dev_addr, dev, NULL);
 132                dev_put(dev);
 133                return ret;
 134        }
 135
 136        switch (addr->sa_family) {
 137        case AF_INET:
 138                dev = ip_dev_find(&init_net,
 139                        ((struct sockaddr_in *) addr)->sin_addr.s_addr);
 140
 141                if (!dev)
 142                        return ret;
 143
 144                ret = rdma_copy_addr(dev_addr, dev, NULL);
 145                dev_put(dev);
 146                break;
 147
 148#if IS_ENABLED(CONFIG_IPV6)
 149        case AF_INET6:
 150                rcu_read_lock();
 151                for_each_netdev_rcu(&init_net, dev) {
 152                        if (ipv6_chk_addr(&init_net,
 153                                          &((struct sockaddr_in6 *) addr)->sin6_addr,
 154                                          dev, 1)) {
 155                                ret = rdma_copy_addr(dev_addr, dev, NULL);
 156                                break;
 157                        }
 158                }
 159                rcu_read_unlock();
 160                break;
 161#endif
 162        }
 163        return ret;
 164}
 165EXPORT_SYMBOL(rdma_translate_ip);
 166
 167static void set_timeout(unsigned long time)
 168{
 169        unsigned long delay;
 170
 171        delay = time - jiffies;
 172        if ((long)delay <= 0)
 173                delay = 1;
 174
 175        mod_delayed_work(addr_wq, &work, delay);
 176}
 177
 178static void queue_req(struct addr_req *req)
 179{
 180        struct addr_req *temp_req;
 181
 182        mutex_lock(&lock);
 183        list_for_each_entry_reverse(temp_req, &req_list, list) {
 184                if (time_after_eq(req->timeout, temp_req->timeout))
 185                        break;
 186        }
 187
 188        list_add(&req->list, &temp_req->list);
 189
 190        if (req_list.next == &req->list)
 191                set_timeout(req->timeout);
 192        mutex_unlock(&lock);
 193}
 194
 195static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
 196{
 197        struct neighbour *n;
 198        int ret;
 199
 200        n = dst_neigh_lookup(dst, daddr);
 201
 202        rcu_read_lock();
 203        if (!n || !(n->nud_state & NUD_VALID)) {
 204                if (n)
 205                        neigh_event_send(n, NULL);
 206                ret = -ENODATA;
 207        } else {
 208                ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
 209        }
 210        rcu_read_unlock();
 211
 212        if (n)
 213                neigh_release(n);
 214
 215        return ret;
 216}
 217
 218static int addr4_resolve(struct sockaddr_in *src_in,
 219                         struct sockaddr_in *dst_in,
 220                         struct rdma_dev_addr *addr)
 221{
 222        __be32 src_ip = src_in->sin_addr.s_addr;
 223        __be32 dst_ip = dst_in->sin_addr.s_addr;
 224        struct rtable *rt;
 225        struct flowi4 fl4;
 226        int ret;
 227
 228        memset(&fl4, 0, sizeof(fl4));
 229        fl4.daddr = dst_ip;
 230        fl4.saddr = src_ip;
 231        fl4.flowi4_oif = addr->bound_dev_if;
 232        rt = ip_route_output_key(&init_net, &fl4);
 233        if (IS_ERR(rt)) {
 234                ret = PTR_ERR(rt);
 235                goto out;
 236        }
 237        src_in->sin_family = AF_INET;
 238        src_in->sin_addr.s_addr = fl4.saddr;
 239
 240        if (rt->dst.dev->flags & IFF_LOOPBACK) {
 241                ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
 242                if (!ret)
 243                        memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 244                goto put;
 245        }
 246
 247        /* If the device does ARP internally, return 'done' */
 248        if (rt->dst.dev->flags & IFF_NOARP) {
 249                ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
 250                goto put;
 251        }
 252
 253        ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
 254put:
 255        ip_rt_put(rt);
 256out:
 257        return ret;
 258}
 259
 260#if IS_ENABLED(CONFIG_IPV6)
 261static int addr6_resolve(struct sockaddr_in6 *src_in,
 262                         struct sockaddr_in6 *dst_in,
 263                         struct rdma_dev_addr *addr)
 264{
 265        struct flowi6 fl6;
 266        struct dst_entry *dst;
 267        int ret;
 268
 269        memset(&fl6, 0, sizeof fl6);
 270        fl6.daddr = dst_in->sin6_addr;
 271        fl6.saddr = src_in->sin6_addr;
 272        fl6.flowi6_oif = addr->bound_dev_if;
 273
 274        dst = ip6_route_output(&init_net, NULL, &fl6);
 275        if ((ret = dst->error))
 276                goto put;
 277
 278        if (ipv6_addr_any(&fl6.saddr)) {
 279                ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
 280                                         &fl6.daddr, 0, &fl6.saddr);
 281                if (ret)
 282                        goto put;
 283
 284                src_in->sin6_family = AF_INET6;
 285                src_in->sin6_addr = fl6.saddr;
 286        }
 287
 288        if (dst->dev->flags & IFF_LOOPBACK) {
 289                ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
 290                if (!ret)
 291                        memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 292                goto put;
 293        }
 294
 295        /* If the device does ARP internally, return 'done' */
 296        if (dst->dev->flags & IFF_NOARP) {
 297                ret = rdma_copy_addr(addr, dst->dev, NULL);
 298                goto put;
 299        }
 300
 301        ret = dst_fetch_ha(dst, addr, &fl6.daddr);
 302put:
 303        dst_release(dst);
 304        return ret;
 305}
 306#else
 307static int addr6_resolve(struct sockaddr_in6 *src_in,
 308                         struct sockaddr_in6 *dst_in,
 309                         struct rdma_dev_addr *addr)
 310{
 311        return -EADDRNOTAVAIL;
 312}
 313#endif
 314
 315static int addr_resolve(struct sockaddr *src_in,
 316                        struct sockaddr *dst_in,
 317                        struct rdma_dev_addr *addr)
 318{
 319        if (src_in->sa_family == AF_INET) {
 320                return addr4_resolve((struct sockaddr_in *) src_in,
 321                        (struct sockaddr_in *) dst_in, addr);
 322        } else
 323                return addr6_resolve((struct sockaddr_in6 *) src_in,
 324                        (struct sockaddr_in6 *) dst_in, addr);
 325}
 326
 327static void process_req(struct work_struct *work)
 328{
 329        struct addr_req *req, *temp_req;
 330        struct sockaddr *src_in, *dst_in;
 331        struct list_head done_list;
 332
 333        INIT_LIST_HEAD(&done_list);
 334
 335        mutex_lock(&lock);
 336        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 337                if (req->status == -ENODATA) {
 338                        src_in = (struct sockaddr *) &req->src_addr;
 339                        dst_in = (struct sockaddr *) &req->dst_addr;
 340                        req->status = addr_resolve(src_in, dst_in, req->addr);
 341                        if (req->status && time_after_eq(jiffies, req->timeout))
 342                                req->status = -ETIMEDOUT;
 343                        else if (req->status == -ENODATA)
 344                                continue;
 345                }
 346                list_move_tail(&req->list, &done_list);
 347        }
 348
 349        if (!list_empty(&req_list)) {
 350                req = list_entry(req_list.next, struct addr_req, list);
 351                set_timeout(req->timeout);
 352        }
 353        mutex_unlock(&lock);
 354
 355        list_for_each_entry_safe(req, temp_req, &done_list, list) {
 356                list_del(&req->list);
 357                req->callback(req->status, (struct sockaddr *) &req->src_addr,
 358                        req->addr, req->context);
 359                put_client(req->client);
 360                kfree(req);
 361        }
 362}
 363
 364int rdma_resolve_ip(struct rdma_addr_client *client,
 365                    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 366                    struct rdma_dev_addr *addr, int timeout_ms,
 367                    void (*callback)(int status, struct sockaddr *src_addr,
 368                                     struct rdma_dev_addr *addr, void *context),
 369                    void *context)
 370{
 371        struct sockaddr *src_in, *dst_in;
 372        struct addr_req *req;
 373        int ret = 0;
 374
 375        req = kzalloc(sizeof *req, GFP_KERNEL);
 376        if (!req)
 377                return -ENOMEM;
 378
 379        src_in = (struct sockaddr *) &req->src_addr;
 380        dst_in = (struct sockaddr *) &req->dst_addr;
 381
 382        if (src_addr) {
 383                if (src_addr->sa_family != dst_addr->sa_family) {
 384                        ret = -EINVAL;
 385                        goto err;
 386                }
 387
 388                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 389        } else {
 390                src_in->sa_family = dst_addr->sa_family;
 391        }
 392
 393        memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 394        req->addr = addr;
 395        req->callback = callback;
 396        req->context = context;
 397        req->client = client;
 398        atomic_inc(&client->refcount);
 399
 400        req->status = addr_resolve(src_in, dst_in, addr);
 401        switch (req->status) {
 402        case 0:
 403                req->timeout = jiffies;
 404                queue_req(req);
 405                break;
 406        case -ENODATA:
 407                req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 408                queue_req(req);
 409                break;
 410        default:
 411                ret = req->status;
 412                atomic_dec(&client->refcount);
 413                goto err;
 414        }
 415        return ret;
 416err:
 417        kfree(req);
 418        return ret;
 419}
 420EXPORT_SYMBOL(rdma_resolve_ip);
 421
 422void rdma_addr_cancel(struct rdma_dev_addr *addr)
 423{
 424        struct addr_req *req, *temp_req;
 425
 426        mutex_lock(&lock);
 427        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 428                if (req->addr == addr) {
 429                        req->status = -ECANCELED;
 430                        req->timeout = jiffies;
 431                        list_move(&req->list, &req_list);
 432                        set_timeout(req->timeout);
 433                        break;
 434                }
 435        }
 436        mutex_unlock(&lock);
 437}
 438EXPORT_SYMBOL(rdma_addr_cancel);
 439
 440static int netevent_callback(struct notifier_block *self, unsigned long event,
 441        void *ctx)
 442{
 443        if (event == NETEVENT_NEIGH_UPDATE) {
 444                struct neighbour *neigh = ctx;
 445
 446                if (neigh->nud_state & NUD_VALID) {
 447                        set_timeout(jiffies);
 448                }
 449        }
 450        return 0;
 451}
 452
 453static struct notifier_block nb = {
 454        .notifier_call = netevent_callback
 455};
 456
 457static int __init addr_init(void)
 458{
 459        addr_wq = create_singlethread_workqueue("ib_addr");
 460        if (!addr_wq)
 461                return -ENOMEM;
 462
 463        register_netevent_notifier(&nb);
 464        return 0;
 465}
 466
 467static void __exit addr_cleanup(void)
 468{
 469        unregister_netevent_notifier(&nb);
 470        destroy_workqueue(addr_wq);
 471}
 472
 473module_init(addr_init);
 474module_exit(addr_cleanup);
 475