linux/drivers/infiniband/core/addr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/mutex.h>
  37#include <linux/inetdevice.h>
  38#include <linux/slab.h>
  39#include <linux/workqueue.h>
  40#include <linux/module.h>
  41#include <net/arp.h>
  42#include <net/neighbour.h>
  43#include <net/route.h>
  44#include <net/netevent.h>
  45#include <net/addrconf.h>
  46#include <net/ip6_route.h>
  47#include <rdma/ib_addr.h>
  48#include <rdma/ib.h>
  49
  50MODULE_AUTHOR("Sean Hefty");
  51MODULE_DESCRIPTION("IB Address Translation");
  52MODULE_LICENSE("Dual BSD/GPL");
  53
  54struct addr_req {
  55        struct list_head list;
  56        struct sockaddr_storage src_addr;
  57        struct sockaddr_storage dst_addr;
  58        struct rdma_dev_addr *addr;
  59        struct rdma_addr_client *client;
  60        void *context;
  61        void (*callback)(int status, struct sockaddr *src_addr,
  62                         struct rdma_dev_addr *addr, void *context);
  63        unsigned long timeout;
  64        int status;
  65};
  66
  67static void process_req(struct work_struct *work);
  68
  69static DEFINE_MUTEX(lock);
  70static LIST_HEAD(req_list);
  71static DECLARE_DELAYED_WORK(work, process_req);
  72static struct workqueue_struct *addr_wq;
  73
  74int rdma_addr_size(struct sockaddr *addr)
  75{
  76        switch (addr->sa_family) {
  77        case AF_INET:
  78                return sizeof(struct sockaddr_in);
  79        case AF_INET6:
  80                return sizeof(struct sockaddr_in6);
  81        case AF_IB:
  82                return sizeof(struct sockaddr_ib);
  83        default:
  84                return 0;
  85        }
  86}
  87EXPORT_SYMBOL(rdma_addr_size);
  88
  89static struct rdma_addr_client self;
  90
  91void rdma_addr_register_client(struct rdma_addr_client *client)
  92{
  93        atomic_set(&client->refcount, 1);
  94        init_completion(&client->comp);
  95}
  96EXPORT_SYMBOL(rdma_addr_register_client);
  97
  98static inline void put_client(struct rdma_addr_client *client)
  99{
 100        if (atomic_dec_and_test(&client->refcount))
 101                complete(&client->comp);
 102}
 103
 104void rdma_addr_unregister_client(struct rdma_addr_client *client)
 105{
 106        put_client(client);
 107        wait_for_completion(&client->comp);
 108}
 109EXPORT_SYMBOL(rdma_addr_unregister_client);
 110
 111int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 112                     const unsigned char *dst_dev_addr)
 113{
 114        dev_addr->dev_type = dev->type;
 115        memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 116        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 117        if (dst_dev_addr)
 118                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 119        dev_addr->bound_dev_if = dev->ifindex;
 120        return 0;
 121}
 122EXPORT_SYMBOL(rdma_copy_addr);
 123
 124int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 125                      u16 *vlan_id)
 126{
 127        struct net_device *dev;
 128        int ret = -EADDRNOTAVAIL;
 129
 130        if (dev_addr->bound_dev_if) {
 131                dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 132                if (!dev)
 133                        return -ENODEV;
 134                ret = rdma_copy_addr(dev_addr, dev, NULL);
 135                dev_put(dev);
 136                return ret;
 137        }
 138
 139        switch (addr->sa_family) {
 140        case AF_INET:
 141                dev = ip_dev_find(dev_addr->net,
 142                        ((struct sockaddr_in *) addr)->sin_addr.s_addr);
 143
 144                if (!dev)
 145                        return ret;
 146
 147                ret = rdma_copy_addr(dev_addr, dev, NULL);
 148                if (vlan_id)
 149                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 150                dev_put(dev);
 151                break;
 152#if IS_ENABLED(CONFIG_IPV6)
 153        case AF_INET6:
 154                rcu_read_lock();
 155                for_each_netdev_rcu(dev_addr->net, dev) {
 156                        if (ipv6_chk_addr(dev_addr->net,
 157                                          &((struct sockaddr_in6 *) addr)->sin6_addr,
 158                                          dev, 1)) {
 159                                ret = rdma_copy_addr(dev_addr, dev, NULL);
 160                                if (vlan_id)
 161                                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 162                                break;
 163                        }
 164                }
 165                rcu_read_unlock();
 166                break;
 167#endif
 168        }
 169        return ret;
 170}
 171EXPORT_SYMBOL(rdma_translate_ip);
 172
 173static void set_timeout(unsigned long time)
 174{
 175        unsigned long delay;
 176
 177        delay = time - jiffies;
 178        if ((long)delay < 0)
 179                delay = 0;
 180
 181        mod_delayed_work(addr_wq, &work, delay);
 182}
 183
 184static void queue_req(struct addr_req *req)
 185{
 186        struct addr_req *temp_req;
 187
 188        mutex_lock(&lock);
 189        list_for_each_entry_reverse(temp_req, &req_list, list) {
 190                if (time_after_eq(req->timeout, temp_req->timeout))
 191                        break;
 192        }
 193
 194        list_add(&req->list, &temp_req->list);
 195
 196        if (req_list.next == &req->list)
 197                set_timeout(req->timeout);
 198        mutex_unlock(&lock);
 199}
 200
 201static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
 202{
 203        struct neighbour *n;
 204        int ret;
 205
 206        n = dst_neigh_lookup(dst, daddr);
 207
 208        rcu_read_lock();
 209        if (!n || !(n->nud_state & NUD_VALID)) {
 210                if (n)
 211                        neigh_event_send(n, NULL);
 212                ret = -ENODATA;
 213        } else {
 214                ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
 215        }
 216        rcu_read_unlock();
 217
 218        if (n)
 219                neigh_release(n);
 220
 221        return ret;
 222}
 223
 224static int addr4_resolve(struct sockaddr_in *src_in,
 225                         struct sockaddr_in *dst_in,
 226                         struct rdma_dev_addr *addr)
 227{
 228        __be32 src_ip = src_in->sin_addr.s_addr;
 229        __be32 dst_ip = dst_in->sin_addr.s_addr;
 230        struct rtable *rt;
 231        struct flowi4 fl4;
 232        int ret;
 233
 234        memset(&fl4, 0, sizeof(fl4));
 235        fl4.daddr = dst_ip;
 236        fl4.saddr = src_ip;
 237        fl4.flowi4_oif = addr->bound_dev_if;
 238        rt = ip_route_output_key(addr->net, &fl4);
 239        if (IS_ERR(rt)) {
 240                ret = PTR_ERR(rt);
 241                goto out;
 242        }
 243        src_in->sin_family = AF_INET;
 244        src_in->sin_addr.s_addr = fl4.saddr;
 245
 246        if (rt->dst.dev->flags & IFF_LOOPBACK) {
 247                ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
 248                if (!ret)
 249                        memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 250                goto put;
 251        }
 252
 253        /* If the device does ARP internally, return 'done' */
 254        if (rt->dst.dev->flags & IFF_NOARP) {
 255                ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
 256                goto put;
 257        }
 258
 259        ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
 260put:
 261        ip_rt_put(rt);
 262out:
 263        return ret;
 264}
 265
 266#if IS_ENABLED(CONFIG_IPV6)
 267static int addr6_resolve(struct sockaddr_in6 *src_in,
 268                         struct sockaddr_in6 *dst_in,
 269                         struct rdma_dev_addr *addr)
 270{
 271        struct flowi6 fl6;
 272        struct dst_entry *dst;
 273        int ret;
 274
 275        memset(&fl6, 0, sizeof fl6);
 276        fl6.daddr = dst_in->sin6_addr;
 277        fl6.saddr = src_in->sin6_addr;
 278        fl6.flowi6_oif = addr->bound_dev_if;
 279
 280        dst = ip6_route_output(addr->net, NULL, &fl6);
 281        if ((ret = dst->error))
 282                goto put;
 283
 284        if (ipv6_addr_any(&fl6.saddr)) {
 285                ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
 286                                         &fl6.daddr, 0, &fl6.saddr);
 287                if (ret)
 288                        goto put;
 289
 290                src_in->sin6_family = AF_INET6;
 291                src_in->sin6_addr = fl6.saddr;
 292        }
 293
 294        if (dst->dev->flags & IFF_LOOPBACK) {
 295                ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
 296                if (!ret)
 297                        memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 298                goto put;
 299        }
 300
 301        /* If the device does ARP internally, return 'done' */
 302        if (dst->dev->flags & IFF_NOARP) {
 303                ret = rdma_copy_addr(addr, dst->dev, NULL);
 304                goto put;
 305        }
 306
 307        ret = dst_fetch_ha(dst, addr, &fl6.daddr);
 308put:
 309        dst_release(dst);
 310        return ret;
 311}
 312#else
 313static int addr6_resolve(struct sockaddr_in6 *src_in,
 314                         struct sockaddr_in6 *dst_in,
 315                         struct rdma_dev_addr *addr)
 316{
 317        return -EADDRNOTAVAIL;
 318}
 319#endif
 320
 321static int addr_resolve(struct sockaddr *src_in,
 322                        struct sockaddr *dst_in,
 323                        struct rdma_dev_addr *addr)
 324{
 325        if (src_in->sa_family == AF_INET) {
 326                return addr4_resolve((struct sockaddr_in *) src_in,
 327                        (struct sockaddr_in *) dst_in, addr);
 328        } else
 329                return addr6_resolve((struct sockaddr_in6 *) src_in,
 330                        (struct sockaddr_in6 *) dst_in, addr);
 331}
 332
 333static void process_req(struct work_struct *work)
 334{
 335        struct addr_req *req, *temp_req;
 336        struct sockaddr *src_in, *dst_in;
 337        struct list_head done_list;
 338
 339        INIT_LIST_HEAD(&done_list);
 340
 341        mutex_lock(&lock);
 342        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 343                if (req->status == -ENODATA) {
 344                        src_in = (struct sockaddr *) &req->src_addr;
 345                        dst_in = (struct sockaddr *) &req->dst_addr;
 346                        req->status = addr_resolve(src_in, dst_in, req->addr);
 347                        if (req->status && time_after_eq(jiffies, req->timeout))
 348                                req->status = -ETIMEDOUT;
 349                        else if (req->status == -ENODATA)
 350                                continue;
 351                }
 352                list_move_tail(&req->list, &done_list);
 353        }
 354
 355        if (!list_empty(&req_list)) {
 356                req = list_entry(req_list.next, struct addr_req, list);
 357                set_timeout(req->timeout);
 358        }
 359        mutex_unlock(&lock);
 360
 361        list_for_each_entry_safe(req, temp_req, &done_list, list) {
 362                list_del(&req->list);
 363                req->callback(req->status, (struct sockaddr *) &req->src_addr,
 364                        req->addr, req->context);
 365                put_client(req->client);
 366                kfree(req);
 367        }
 368}
 369
 370int rdma_resolve_ip(struct rdma_addr_client *client,
 371                    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 372                    struct rdma_dev_addr *addr, int timeout_ms,
 373                    void (*callback)(int status, struct sockaddr *src_addr,
 374                                     struct rdma_dev_addr *addr, void *context),
 375                    void *context)
 376{
 377        struct sockaddr *src_in, *dst_in;
 378        struct addr_req *req;
 379        int ret = 0;
 380
 381        req = kzalloc(sizeof *req, GFP_KERNEL);
 382        if (!req)
 383                return -ENOMEM;
 384
 385        src_in = (struct sockaddr *) &req->src_addr;
 386        dst_in = (struct sockaddr *) &req->dst_addr;
 387
 388        if (src_addr) {
 389                if (src_addr->sa_family != dst_addr->sa_family) {
 390                        ret = -EINVAL;
 391                        goto err;
 392                }
 393
 394                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 395        } else {
 396                src_in->sa_family = dst_addr->sa_family;
 397        }
 398
 399        memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 400        req->addr = addr;
 401        req->callback = callback;
 402        req->context = context;
 403        req->client = client;
 404        atomic_inc(&client->refcount);
 405
 406        req->status = addr_resolve(src_in, dst_in, addr);
 407        switch (req->status) {
 408        case 0:
 409                req->timeout = jiffies;
 410                queue_req(req);
 411                break;
 412        case -ENODATA:
 413                req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 414                queue_req(req);
 415                break;
 416        default:
 417                ret = req->status;
 418                atomic_dec(&client->refcount);
 419                goto err;
 420        }
 421        return ret;
 422err:
 423        kfree(req);
 424        return ret;
 425}
 426EXPORT_SYMBOL(rdma_resolve_ip);
 427
 428void rdma_addr_cancel(struct rdma_dev_addr *addr)
 429{
 430        struct addr_req *req, *temp_req;
 431
 432        mutex_lock(&lock);
 433        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 434                if (req->addr == addr) {
 435                        req->status = -ECANCELED;
 436                        req->timeout = jiffies;
 437                        list_move(&req->list, &req_list);
 438                        set_timeout(req->timeout);
 439                        break;
 440                }
 441        }
 442        mutex_unlock(&lock);
 443}
 444EXPORT_SYMBOL(rdma_addr_cancel);
 445
 446struct resolve_cb_context {
 447        struct rdma_dev_addr *addr;
 448        struct completion comp;
 449};
 450
 451static void resolve_cb(int status, struct sockaddr *src_addr,
 452             struct rdma_dev_addr *addr, void *context)
 453{
 454        memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
 455                                rdma_dev_addr));
 456        complete(&((struct resolve_cb_context *)context)->comp);
 457}
 458
 459int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
 460                               u8 *dmac, u16 *vlan_id, int if_index)
 461{
 462        int ret = 0;
 463        struct rdma_dev_addr dev_addr;
 464        struct resolve_cb_context ctx;
 465        struct net_device *dev;
 466
 467        union {
 468                struct sockaddr     _sockaddr;
 469                struct sockaddr_in  _sockaddr_in;
 470                struct sockaddr_in6 _sockaddr_in6;
 471        } sgid_addr, dgid_addr;
 472
 473
 474        rdma_gid2ip(&sgid_addr._sockaddr, sgid);
 475        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 476
 477        memset(&dev_addr, 0, sizeof(dev_addr));
 478        dev_addr.bound_dev_if = if_index;
 479        dev_addr.net = &init_net;
 480
 481        ctx.addr = &dev_addr;
 482        init_completion(&ctx.comp);
 483        ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
 484                        &dev_addr, 1000, resolve_cb, &ctx);
 485        if (ret)
 486                return ret;
 487
 488        wait_for_completion(&ctx.comp);
 489
 490        memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
 491        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
 492        if (!dev)
 493                return -ENODEV;
 494        if (vlan_id)
 495                *vlan_id = rdma_vlan_dev_vlan_id(dev);
 496        dev_put(dev);
 497        return ret;
 498}
 499EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
 500
 501int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 502{
 503        int ret = 0;
 504        struct rdma_dev_addr dev_addr;
 505        union {
 506                struct sockaddr     _sockaddr;
 507                struct sockaddr_in  _sockaddr_in;
 508                struct sockaddr_in6 _sockaddr_in6;
 509        } gid_addr;
 510
 511        rdma_gid2ip(&gid_addr._sockaddr, sgid);
 512
 513        memset(&dev_addr, 0, sizeof(dev_addr));
 514        dev_addr.net = &init_net;
 515        ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
 516        if (ret)
 517                return ret;
 518
 519        memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
 520        return ret;
 521}
 522EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
 523
 524static int netevent_callback(struct notifier_block *self, unsigned long event,
 525        void *ctx)
 526{
 527        if (event == NETEVENT_NEIGH_UPDATE) {
 528                struct neighbour *neigh = ctx;
 529
 530                if (neigh->nud_state & NUD_VALID) {
 531                        set_timeout(jiffies);
 532                }
 533        }
 534        return 0;
 535}
 536
 537static struct notifier_block nb = {
 538        .notifier_call = netevent_callback
 539};
 540
 541static int __init addr_init(void)
 542{
 543        addr_wq = create_singlethread_workqueue("ib_addr");
 544        if (!addr_wq)
 545                return -ENOMEM;
 546
 547        register_netevent_notifier(&nb);
 548        rdma_addr_register_client(&self);
 549        return 0;
 550}
 551
 552static void __exit addr_cleanup(void)
 553{
 554        rdma_addr_unregister_client(&self);
 555        unregister_netevent_notifier(&nb);
 556        destroy_workqueue(addr_wq);
 557}
 558
 559module_init(addr_init);
 560module_exit(addr_cleanup);
 561