linux/drivers/infiniband/core/sa_query.c
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
#include "sa.h"
#include "core_priv.h"

#define IB_SA_LOCAL_SVC_TIMEOUT_MIN             100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT         2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX             200000
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;

struct ib_sa_sm_ah {
        struct ib_ah        *ah;
        struct kref          ref;
        u16                  pkey_index;
        u8                   src_path_mask;
};

struct ib_sa_port {
        struct ib_mad_agent *agent;
        struct ib_sa_sm_ah  *sm_ah;
        struct work_struct   update_task;
        spinlock_t           ah_lock;
        u8                   port_num;
};

struct ib_sa_device {
        int                     start_port, end_port;
        struct ib_event_handler event_handler;
        struct ib_sa_port port[];
};

struct ib_sa_query {
        void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
        void (*release)(struct ib_sa_query *);
        struct ib_sa_client    *client;
        struct ib_sa_port      *port;
        struct ib_mad_send_buf *mad_buf;
        struct ib_sa_sm_ah     *sm_ah;
        int                     id;
        u32                     flags;
        struct list_head        list; /* Local svc request list */
        u32                     seq; /* Local svc request sequence number */
        unsigned long           timeout; /* Local svc timeout */
        u8                      path_use; /* How will the pathrecord be used */
};

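/*
 * Query flag bits: IB_SA_ENABLE_LOCAL_SERVICE routes the query through the
 * local service (netlink) resolver before falling back to a MAD to the SA;
 * IB_SA_CANCEL marks a query whose cancellation is left to the netlink
 * timeout routine.
 */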
#define IB_SA_ENABLE_LOCAL_SERVICE      0x00000001
#define IB_SA_CANCEL                    0x00000002

struct ib_sa_service_query {
        void (*callback)(int, struct ib_sa_service_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

struct ib_sa_path_query {
        void (*callback)(int, struct ib_sa_path_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

struct ib_sa_guidinfo_query {
        void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

struct ib_sa_classport_info_query {
        void (*callback)(int, struct ib_class_port_info *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
        void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
static struct workqueue_struct *ib_nl_wq;
static struct delayed_work ib_nl_timed_work;
static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
        [LS_NLA_TYPE_PATH_RECORD]       = {.type = NLA_BINARY,
                .len = sizeof(struct ib_path_rec_data)},
        [LS_NLA_TYPE_TIMEOUT]           = {.type = NLA_U32},
        [LS_NLA_TYPE_SERVICE_ID]        = {.type = NLA_U64},
        [LS_NLA_TYPE_DGID]              = {.type = NLA_BINARY,
                .len = sizeof(struct rdma_nla_ls_gid)},
        [LS_NLA_TYPE_SGID]              = {.type = NLA_BINARY,
                .len = sizeof(struct rdma_nla_ls_gid)},
        [LS_NLA_TYPE_TCLASS]            = {.type = NLA_U8},
        [LS_NLA_TYPE_PKEY]              = {.type = NLA_U16},
        [LS_NLA_TYPE_QOS_CLASS]         = {.type = NLA_U16},
};

static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);

static struct ib_client sa_client = {
        .name   = "sa",
        .add    = ib_sa_add_one,
        .remove = ib_sa_remove_one
};

static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);

static DEFINE_SPINLOCK(tid_lock);
static u32 tid;

#define PATH_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),          \
        .struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,     \
        .field_name          = "sa_path_rec:" #field

static const struct ib_field path_rec_table[] = {
        { PATH_REC_FIELD(service_id),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 64 },
        { PATH_REC_FIELD(dgid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { PATH_REC_FIELD(sgid),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { PATH_REC_FIELD(dlid),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { PATH_REC_FIELD(slid),
          .offset_words = 10,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { PATH_REC_FIELD(raw_traffic),
          .offset_words = 11,
          .offset_bits  = 0,
          .size_bits    = 1 },
        { RESERVED,
          .offset_words = 11,
          .offset_bits  = 1,
          .size_bits    = 3 },
        { PATH_REC_FIELD(flow_label),
          .offset_words = 11,
          .offset_bits  = 4,
          .size_bits    = 20 },
        { PATH_REC_FIELD(hop_limit),
          .offset_words = 11,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { PATH_REC_FIELD(traffic_class),
          .offset_words = 12,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { PATH_REC_FIELD(reversible),
          .offset_words = 12,
          .offset_bits  = 8,
          .size_bits    = 1 },
        { PATH_REC_FIELD(numb_path),
          .offset_words = 12,
          .offset_bits  = 9,
          .size_bits    = 7 },
        { PATH_REC_FIELD(pkey),
          .offset_words = 12,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { PATH_REC_FIELD(qos_class),
          .offset_words = 13,
          .offset_bits  = 0,
          .size_bits    = 12 },
        { PATH_REC_FIELD(sl),
          .offset_words = 13,
          .offset_bits  = 12,
          .size_bits    = 4 },
        { PATH_REC_FIELD(mtu_selector),
          .offset_words = 13,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { PATH_REC_FIELD(mtu),
          .offset_words = 13,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { PATH_REC_FIELD(rate_selector),
          .offset_words = 13,
          .offset_bits  = 24,
          .size_bits    = 2 },
        { PATH_REC_FIELD(rate),
          .offset_words = 13,
          .offset_bits  = 26,
          .size_bits    = 6 },
        { PATH_REC_FIELD(packet_life_time_selector),
          .offset_words = 14,
          .offset_bits  = 0,
          .size_bits    = 2 },
        { PATH_REC_FIELD(packet_life_time),
          .offset_words = 14,
          .offset_bits  = 2,
          .size_bits    = 6 },
        { PATH_REC_FIELD(preference),
          .offset_words = 14,
          .offset_bits  = 8,
          .size_bits    = 8 },
        { RESERVED,
          .offset_words = 14,
          .offset_bits  = 16,
          .size_bits    = 48 },
};

#define MCMEMBER_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),      \
        .struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
        .field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
        { MCMEMBER_REC_FIELD(mgid),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { MCMEMBER_REC_FIELD(port_gid),
          .offset_words = 4,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { MCMEMBER_REC_FIELD(qkey),
          .offset_words = 8,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { MCMEMBER_REC_FIELD(mlid),
          .offset_words = 9,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { MCMEMBER_REC_FIELD(mtu_selector),
          .offset_words = 9,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { MCMEMBER_REC_FIELD(mtu),
          .offset_words = 9,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { MCMEMBER_REC_FIELD(traffic_class),
          .offset_words = 9,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { MCMEMBER_REC_FIELD(pkey),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { MCMEMBER_REC_FIELD(rate_selector),
          .offset_words = 10,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { MCMEMBER_REC_FIELD(rate),
          .offset_words = 10,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { MCMEMBER_REC_FIELD(packet_life_time_selector),
          .offset_words = 10,
          .offset_bits  = 24,
          .size_bits    = 2 },
        { MCMEMBER_REC_FIELD(packet_life_time),
          .offset_words = 10,
          .offset_bits  = 26,
          .size_bits    = 6 },
        { MCMEMBER_REC_FIELD(sl),
          .offset_words = 11,
          .offset_bits  = 0,
          .size_bits    = 4 },
        { MCMEMBER_REC_FIELD(flow_label),
          .offset_words = 11,
          .offset_bits  = 4,
          .size_bits    = 20 },
        { MCMEMBER_REC_FIELD(hop_limit),
          .offset_words = 11,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { MCMEMBER_REC_FIELD(scope),
          .offset_words = 12,
          .offset_bits  = 0,
          .size_bits    = 4 },
        { MCMEMBER_REC_FIELD(join_state),
          .offset_words = 12,
          .offset_bits  = 4,
          .size_bits    = 4 },
        { MCMEMBER_REC_FIELD(proxy_join),
          .offset_words = 12,
          .offset_bits  = 8,
          .size_bits    = 1 },
        { RESERVED,
          .offset_words = 12,
          .offset_bits  = 9,
          .size_bits    = 23 },
};

#define SERVICE_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),       \
        .struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,  \
        .field_name          = "sa_service_rec:" #field

static const struct ib_field service_rec_table[] = {
        { SERVICE_REC_FIELD(id),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 64 },
        { SERVICE_REC_FIELD(gid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { SERVICE_REC_FIELD(pkey),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { SERVICE_REC_FIELD(lease),
          .offset_words = 7,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { SERVICE_REC_FIELD(key),
          .offset_words = 8,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { SERVICE_REC_FIELD(name),
          .offset_words = 12,
          .offset_bits  = 0,
          .size_bits    = 64*8 },
        { SERVICE_REC_FIELD(data8),
          .offset_words = 28,
          .offset_bits  = 0,
          .size_bits    = 16*8 },
        { SERVICE_REC_FIELD(data16),
          .offset_words = 32,
          .offset_bits  = 0,
          .size_bits    = 8*16 },
        { SERVICE_REC_FIELD(data32),
          .offset_words = 36,
          .offset_bits  = 0,
          .size_bits    = 4*32 },
        { SERVICE_REC_FIELD(data64),
          .offset_words = 40,
          .offset_bits  = 0,
          .size_bits    = 2*64 },
};

#define CLASSPORTINFO_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_class_port_info, field),      \
        .struct_size_bytes   = sizeof((struct ib_class_port_info *)0)->field,   \
        .field_name          = "ib_class_port_info:" #field

static const struct ib_field classport_info_rec_table[] = {
        { CLASSPORTINFO_REC_FIELD(base_version),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { CLASSPORTINFO_REC_FIELD(class_version),
          .offset_words = 0,
          .offset_bits  = 8,
          .size_bits    = 8 },
        { CLASSPORTINFO_REC_FIELD(capability_mask),
          .offset_words = 0,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
          .offset_words = 1,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(redirect_gid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(redirect_lid),
          .offset_words = 7,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { CLASSPORTINFO_REC_FIELD(redirect_pkey),
          .offset_words = 7,
          .offset_bits  = 16,
          .size_bits    = 16 },

        { CLASSPORTINFO_REC_FIELD(redirect_qp),
          .offset_words = 8,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(redirect_qkey),
          .offset_words = 9,
          .offset_bits  = 0,
          .size_bits    = 32 },

        { CLASSPORTINFO_REC_FIELD(trap_gid),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
          .offset_words = 14,
          .offset_bits  = 0,
          .size_bits    = 32 },

        { CLASSPORTINFO_REC_FIELD(trap_lid),
          .offset_words = 15,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { CLASSPORTINFO_REC_FIELD(trap_pkey),
          .offset_words = 15,
          .offset_bits  = 16,
          .size_bits    = 16 },

        { CLASSPORTINFO_REC_FIELD(trap_hlqp),
          .offset_words = 16,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(trap_qkey),
          .offset_words = 17,
          .offset_bits  = 0,
          .size_bits    = 32 },
};

#define GUIDINFO_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),      \
        .struct_size_bytes   = sizeof((struct ib_sa_guidinfo_rec *) 0)->field,  \
        .field_name          = "sa_guidinfo_rec:" #field

static const struct ib_field guidinfo_rec_table[] = {
        { GUIDINFO_REC_FIELD(lid),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { GUIDINFO_REC_FIELD(block_num),
          .offset_words = 0,
          .offset_bits  = 16,
          .size_bits    = 8 },
        { GUIDINFO_REC_FIELD(res1),
          .offset_words = 0,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { GUIDINFO_REC_FIELD(res2),
          .offset_words = 1,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { GUIDINFO_REC_FIELD(guid_info_list),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 512 },
};

static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
{
        query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
}

static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
{
        return (query->flags & IB_SA_CANCEL);
}

static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
                                     struct ib_sa_query *query)
{
        struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
        struct ib_sa_mad *mad = query->mad_buf->mad;
        ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
        u16 val16;
        u64 val64;
        struct rdma_ls_resolve_header *header;

        query->mad_buf->context[1] = NULL;

        /* Construct the family header first */
        header = (struct rdma_ls_resolve_header *)
                skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
        memcpy(header->device_name, query->port->agent->device->name,
               LS_DEVICE_NAME_MAX);
        header->port_num = query->port->port_num;

        if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
            sa_rec->reversible != 0)
                query->path_use = LS_RESOLVE_PATH_USE_GMP;
        else
                query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
        header->path_use = query->path_use;

        /* Now build the attributes */
        if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
                val64 = be64_to_cpu(sa_rec->service_id);
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
                        sizeof(val64), &val64);
        }
        if (comp_mask & IB_SA_PATH_REC_DGID)
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
                        sizeof(sa_rec->dgid), &sa_rec->dgid);
        if (comp_mask & IB_SA_PATH_REC_SGID)
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
                        sizeof(sa_rec->sgid), &sa_rec->sgid);
        if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
                        sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);

        if (comp_mask & IB_SA_PATH_REC_PKEY) {
                val16 = be16_to_cpu(sa_rec->pkey);
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
                        sizeof(val16), &val16);
        }
        if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
                val16 = be16_to_cpu(sa_rec->qos_class);
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
                        sizeof(val16), &val16);
        }
}

static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
{
        int len = 0;

        if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
                len += nla_total_size(sizeof(u64));
        if (comp_mask & IB_SA_PATH_REC_DGID)
                len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
        if (comp_mask & IB_SA_PATH_REC_SGID)
                len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
        if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
                len += nla_total_size(sizeof(u8));
        if (comp_mask & IB_SA_PATH_REC_PKEY)
                len += nla_total_size(sizeof(u16));
        if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
                len += nla_total_size(sizeof(u16));

        /*
         * Make sure that at least some of the required comp_mask bits are
         * set.
         */
        if (WARN_ON(len == 0))
                return len;

        /* Add the family header */
        len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));

        return len;
}

static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
{
        struct sk_buff *skb = NULL;
        struct nlmsghdr *nlh;
        void *data;
        int ret = 0;
        struct ib_sa_mad *mad;
        int len;

        mad = query->mad_buf->mad;
        len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
        if (len <= 0)
                return -EMSGSIZE;

        skb = nlmsg_new(len, gfp_mask);
        if (!skb)
                return -ENOMEM;

        /* Put nlmsg header only for now */
        data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
                            RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
        if (!data) {
                nlmsg_free(skb);
                return -EMSGSIZE;
        }

        /* Add attributes */
        ib_nl_set_path_rec_attrs(skb, query);

        /* Repair the nlmsg header length */
        nlmsg_end(skb, nlh);

        ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
        if (!ret)
                ret = len;
        else
                ret = 0;

        return ret;
}

static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
        unsigned long flags;
        unsigned long delay;
        int ret;

        INIT_LIST_HEAD(&query->list);
        query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);

        /* Put the request on the list first. */
        spin_lock_irqsave(&ib_nl_request_lock, flags);
        delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
        query->timeout = delay + jiffies;
        list_add_tail(&query->list, &ib_nl_request_list);
        /* Start the timeout if this is the only request */
        if (ib_nl_request_list.next == &query->list)
                queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);

        ret = ib_nl_send_msg(query, gfp_mask);
        if (ret <= 0) {
                ret = -EIO;
                /* Remove the request */
                spin_lock_irqsave(&ib_nl_request_lock, flags);
                list_del(&query->list);
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
        } else {
                ret = 0;
        }

        return ret;
}

static int ib_nl_cancel_request(struct ib_sa_query *query)
{
        unsigned long flags;
        struct ib_sa_query *wait_query;
        int found = 0;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        list_for_each_entry(wait_query, &ib_nl_request_list, list) {
                /* Let the timeout routine take care of the callback */
                if (query == wait_query) {
                        query->flags |= IB_SA_CANCEL;
                        query->timeout = jiffies;
                        list_move(&query->list, &ib_nl_request_list);
                        found = 1;
                        mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
                        break;
                }
        }
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);

        return found;
}

static void send_handler(struct ib_mad_agent *agent,
                         struct ib_mad_send_wc *mad_send_wc);

static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
                                           const struct nlmsghdr *nlh)
{
        struct ib_mad_send_wc mad_send_wc;
        struct ib_sa_mad *mad = NULL;
        const struct nlattr *head, *curr;
        struct ib_path_rec_data  *rec;
        int len, rem;
        u32 mask = 0;
        int status = -EIO;

        if (query->callback) {
                head = (const struct nlattr *) nlmsg_data(nlh);
                len = nlmsg_len(nlh);
                switch (query->path_use) {
                case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
                        mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
                        break;

                case LS_RESOLVE_PATH_USE_ALL:
                case LS_RESOLVE_PATH_USE_GMP:
                default:
                        mask = IB_PATH_PRIMARY | IB_PATH_GMP |
                                IB_PATH_BIDIRECTIONAL;
                        break;
                }
                nla_for_each_attr(curr, head, len, rem) {
                        if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
                                rec = nla_data(curr);
                                /*
                                 * Get the first one. In the future, we may
                                 * need to get up to 6 pathrecords.
                                 */
                                if ((rec->flags & mask) == mask) {
                                        mad = query->mad_buf->mad;
                                        mad->mad_hdr.method |=
                                                IB_MGMT_METHOD_RESP;
                                        memcpy(mad->data, rec->path_rec,
                                               sizeof(rec->path_rec));
                                        status = 0;
                                        break;
                                }
                        }
                }
                query->callback(query, status, mad);
        }

        mad_send_wc.send_buf = query->mad_buf;
        mad_send_wc.status = IB_WC_SUCCESS;
        send_handler(query->mad_buf->mad_agent, &mad_send_wc);
}

static void ib_nl_request_timeout(struct work_struct *work)
{
        unsigned long flags;
        struct ib_sa_query *query;
        unsigned long delay;
        struct ib_mad_send_wc mad_send_wc;
        int ret;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        while (!list_empty(&ib_nl_request_list)) {
                query = list_entry(ib_nl_request_list.next,
                                   struct ib_sa_query, list);

                if (time_after(query->timeout, jiffies)) {
                        delay = query->timeout - jiffies;
                        if ((long)delay <= 0)
                                delay = 1;
                        queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
                        break;
                }

                list_del(&query->list);
                ib_sa_disable_local_svc(query);
                /* Hold the lock to protect against query cancellation */
                if (ib_sa_query_cancelled(query))
                        ret = -1;
                else
                        ret = ib_post_send_mad(query->mad_buf, NULL);
                if (ret) {
                        mad_send_wc.send_buf = query->mad_buf;
                        mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
                        spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                        send_handler(query->port->agent, &mad_send_wc);
                        spin_lock_irqsave(&ib_nl_request_lock, flags);
                }
        }
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}

int ib_nl_handle_set_timeout(struct sk_buff *skb,
                             struct netlink_callback *cb)
{
        const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
        int timeout, delta, abs_delta;
        const struct nlattr *attr;
        unsigned long flags;
        struct ib_sa_query *query;
        long delay = 0;
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;

        if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
            !(NETLINK_CB(skb).sk) ||
            !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;

        ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
                        nlmsg_len(nlh), ib_nl_policy);
        attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
        if (ret || !attr)
                goto settimeout_out;

        timeout = *(int *) nla_data(attr);
        if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
                timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
        if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
                timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;

        delta = timeout - sa_local_svc_timeout_ms;
        if (delta < 0)
                abs_delta = -delta;
        else
                abs_delta = delta;

        if (delta != 0) {
                spin_lock_irqsave(&ib_nl_request_lock, flags);
                sa_local_svc_timeout_ms = timeout;
                list_for_each_entry(query, &ib_nl_request_list, list) {
                        if (delta < 0 && abs_delta > query->timeout)
                                query->timeout = 0;
                        else
                                query->timeout += delta;

                        /* Get the new delay from the first entry */
                        if (!delay) {
                                delay = query->timeout - jiffies;
                                if (delay <= 0)
                                        delay = 1;
                        }
                }
                if (delay)
                        mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
                                         (unsigned long)delay);
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
        }

settimeout_out:
        return skb->len;
}

static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
{
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;

        if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
                return 0;

        ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
                        nlmsg_len(nlh), ib_nl_policy);
        if (ret)
                return 0;

        return 1;
}

int ib_nl_handle_resolve_resp(struct sk_buff *skb,
                              struct netlink_callback *cb)
{
        const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
        unsigned long flags;
        struct ib_sa_query *query;
        struct ib_mad_send_buf *send_buf;
        struct ib_mad_send_wc mad_send_wc;
        int found = 0;
        int ret;

        if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
            !(NETLINK_CB(skb).sk) ||
            !netlink_capable(skb, CAP_NET_ADMIN))
                return -EPERM;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        list_for_each_entry(query, &ib_nl_request_list, list) {
                /*
                 * If the query is cancelled, let the timeout routine
                 * take care of it.
                 */
                if (nlh->nlmsg_seq == query->seq) {
                        found = !ib_sa_query_cancelled(query);
                        if (found)
                                list_del(&query->list);
                        break;
                }
        }

        if (!found) {
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                goto resp_out;
        }

        send_buf = query->mad_buf;

        if (!ib_nl_is_good_resolve_resp(nlh)) {
                /* if the result is a failure, send out the packet via IB */
                ib_sa_disable_local_svc(query);
                ret = ib_post_send_mad(query->mad_buf, NULL);
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                if (ret) {
                        mad_send_wc.send_buf = send_buf;
                        mad_send_wc.status = IB_WC_GENERAL_ERR;
                        send_handler(query->port->agent, &mad_send_wc);
                }
        } else {
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                ib_nl_process_good_resolve_rsp(query, nlh);
        }

resp_out:
        return skb->len;
}

static void free_sm_ah(struct kref *kref)
{
        struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

        ib_destroy_ah(sm_ah->ah);
        kfree(sm_ah);
}

static void update_sm_ah(struct work_struct *work)
{
        struct ib_sa_port *port =
                container_of(work, struct ib_sa_port, update_task);
        struct ib_sa_sm_ah *new_ah;
        struct ib_port_attr port_attr;
        struct ib_ah_attr   ah_attr;

        if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
                pr_warn("Couldn't query port\n");
                return;
        }

        new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
        if (!new_ah)
                return;

        kref_init(&new_ah->ref);
        new_ah->src_path_mask = (1 << port_attr.lmc) - 1;

        new_ah->pkey_index = 0;
        if (ib_find_pkey(port->agent->device, port->port_num,
                         IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
                pr_err("Couldn't find index for default PKey\n");

        memset(&ah_attr, 0, sizeof ah_attr);
        ah_attr.dlid     = port_attr.sm_lid;
        ah_attr.sl       = port_attr.sm_sl;
        ah_attr.port_num = port->port_num;
        if (port_attr.grh_required) {
                ah_attr.ah_flags = IB_AH_GRH;
                ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
                ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
        }

        new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
        if (IS_ERR(new_ah->ah)) {
                pr_warn("Couldn't create new SM AH\n");
                kfree(new_ah);
                return;
        }

        spin_lock_irq(&port->ah_lock);
        if (port->sm_ah)
                kref_put(&port->sm_ah->ref, free_sm_ah);
        port->sm_ah = new_ah;
        spin_unlock_irq(&port->ah_lock);
}

static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
        if (event->event == IB_EVENT_PORT_ERR    ||
            event->event == IB_EVENT_PORT_ACTIVE ||
            event->event == IB_EVENT_LID_CHANGE  ||
            event->event == IB_EVENT_PKEY_CHANGE ||
            event->event == IB_EVENT_SM_CHANGE   ||
            event->event == IB_EVENT_CLIENT_REREGISTER) {
                unsigned long flags;
                struct ib_sa_device *sa_dev =
                        container_of(handler, typeof(*sa_dev), event_handler);
                struct ib_sa_port *port =
                        &sa_dev->port[event->element.port_num - sa_dev->start_port];

                if (!rdma_cap_ib_sa(handler->device, port->port_num))
                        return;

                spin_lock_irqsave(&port->ah_lock, flags);
                if (port->sm_ah)
                        kref_put(&port->sm_ah->ref, free_sm_ah);
                port->sm_ah = NULL;
                spin_unlock_irqrestore(&port->ah_lock, flags);

                queue_work(ib_wq, &sa_dev->port[event->element.port_num -
                                            sa_dev->start_port].update_task);
        }
}

void ib_sa_register_client(struct ib_sa_client *client)
{
        atomic_set(&client->users, 1);
        init_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_register_client);

void ib_sa_unregister_client(struct ib_sa_client *client)
{
        ib_sa_client_put(client);
        wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);
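
/*
 * Example (illustrative sketch, not part of this file): a kernel user
 * brackets its SA usage with register/unregister; my_init/my_exit and
 * my_sa_client are hypothetical names:
 *
 *      static struct ib_sa_client my_sa_client;
 *
 *      my_init():  ib_sa_register_client(&my_sa_client);
 *      my_exit():  ib_sa_unregister_client(&my_sa_client);
 *
 * ib_sa_unregister_client() drops the initial reference and then waits on
 * client->comp for all outstanding queries to release the client, so
 * calling it from a query callback would deadlock.
 */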

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
 * @query:query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
        unsigned long flags;
        struct ib_mad_agent *agent;
        struct ib_mad_send_buf *mad_buf;

        spin_lock_irqsave(&idr_lock, flags);
        if (idr_find(&query_idr, id) != query) {
                spin_unlock_irqrestore(&idr_lock, flags);
                return;
        }
        agent = query->port->agent;
        mad_buf = query->mad_buf;
        spin_unlock_irqrestore(&idr_lock, flags);

        /*
         * If the query is still on the netlink request list, schedule
         * it to be cancelled by the timeout routine. Otherwise, it has been
         * sent to the MAD layer and has to be cancelled from there.
         */
        if (!ib_nl_cancel_request(query))
                ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
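
/*
 * Example (illustrative sketch): cancelling a path record query with the
 * id returned by ib_sa_path_rec_get(); my_callback/my_context are
 * hypothetical:
 *
 *      struct ib_sa_query *query;
 *      int id = ib_sa_path_rec_get(client, device, port_num, &rec,
 *                                  comp_mask, 1000, GFP_KERNEL,
 *                                  my_callback, my_context, &query);
 *      if (id >= 0)
 *              ib_sa_cancel_query(id, query);
 *
 * The callback still runs, with status -EINTR, before the query is freed.
 */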

static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
{
        struct ib_sa_device *sa_dev;
        struct ib_sa_port   *port;
        unsigned long flags;
        u8 src_path_mask;

        sa_dev = ib_get_client_data(device, &sa_client);
        if (!sa_dev)
                return 0x7f;

        port  = &sa_dev->port[port_num - sa_dev->start_port];
        spin_lock_irqsave(&port->ah_lock, flags);
        src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
        spin_unlock_irqrestore(&port->ah_lock, flags);

        return src_path_mask;
}

int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
                         struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
        int ret;
        u16 gid_index;
        int use_roce;
        struct net_device *ndev = NULL;

        memset(ah_attr, 0, sizeof *ah_attr);
        ah_attr->dlid = be16_to_cpu(rec->dlid);
        ah_attr->sl = rec->sl;
        ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
                                 get_src_path_mask(device, port_num);
        ah_attr->port_num = port_num;
        ah_attr->static_rate = rec->rate;

        use_roce = rdma_cap_eth_ah(device, port_num);

        if (use_roce) {
                struct net_device *idev;
                struct net_device *resolved_dev;
                struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
                                                 .net = rec->net ? rec->net :
                                                         &init_net};
                union {
                        struct sockaddr     _sockaddr;
                        struct sockaddr_in  _sockaddr_in;
                        struct sockaddr_in6 _sockaddr_in6;
                } sgid_addr, dgid_addr;

                if (!device->get_netdev)
                        return -EOPNOTSUPP;

                rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
                rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);

                /* validate the route */
                ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
                                            &dgid_addr._sockaddr, &dev_addr);
                if (ret)
                        return ret;

                if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
                     dev_addr.network == RDMA_NETWORK_IPV6) &&
                    rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
                        return -EINVAL;

                idev = device->get_netdev(device, port_num);
                if (!idev)
                        return -ENODEV;

                resolved_dev = dev_get_by_index(dev_addr.net,
                                                dev_addr.bound_dev_if);
                if (!resolved_dev) {
                        dev_put(idev);
                        return -ENODEV;
                }
                if (resolved_dev->flags & IFF_LOOPBACK) {
                        dev_put(resolved_dev);
                        resolved_dev = idev;
                        dev_hold(resolved_dev);
                }
                ndev = ib_get_ndev_from_path(rec);
                rcu_read_lock();
                if ((ndev && ndev != resolved_dev) ||
                    (resolved_dev != idev &&
                     !rdma_is_upper_dev_rcu(idev, resolved_dev)))
                        ret = -EHOSTUNREACH;
                rcu_read_unlock();
                dev_put(idev);
                dev_put(resolved_dev);
                if (ret) {
                        if (ndev)
                                dev_put(ndev);
                        return ret;
                }
        }

        if (rec->hop_limit > 0 || use_roce) {
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = rec->dgid;

                ret = ib_find_cached_gid_by_port(device, &rec->sgid,
                                                 rec->gid_type, port_num, ndev,
                                                 &gid_index);
                if (ret) {
                        if (ndev)
                                dev_put(ndev);
                        return ret;
                }

                ah_attr->grh.sgid_index    = gid_index;
                ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
                ah_attr->grh.hop_limit     = rec->hop_limit;
                ah_attr->grh.traffic_class = rec->traffic_class;
                if (ndev)
                        dev_put(ndev);
        }

        if (use_roce)
                memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);

        return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
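
/*
 * Example (illustrative sketch): turning a resolved path record into an
 * address handle; error handling is elided and pd/rec are assumed to be
 * set up by the caller:
 *
 *      struct ib_ah_attr ah_attr;
 *      struct ib_ah *ah;
 *
 *      if (!ib_init_ah_from_path(device, port_num, &rec, &ah_attr))
 *              ah = ib_create_ah(pd, &ah_attr);
 */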

static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
        unsigned long flags;

        spin_lock_irqsave(&query->port->ah_lock, flags);
        if (!query->port->sm_ah) {
                spin_unlock_irqrestore(&query->port->ah_lock, flags);
                return -EAGAIN;
        }
        kref_get(&query->port->sm_ah->ref);
        query->sm_ah = query->port->sm_ah;
        spin_unlock_irqrestore(&query->port->ah_lock, flags);

        query->mad_buf = ib_create_send_mad(query->port->agent, 1,
                                            query->sm_ah->pkey_index,
                                            0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
                                            gfp_mask,
                                            IB_MGMT_BASE_VERSION);
        if (IS_ERR(query->mad_buf)) {
                kref_put(&query->sm_ah->ref, free_sm_ah);
                return -ENOMEM;
        }

        query->mad_buf->ah = query->sm_ah->ah;

        return 0;
}

static void free_mad(struct ib_sa_query *query)
{
        ib_free_send_mad(query->mad_buf);
        kref_put(&query->sm_ah->ref, free_sm_ah);
}

static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
        unsigned long flags;

        memset(mad, 0, sizeof *mad);

        mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
        mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
        mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;

        spin_lock_irqsave(&tid_lock, flags);
        mad->mad_hdr.tid           =
                cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
        spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
        bool preload = gfpflags_allow_blocking(gfp_mask);
        unsigned long flags;
        int ret, id;

        if (preload)
                idr_preload(gfp_mask);
        spin_lock_irqsave(&idr_lock, flags);

        id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&idr_lock, flags);
        if (preload)
                idr_preload_end();
        if (id < 0)
                return id;

        query->mad_buf->timeout_ms  = timeout_ms;
        query->mad_buf->context[0] = query;
        query->id = id;

        if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
                if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
                        if (!ib_nl_make_request(query, gfp_mask))
                                return id;
                }
                ib_sa_disable_local_svc(query);
        }

        ret = ib_post_send_mad(query->mad_buf, NULL);
        if (ret) {
                spin_lock_irqsave(&idr_lock, flags);
                idr_remove(&query_idr, id);
                spin_unlock_irqrestore(&idr_lock, flags);
        }

        /*
         * It's not safe to dereference query any more, because the
         * send may already have completed and freed the query in
         * another context.
         */
        return ret ? ret : id;
}

void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
{
        ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);

void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
{
        ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
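
/*
 * Example (illustrative sketch): ib_sa_pack_path()/ib_sa_unpack_path()
 * convert between struct ib_sa_path_rec and the network-order wire layout
 * described by path_rec_table:
 *
 *      u8 attr[IB_MGMT_SA_DATA];       // attribute buffer, network order
 *      struct ib_sa_path_rec rec2;
 *
 *      ib_sa_pack_path(&rec, attr);
 *      ib_sa_unpack_path(attr, &rec2); // rec2 now mirrors rec
 */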

static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
                                    int status,
                                    struct ib_sa_mad *mad)
{
        struct ib_sa_path_query *query =
                container_of(sa_query, struct ib_sa_path_query, sa_query);

        if (mad) {
                struct ib_sa_path_rec rec;

                ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
                          mad->data, &rec);
                rec.net = NULL;
                rec.ifindex = 0;
                rec.gid_type = IB_GID_TYPE_IB;
                eth_zero_addr(rec.dmac);
                query->callback(status, &rec, query->context);
        } else
                query->callback(status, NULL, query->context);
}

static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
        kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @client:SA client
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_sa_client *client,
                       struct ib_device *device, u8 port_num,
                       struct ib_sa_path_rec *rec,
                       ib_sa_comp_mask comp_mask,
                       int timeout_ms, gfp_t gfp_mask,
                       void (*callback)(int status,
                                        struct ib_sa_path_rec *resp,
                                        void *context),
                       void *context,
                       struct ib_sa_query **sa_query)
{
        struct ib_sa_path_query *query;
        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
        struct ib_sa_port   *port;
        struct ib_mad_agent *agent;
        struct ib_sa_mad *mad;
        int ret;

        if (!sa_dev)
                return -ENODEV;

        port  = &sa_dev->port[port_num - sa_dev->start_port];
        agent = port->agent;

        query = kzalloc(sizeof(*query), gfp_mask);
        if (!query)
                return -ENOMEM;

        query->sa_query.port     = port;
        ret = alloc_mad(&query->sa_query, gfp_mask);
        if (ret)
                goto err1;

        ib_sa_client_get(client);
        query->sa_query.client = client;
        query->callback        = callback;
        query->context         = context;

        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);

        query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
        query->sa_query.release  = ib_sa_path_rec_release;
        mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_PATH_REC);
        mad->sa_hdr.comp_mask    = comp_mask;

        ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);

        *sa_query = &query->sa_query;

        query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
        query->sa_query.mad_buf->context[1] = rec;

        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
        if (ret < 0)
                goto err2;

        return ret;

err2:
        *sa_query = NULL;
        ib_sa_client_put(query->sa_query.client);
        free_mad(&query->sa_query);

err1:
        kfree(query);
        return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
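
/*
 * Example (illustrative sketch): issuing an asynchronous path record query;
 * my_path_handler and my_ctx are hypothetical:
 *
 *      static void my_path_handler(int status, struct ib_sa_path_rec *resp,
 *                                  void *context)
 *      {
 *              if (!status)
 *                      pr_info("path resolved, dlid 0x%x\n",
 *                              be16_to_cpu(resp->dlid));
 *      }
 *
 *      id = ib_sa_path_rec_get(&my_sa_client, device, port_num, &rec,
 *                              IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID,
 *                              1000, GFP_KERNEL, my_path_handler, my_ctx,
 *                              &query);
 */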

static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
                                       int status,
                                       struct ib_sa_mad *mad)
{
        struct ib_sa_service_query *query =
                container_of(sa_query, struct ib_sa_service_query, sa_query);

        if (mad) {
                struct ib_sa_service_rec rec;

                ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
                          mad->data, &rec);
                query->callback(status, &rec, query->context);
        } else
                query->callback(status, NULL, query->context);
}

static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
        kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}

/**
 * ib_sa_service_rec_query - Start Service Record operation
 * @client:SA client
 * @device:device to send request on
 * @port_num: port number to send request on
 * @method:SA method - should be get, set, or delete
 * @rec:Service Record to send in request
 * @comp_mask:component mask to send in request
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when request completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:request context, used to cancel request
 *
 * Send a Service Record set/get/delete to the SA to register,
 * unregister or query a service record.
 * The callback function will be called when the request completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_service_rec_query() is negative, it is an
 * error code.  Otherwise it is a request ID that can be used to cancel
 * the query.
 */
1453int ib_sa_service_rec_query(struct ib_sa_client *client,
1454                            struct ib_device *device, u8 port_num, u8 method,
1455                            struct ib_sa_service_rec *rec,
1456                            ib_sa_comp_mask comp_mask,
1457                            int timeout_ms, gfp_t gfp_mask,
1458                            void (*callback)(int status,
1459                                             struct ib_sa_service_rec *resp,
1460                                             void *context),
1461                            void *context,
1462                            struct ib_sa_query **sa_query)
1463{
1464        struct ib_sa_service_query *query;
1465        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1466        struct ib_sa_port   *port;
1467        struct ib_mad_agent *agent;
1468        struct ib_sa_mad *mad;
1469        int ret;
1470
1471        if (!sa_dev)
1472                return -ENODEV;
1473
1474        port  = &sa_dev->port[port_num - sa_dev->start_port];
1475        agent = port->agent;
1476
1477        if (method != IB_MGMT_METHOD_GET &&
1478            method != IB_MGMT_METHOD_SET &&
1479            method != IB_SA_METHOD_DELETE)
1480                return -EINVAL;
1481
1482        query = kzalloc(sizeof(*query), gfp_mask);
1483        if (!query)
1484                return -ENOMEM;
1485
1486        query->sa_query.port     = port;
1487        ret = alloc_mad(&query->sa_query, gfp_mask);
1488        if (ret)
1489                goto err1;
1490
1491        ib_sa_client_get(client);
1492        query->sa_query.client = client;
1493        query->callback        = callback;
1494        query->context         = context;
1495
1496        mad = query->sa_query.mad_buf->mad;
1497        init_mad(mad, agent);
1498
1499        query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
1500        query->sa_query.release  = ib_sa_service_rec_release;
1501        mad->mad_hdr.method      = method;
1502        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
1503        mad->sa_hdr.comp_mask    = comp_mask;
1504
1505        ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
1506                rec, mad->data);
1507
1508        *sa_query = &query->sa_query;
1509
1510        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1511        if (ret < 0)
1512                goto err2;
1513
1514        return ret;
1515
1516err2:
1517        *sa_query = NULL;
1518        ib_sa_client_put(query->sa_query.client);
1519        free_mad(&query->sa_query);
1520
1521err1:
1522        kfree(query);
1523        return ret;
1524}
1525EXPORT_SYMBOL(ib_sa_service_rec_query);
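/*
 * Illustrative sketch (not part of the original file): looking up a
 * service record by its service ID.  The my_* names are hypothetical;
 * a real caller would keep the returned request ID for cancellation.
 */
static void my_service_handler(int status, struct ib_sa_service_rec *resp,
                               void *context)
{
        if (status)
                pr_err("service rec query failed: %d\n", status);
        else
                pr_info("service id 0x%llx found\n",
                        (unsigned long long)resp->id);
}

static int my_lookup_service(struct ib_sa_client *client,
                             struct ib_device *device, u8 port_num,
                             u64 service_id)
{
        struct ib_sa_service_rec rec = {
                .id = service_id,
        };
        struct ib_sa_query *query;

        /* IB_MGMT_METHOD_SET would register the record with the SA and
         * IB_SA_METHOD_DELETE would unregister it. */
        return ib_sa_service_rec_query(client, device, port_num,
                                       IB_MGMT_METHOD_GET, &rec,
                                       IB_SA_SERVICE_REC_SERVICE_ID,
                                       2000, GFP_KERNEL,
                                       my_service_handler, NULL, &query);
}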
1526
1527static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1528                                        int status,
1529                                        struct ib_sa_mad *mad)
1530{
1531        struct ib_sa_mcmember_query *query =
1532                container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1533
1534        if (mad) {
1535                struct ib_sa_mcmember_rec rec;
1536
1537                ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1538                          mad->data, &rec);
1539                query->callback(status, &rec, query->context);
1540        } else
1541                query->callback(status, NULL, query->context);
1542}
1543
1544static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1545{
1546        kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1547}
1548
1549int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1550                             struct ib_device *device, u8 port_num,
1551                             u8 method,
1552                             struct ib_sa_mcmember_rec *rec,
1553                             ib_sa_comp_mask comp_mask,
1554                             int timeout_ms, gfp_t gfp_mask,
1555                             void (*callback)(int status,
1556                                              struct ib_sa_mcmember_rec *resp,
1557                                              void *context),
1558                             void *context,
1559                             struct ib_sa_query **sa_query)
1560{
1561        struct ib_sa_mcmember_query *query;
1562        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1563        struct ib_sa_port   *port;
1564        struct ib_mad_agent *agent;
1565        struct ib_sa_mad *mad;
1566        int ret;
1567
1568        if (!sa_dev)
1569                return -ENODEV;
1570
1571        port  = &sa_dev->port[port_num - sa_dev->start_port];
1572        agent = port->agent;
1573
1574        query = kzalloc(sizeof(*query), gfp_mask);
1575        if (!query)
1576                return -ENOMEM;
1577
1578        query->sa_query.port     = port;
1579        ret = alloc_mad(&query->sa_query, gfp_mask);
1580        if (ret)
1581                goto err1;
1582
1583        ib_sa_client_get(client);
1584        query->sa_query.client = client;
1585        query->callback        = callback;
1586        query->context         = context;
1587
1588        mad = query->sa_query.mad_buf->mad;
1589        init_mad(mad, agent);
1590
1591        query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1592        query->sa_query.release  = ib_sa_mcmember_rec_release;
1593        mad->mad_hdr.method      = method;
1594        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1595        mad->sa_hdr.comp_mask    = comp_mask;
1596
1597        ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1598                rec, mad->data);
1599
1600        *sa_query = &query->sa_query;
1601
1602        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1603        if (ret < 0)
1604                goto err2;
1605
1606        return ret;
1607
1608err2:
1609        *sa_query = NULL;
1610        ib_sa_client_put(query->sa_query.client);
1611        free_mad(&query->sa_query);
1612
1613err1:
1614        kfree(query);
1615        return ret;
1616}
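/*
 * Illustrative sketch (not part of the original file): a raw multicast
 * join via ib_sa_mcmember_rec_query().  Note this helper is not
 * exported; in-tree consumers are expected to join through
 * ib_sa_join_multicast() in multicast.c, which reference-counts group
 * membership.  The my_* names are hypothetical.
 */
static void my_join_handler(int status, struct ib_sa_mcmember_rec *resp,
                            void *context)
{
        if (!status)
                pr_info("joined group, mlid 0x%x\n", be16_to_cpu(resp->mlid));
}

static int my_join_group(struct ib_sa_client *client,
                         struct ib_device *device, u8 port_num,
                         union ib_gid *mgid, union ib_gid *port_gid)
{
        struct ib_sa_mcmember_rec rec = {
                .mgid       = *mgid,
                .port_gid   = *port_gid,
                .join_state = 1,        /* full member */
        };
        struct ib_sa_query *query;

        return ib_sa_mcmember_rec_query(client, device, port_num,
                                        IB_MGMT_METHOD_SET, &rec,
                                        IB_SA_MCMEMBER_REC_MGID |
                                        IB_SA_MCMEMBER_REC_PORT_GID |
                                        IB_SA_MCMEMBER_REC_JOIN_STATE,
                                        1000, GFP_KERNEL,
                                        my_join_handler, NULL, &query);
}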
1617
1618/* Support GuidInfoRecord */
1619static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1620                                        int status,
1621                                        struct ib_sa_mad *mad)
1622{
1623        struct ib_sa_guidinfo_query *query =
1624                container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1625
1626        if (mad) {
1627                struct ib_sa_guidinfo_rec rec;
1628
1629                ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1630                          mad->data, &rec);
1631                query->callback(status, &rec, query->context);
1632        } else
1633                query->callback(status, NULL, query->context);
1634}
1635
1636static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1637{
1638        kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1639}
1640
1641int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1642                              struct ib_device *device, u8 port_num,
1643                              struct ib_sa_guidinfo_rec *rec,
1644                              ib_sa_comp_mask comp_mask, u8 method,
1645                              int timeout_ms, gfp_t gfp_mask,
1646                              void (*callback)(int status,
1647                                               struct ib_sa_guidinfo_rec *resp,
1648                                               void *context),
1649                              void *context,
1650                              struct ib_sa_query **sa_query)
1651{
1652        struct ib_sa_guidinfo_query *query;
1653        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1654        struct ib_sa_port *port;
1655        struct ib_mad_agent *agent;
1656        struct ib_sa_mad *mad;
1657        int ret;
1658
1659        if (!sa_dev)
1660                return -ENODEV;
1661
1662        if (method != IB_MGMT_METHOD_GET &&
1663            method != IB_MGMT_METHOD_SET &&
1664            method != IB_SA_METHOD_DELETE)
1665                return -EINVAL;
1667
1668        port  = &sa_dev->port[port_num - sa_dev->start_port];
1669        agent = port->agent;
1670
1671        query = kzalloc(sizeof(*query), gfp_mask);
1672        if (!query)
1673                return -ENOMEM;
1674
1675        query->sa_query.port = port;
1676        ret = alloc_mad(&query->sa_query, gfp_mask);
1677        if (ret)
1678                goto err1;
1679
1680        ib_sa_client_get(client);
1681        query->sa_query.client = client;
1682        query->callback        = callback;
1683        query->context         = context;
1684
1685        mad = query->sa_query.mad_buf->mad;
1686        init_mad(mad, agent);
1687
1688        query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1689        query->sa_query.release  = ib_sa_guidinfo_rec_release;
1690
1691        mad->mad_hdr.method      = method;
1692        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1693        mad->sa_hdr.comp_mask    = comp_mask;
1694
1695        ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1696                mad->data);
1697
1698        *sa_query = &query->sa_query;
1699
1700        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1701        if (ret < 0)
1702                goto err2;
1703
1704        return ret;
1705
1706err2:
1707        *sa_query = NULL;
1708        ib_sa_client_put(query->sa_query.client);
1709        free_mad(&query->sa_query);
1710
1711err1:
1712        kfree(query);
1713        return ret;
1714}
1715EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
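/*
 * Illustrative sketch (not part of the original file): fetching one
 * block of a port's GUID table, in the style of the mlx4 alias-GUID
 * code that consumes this API.  The my_* names are hypothetical.
 */
static void my_guidinfo_handler(int status, struct ib_sa_guidinfo_rec *resp,
                                void *context)
{
        if (!status)
                pr_info("got GUID block %u for lid 0x%x\n",
                        resp->block_num, be16_to_cpu(resp->lid));
}

static int my_get_guid_block(struct ib_sa_client *client,
                             struct ib_device *device, u8 port_num,
                             u16 lid, u8 block_num)
{
        struct ib_sa_guidinfo_rec rec = {
                .lid       = cpu_to_be16(lid),
                .block_num = block_num,
        };
        struct ib_sa_query *query;

        /* Note the argument order: comp_mask precedes method here. */
        return ib_sa_guid_info_rec_query(client, device, port_num, &rec,
                                         IB_SA_GUIDINFO_REC_LID |
                                         IB_SA_GUIDINFO_REC_BLOCK_NUM,
                                         IB_MGMT_METHOD_GET,
                                         1000, GFP_KERNEL,
                                         my_guidinfo_handler, NULL, &query);
}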
1716
1717/* Support GET of SA ClassPortInfo */
1718static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1719                                              int status,
1720                                              struct ib_sa_mad *mad)
1721{
1722        struct ib_sa_classport_info_query *query =
1723                container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1724
1725        if (mad) {
1726                struct ib_class_port_info rec;
1727
1728                ib_unpack(classport_info_rec_table,
1729                          ARRAY_SIZE(classport_info_rec_table),
1730                          mad->data, &rec);
1731                query->callback(status, &rec, query->context);
1732        } else {
1733                query->callback(status, NULL, query->context);
1734        }
1735}
1736
1737static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1738{
1739        kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1740                           sa_query));
1741}
1742
1743int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
1744                                   struct ib_device *device, u8 port_num,
1745                                   int timeout_ms, gfp_t gfp_mask,
1746                                   void (*callback)(int status,
1747                                                    struct ib_class_port_info *resp,
1748                                                    void *context),
1749                                   void *context,
1750                                   struct ib_sa_query **sa_query)
1751{
1752        struct ib_sa_classport_info_query *query;
1753        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1754        struct ib_sa_port *port;
1755        struct ib_mad_agent *agent;
1756        struct ib_sa_mad *mad;
1757        int ret;
1758
1759        if (!sa_dev)
1760                return -ENODEV;
1761
1762        port  = &sa_dev->port[port_num - sa_dev->start_port];
1763        agent = port->agent;
1764
1765        query = kzalloc(sizeof(*query), gfp_mask);
1766        if (!query)
1767                return -ENOMEM;
1768
1769        query->sa_query.port = port;
1770        ret = alloc_mad(&query->sa_query, gfp_mask);
1771        if (ret)
1772                goto err1;
1773
1774        ib_sa_client_get(client);
1775        query->sa_query.client = client;
1776        query->callback        = callback;
1777        query->context         = context;
1778
1779        mad = query->sa_query.mad_buf->mad;
1780        init_mad(mad, agent);
1781
1782        query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
1783
1784        query->sa_query.release  = ib_sa_classport_info_rec_release;
1785        /* support GET only */
1786        mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1787        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1788        mad->sa_hdr.comp_mask    = 0;
1789        *sa_query = &query->sa_query;
1790
1791        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1792        if (ret < 0)
1793                goto err2;
1794
1795        return ret;
1796
1797err2:
1798        *sa_query = NULL;
1799        ib_sa_client_put(query->sa_query.client);
1800        free_mad(&query->sa_query);
1801
1802err1:
1803        kfree(query);
1804        return ret;
1805}
1806EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
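/*
 * Illustrative sketch (not part of the original file): a GET of the
 * SA's ClassPortInfo, e.g. to inspect its capability mask.  The my_*
 * names are hypothetical.
 */
static void my_cpi_handler(int status, struct ib_class_port_info *resp,
                           void *context)
{
        if (!status)
                pr_info("SA capability mask 0x%04x\n",
                        be16_to_cpu(resp->capability_mask));
}

static int my_get_sa_cpi(struct ib_sa_client *client,
                         struct ib_device *device, u8 port_num)
{
        struct ib_sa_query *query;

        /* No record or comp_mask argument: only GET is supported and
         * the query always asks for the whole ClassPortInfo attribute. */
        return ib_sa_classport_info_rec_query(client, device, port_num,
                                              1000, GFP_KERNEL,
                                              my_cpi_handler, NULL, &query);
}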
1807
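/*
 * Completion plumbing shared by every query type above: recv_handler()
 * runs when a response MAD arrives and invokes the per-query callback
 * with status 0 (or -EINVAL if the SA returned a non-zero MAD status);
 * send_handler() runs when the MAD layer retires the send and is the
 * one place where the query's ID, MAD buffer, client reference and
 * memory are released.  On success only the teardown happens there,
 * since the callback already fired from recv_handler().
 */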
1808static void send_handler(struct ib_mad_agent *agent,
1809                         struct ib_mad_send_wc *mad_send_wc)
1810{
1811        struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1812        unsigned long flags;
1813
1814        if (query->callback)
1815                switch (mad_send_wc->status) {
1816                case IB_WC_SUCCESS:
1817                        /* Nothing to do -- recv_handler already ran the callback */
1818                        break;
1819                case IB_WC_RESP_TIMEOUT_ERR:
1820                        query->callback(query, -ETIMEDOUT, NULL);
1821                        break;
1822                case IB_WC_WR_FLUSH_ERR:
1823                        query->callback(query, -EINTR, NULL);
1824                        break;
1825                default:
1826                        query->callback(query, -EIO, NULL);
1827                        break;
1828                }
1829
1830        spin_lock_irqsave(&idr_lock, flags);
1831        idr_remove(&query_idr, query->id);
1832        spin_unlock_irqrestore(&idr_lock, flags);
1833
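        /*
         * Teardown order matters: free_mad() returns the send buffer and
         * drops the SM address handle reference, ib_sa_client_put() drops
         * the caller's client reference, and release() frees the
         * type-specific structure embedding this query.
         */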
1834        free_mad(query);
1835        ib_sa_client_put(query->client);
1836        query->release(query);
1837}
1838
1839static void recv_handler(struct ib_mad_agent *mad_agent,
1840                         struct ib_mad_send_buf *send_buf,
1841                         struct ib_mad_recv_wc *mad_recv_wc)
1842{
1843        struct ib_sa_query *query;
1844
1845        if (!send_buf)
1846                return;
1847
1848        query = send_buf->context[0];
1849        if (query->callback) {
1850                if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
1851                        query->callback(query,
1852                                        mad_recv_wc->recv_buf.mad->mad_hdr.status ?
1853                                        -EINVAL : 0,
1854                                        (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
1855                else
1856                        query->callback(query, -EIO, NULL);
1857        }
1858
1859        ib_free_recv_mad(mad_recv_wc);
1860}
1861
1862static void ib_sa_add_one(struct ib_device *device)
1863{
1864        struct ib_sa_device *sa_dev;
1865        int s, e, i;
1866        int count = 0;
1867
1868        s = rdma_start_port(device);
1869        e = rdma_end_port(device);
1870
1871        sa_dev = kzalloc(sizeof *sa_dev +
1872                         (e - s + 1) * sizeof (struct ib_sa_port),
1873                         GFP_KERNEL);
1874        if (!sa_dev)
1875                return;
1876
1877        sa_dev->start_port = s;
1878        sa_dev->end_port   = e;
1879
1880        for (i = 0; i <= e - s; ++i) {
1881                spin_lock_init(&sa_dev->port[i].ah_lock);
1882                if (!rdma_cap_ib_sa(device, i + s))
1883                        continue;
1884
1885                sa_dev->port[i].sm_ah    = NULL;
1886                sa_dev->port[i].port_num = i + s;
1887
1888                sa_dev->port[i].agent =
1889                        ib_register_mad_agent(device, i + s, IB_QPT_GSI,
1890                                              NULL, 0, send_handler,
1891                                              recv_handler, sa_dev, 0);
1892                if (IS_ERR(sa_dev->port[i].agent))
1893                        goto err;
1894
1895                INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
1896
1897                count++;
1898        }
1899
1900        if (!count)
1901                goto free;
1902
1903        ib_set_client_data(device, &sa_client, sa_dev);
1904
1905        /*
1906         * We register our event handler after everything is set up,
1907         * and then update our cached info after the event handler is
1908         * registered to avoid any problems if a port changes state
1909         * during our initialization.
1910         */
1911
1912        INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
1913        if (ib_register_event_handler(&sa_dev->event_handler))
1914                goto err;
1915
1916        for (i = 0; i <= e - s; ++i) {
1917                if (rdma_cap_ib_sa(device, i + s))
1918                        update_sm_ah(&sa_dev->port[i].update_task);
1919        }
1920
1921        return;
1922
1923err:
1924        while (--i >= 0) {
1925                if (rdma_cap_ib_sa(device, i + s))
1926                        ib_unregister_mad_agent(sa_dev->port[i].agent);
1927        }
1928free:
1929        kfree(sa_dev);
1930        return;
1931}
1932
1933static void ib_sa_remove_one(struct ib_device *device, void *client_data)
1934{
1935        struct ib_sa_device *sa_dev = client_data;
1936        int i;
1937
1938        if (!sa_dev)
1939                return;
1940
1941        ib_unregister_event_handler(&sa_dev->event_handler);
1942
1943        flush_workqueue(ib_wq);
1944
1945        for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
1946                if (rdma_cap_ib_sa(device, i + sa_dev->start_port)) {
1947                        ib_unregister_mad_agent(sa_dev->port[i].agent);
1948                        if (sa_dev->port[i].sm_ah)
1949                                kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
1950                }
1951        }
1953
1954        kfree(sa_dev);
1955}
1956
1957int ib_sa_init(void)
1958{
1959        int ret;
1960
1961        get_random_bytes(&tid, sizeof tid);
1962
1963        atomic_set(&ib_nl_sa_request_seq, 0);
1964
1965        ret = ib_register_client(&sa_client);
1966        if (ret) {
1967                pr_err("Couldn't register ib_sa client\n");
1968                goto err1;
1969        }
1970
1971        ret = mcast_init();
1972        if (ret) {
1973                pr_err("Couldn't initialize multicast handling\n");
1974                goto err2;
1975        }
1976
1977        ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq");
1978        if (!ib_nl_wq) {
1979                ret = -ENOMEM;
1980                goto err3;
1981        }
1982
1983        INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
1984
1985        return 0;
1986
1987err3:
1988        mcast_cleanup();
1989err2:
1990        ib_unregister_client(&sa_client);
1991err1:
1992        return ret;
1993}
1994
1995void ib_sa_cleanup(void)
1996{
1997        cancel_delayed_work(&ib_nl_timed_work);
1998        flush_workqueue(ib_nl_wq);
1999        destroy_workqueue(ib_nl_wq);
2000        mcast_cleanup();
2001        ib_unregister_client(&sa_client);
2002        idr_destroy(&query_idr);
2003}
2004