linux/drivers/infiniband/core/sa_query.c
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
#include <rdma/opa_addr.h>
#include "sa.h"
#include "core_priv.h"

#define IB_SA_LOCAL_SVC_TIMEOUT_MIN             100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT         2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX             200000
#define IB_SA_CPI_MAX_RETRY_CNT                 3
#define IB_SA_CPI_RETRY_WAIT                    1000 /* msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;

struct ib_sa_sm_ah {
        struct ib_ah        *ah;
        struct kref          ref;
        u16                  pkey_index;
        u8                   src_path_mask;
};

enum rdma_class_port_info_type {
        RDMA_CLASS_PORT_INFO_IB,
        RDMA_CLASS_PORT_INFO_OPA
};

struct rdma_class_port_info {
        enum rdma_class_port_info_type type;
        union {
                struct ib_class_port_info ib;
                struct opa_class_port_info opa;
        };
};

struct ib_sa_classport_cache {
        bool valid;
        int retry_cnt;
        struct rdma_class_port_info data;
};

struct ib_sa_port {
        struct ib_mad_agent *agent;
        struct ib_sa_sm_ah  *sm_ah;
        struct work_struct   update_task;
        struct ib_sa_classport_cache classport_info;
        struct delayed_work ib_cpi_work;
        spinlock_t                   classport_lock; /* protects class port info set */
        spinlock_t           ah_lock;
        u32                  port_num;
};

struct ib_sa_device {
        int                     start_port, end_port;
        struct ib_event_handler event_handler;
        struct ib_sa_port port[];
};

struct ib_sa_query {
        void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
        void (*release)(struct ib_sa_query *);
        struct ib_sa_client    *client;
        struct ib_sa_port      *port;
        struct ib_mad_send_buf *mad_buf;
        struct ib_sa_sm_ah     *sm_ah;
        int                     id;
        u32                     flags;
        struct list_head        list; /* Local svc request list */
        u32                     seq; /* Local svc request sequence number */
        unsigned long           timeout; /* Local svc timeout */
        u8                      path_use; /* How will the pathrecord be used */
};

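/*
 * Per-query flag bits: IB_SA_ENABLE_LOCAL_SERVICE routes the query to the
 * netlink local service before falling back to a MAD sent to the SA,
 * IB_SA_CANCEL marks a netlink request for cancellation by the timeout
 * routine, and IB_SA_QUERY_OPA selects the OPA MAD base version and
 * record layout.
 */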
#define IB_SA_ENABLE_LOCAL_SERVICE      0x00000001
#define IB_SA_CANCEL                    0x00000002
#define IB_SA_QUERY_OPA                 0x00000004

struct ib_sa_path_query {
        void (*callback)(int, struct sa_path_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
        struct sa_path_rec *conv_pr;
};

struct ib_sa_guidinfo_query {
        void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

struct ib_sa_classport_info_query {
        void (*callback)(void *);
        void *context;
        struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
        void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
        void *context;
        struct ib_sa_query sa_query;
};

static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
static struct workqueue_struct *ib_nl_wq;
static struct delayed_work ib_nl_timed_work;
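/*
 * Netlink attribute policy for RDMA_NL_LS messages; nla_parse_deprecated()
 * in the handlers below validates incoming attributes against these types
 * and, for binary attributes, against the stated maximum lengths.
 */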
static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
        [LS_NLA_TYPE_PATH_RECORD]       = {.type = NLA_BINARY,
                .len = sizeof(struct ib_path_rec_data)},
        [LS_NLA_TYPE_TIMEOUT]           = {.type = NLA_U32},
        [LS_NLA_TYPE_SERVICE_ID]        = {.type = NLA_U64},
        [LS_NLA_TYPE_DGID]              = {.type = NLA_BINARY,
                .len = sizeof(struct rdma_nla_ls_gid)},
        [LS_NLA_TYPE_SGID]              = {.type = NLA_BINARY,
                .len = sizeof(struct rdma_nla_ls_gid)},
        [LS_NLA_TYPE_TCLASS]            = {.type = NLA_U8},
        [LS_NLA_TYPE_PKEY]              = {.type = NLA_U16},
        [LS_NLA_TYPE_QOS_CLASS]         = {.type = NLA_U16},
};


static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);

static struct ib_client sa_client = {
        .name   = "sa",
        .add    = ib_sa_add_one,
        .remove = ib_sa_remove_one
};

static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);

static DEFINE_SPINLOCK(tid_lock);
static u32 tid;

#define PATH_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct sa_path_rec, field),     \
        .struct_size_bytes   = sizeof_field(struct sa_path_rec, field), \
        .field_name          = "sa_path_rec:" #field

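/*
 * Each ib_field entry below maps one structure field to its bit position
 * in the IBA on-wire record: for example, pkey occupies bits 16-31 of
 * dword 12 of a PathRecord.  ib_pack()/ib_unpack() walk these tables to
 * convert between host structures and the network layout.
 */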
static const struct ib_field path_rec_table[] = {
        { PATH_REC_FIELD(service_id),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 64 },
        { PATH_REC_FIELD(dgid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { PATH_REC_FIELD(sgid),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { PATH_REC_FIELD(ib.dlid),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { PATH_REC_FIELD(ib.slid),
          .offset_words = 10,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { PATH_REC_FIELD(ib.raw_traffic),
          .offset_words = 11,
          .offset_bits  = 0,
          .size_bits    = 1 },
        { RESERVED,
          .offset_words = 11,
          .offset_bits  = 1,
          .size_bits    = 3 },
        { PATH_REC_FIELD(flow_label),
          .offset_words = 11,
          .offset_bits  = 4,
          .size_bits    = 20 },
        { PATH_REC_FIELD(hop_limit),
          .offset_words = 11,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { PATH_REC_FIELD(traffic_class),
          .offset_words = 12,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { PATH_REC_FIELD(reversible),
          .offset_words = 12,
          .offset_bits  = 8,
          .size_bits    = 1 },
        { PATH_REC_FIELD(numb_path),
          .offset_words = 12,
          .offset_bits  = 9,
          .size_bits    = 7 },
        { PATH_REC_FIELD(pkey),
          .offset_words = 12,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { PATH_REC_FIELD(qos_class),
          .offset_words = 13,
          .offset_bits  = 0,
          .size_bits    = 12 },
        { PATH_REC_FIELD(sl),
          .offset_words = 13,
          .offset_bits  = 12,
          .size_bits    = 4 },
        { PATH_REC_FIELD(mtu_selector),
          .offset_words = 13,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { PATH_REC_FIELD(mtu),
          .offset_words = 13,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { PATH_REC_FIELD(rate_selector),
          .offset_words = 13,
          .offset_bits  = 24,
          .size_bits    = 2 },
        { PATH_REC_FIELD(rate),
          .offset_words = 13,
          .offset_bits  = 26,
          .size_bits    = 6 },
        { PATH_REC_FIELD(packet_life_time_selector),
          .offset_words = 14,
          .offset_bits  = 0,
          .size_bits    = 2 },
        { PATH_REC_FIELD(packet_life_time),
          .offset_words = 14,
          .offset_bits  = 2,
          .size_bits    = 6 },
        { PATH_REC_FIELD(preference),
          .offset_words = 14,
          .offset_bits  = 8,
          .size_bits    = 8 },
        { RESERVED,
          .offset_words = 14,
          .offset_bits  = 16,
          .size_bits    = 48 },
};

#define OPA_PATH_REC_FIELD(field) \
        .struct_offset_bytes = \
                offsetof(struct sa_path_rec, field), \
        .struct_size_bytes   = \
                sizeof_field(struct sa_path_rec, field),        \
        .field_name          = "sa_path_rec:" #field

static const struct ib_field opa_path_rec_table[] = {
        { OPA_PATH_REC_FIELD(service_id),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 64 },
        { OPA_PATH_REC_FIELD(dgid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { OPA_PATH_REC_FIELD(sgid),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { OPA_PATH_REC_FIELD(opa.dlid),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_PATH_REC_FIELD(opa.slid),
          .offset_words = 11,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_PATH_REC_FIELD(opa.raw_traffic),
          .offset_words = 12,
          .offset_bits  = 0,
          .size_bits    = 1 },
        { RESERVED,
          .offset_words = 12,
          .offset_bits  = 1,
          .size_bits    = 3 },
        { OPA_PATH_REC_FIELD(flow_label),
          .offset_words = 12,
          .offset_bits  = 4,
          .size_bits    = 20 },
        { OPA_PATH_REC_FIELD(hop_limit),
          .offset_words = 12,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { OPA_PATH_REC_FIELD(traffic_class),
          .offset_words = 13,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { OPA_PATH_REC_FIELD(reversible),
          .offset_words = 13,
          .offset_bits  = 8,
          .size_bits    = 1 },
        { OPA_PATH_REC_FIELD(numb_path),
          .offset_words = 13,
          .offset_bits  = 9,
          .size_bits    = 7 },
        { OPA_PATH_REC_FIELD(pkey),
          .offset_words = 13,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { OPA_PATH_REC_FIELD(opa.l2_8B),
          .offset_words = 14,
          .offset_bits  = 0,
          .size_bits    = 1 },
        { OPA_PATH_REC_FIELD(opa.l2_10B),
          .offset_words = 14,
          .offset_bits  = 1,
          .size_bits    = 1 },
        { OPA_PATH_REC_FIELD(opa.l2_9B),
          .offset_words = 14,
          .offset_bits  = 2,
          .size_bits    = 1 },
        { OPA_PATH_REC_FIELD(opa.l2_16B),
          .offset_words = 14,
          .offset_bits  = 3,
          .size_bits    = 1 },
        { RESERVED,
          .offset_words = 14,
          .offset_bits  = 4,
          .size_bits    = 2 },
        { OPA_PATH_REC_FIELD(opa.qos_type),
          .offset_words = 14,
          .offset_bits  = 6,
          .size_bits    = 2 },
        { OPA_PATH_REC_FIELD(opa.qos_priority),
          .offset_words = 14,
          .offset_bits  = 8,
          .size_bits    = 8 },
        { RESERVED,
          .offset_words = 14,
          .offset_bits  = 16,
          .size_bits    = 3 },
        { OPA_PATH_REC_FIELD(sl),
          .offset_words = 14,
          .offset_bits  = 19,
          .size_bits    = 5 },
        { RESERVED,
          .offset_words = 14,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { OPA_PATH_REC_FIELD(mtu_selector),
          .offset_words = 15,
          .offset_bits  = 0,
          .size_bits    = 2 },
        { OPA_PATH_REC_FIELD(mtu),
          .offset_words = 15,
          .offset_bits  = 2,
          .size_bits    = 6 },
        { OPA_PATH_REC_FIELD(rate_selector),
          .offset_words = 15,
          .offset_bits  = 8,
          .size_bits    = 2 },
        { OPA_PATH_REC_FIELD(rate),
          .offset_words = 15,
          .offset_bits  = 10,
          .size_bits    = 6 },
        { OPA_PATH_REC_FIELD(packet_life_time_selector),
          .offset_words = 15,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { OPA_PATH_REC_FIELD(packet_life_time),
          .offset_words = 15,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { OPA_PATH_REC_FIELD(preference),
          .offset_words = 15,
          .offset_bits  = 24,
          .size_bits    = 8 },
};

#define MCMEMBER_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),      \
        .struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),  \
        .field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
        { MCMEMBER_REC_FIELD(mgid),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { MCMEMBER_REC_FIELD(port_gid),
          .offset_words = 4,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { MCMEMBER_REC_FIELD(qkey),
          .offset_words = 8,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { MCMEMBER_REC_FIELD(mlid),
          .offset_words = 9,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { MCMEMBER_REC_FIELD(mtu_selector),
          .offset_words = 9,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { MCMEMBER_REC_FIELD(mtu),
          .offset_words = 9,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { MCMEMBER_REC_FIELD(traffic_class),
          .offset_words = 9,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { MCMEMBER_REC_FIELD(pkey),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { MCMEMBER_REC_FIELD(rate_selector),
          .offset_words = 10,
          .offset_bits  = 16,
          .size_bits    = 2 },
        { MCMEMBER_REC_FIELD(rate),
          .offset_words = 10,
          .offset_bits  = 18,
          .size_bits    = 6 },
        { MCMEMBER_REC_FIELD(packet_life_time_selector),
          .offset_words = 10,
          .offset_bits  = 24,
          .size_bits    = 2 },
        { MCMEMBER_REC_FIELD(packet_life_time),
          .offset_words = 10,
          .offset_bits  = 26,
          .size_bits    = 6 },
        { MCMEMBER_REC_FIELD(sl),
          .offset_words = 11,
          .offset_bits  = 0,
          .size_bits    = 4 },
        { MCMEMBER_REC_FIELD(flow_label),
          .offset_words = 11,
          .offset_bits  = 4,
          .size_bits    = 20 },
        { MCMEMBER_REC_FIELD(hop_limit),
          .offset_words = 11,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { MCMEMBER_REC_FIELD(scope),
          .offset_words = 12,
          .offset_bits  = 0,
          .size_bits    = 4 },
        { MCMEMBER_REC_FIELD(join_state),
          .offset_words = 12,
          .offset_bits  = 4,
          .size_bits    = 4 },
        { MCMEMBER_REC_FIELD(proxy_join),
          .offset_words = 12,
          .offset_bits  = 8,
          .size_bits    = 1 },
        { RESERVED,
          .offset_words = 12,
          .offset_bits  = 9,
          .size_bits    = 23 },
};

#define CLASSPORTINFO_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_class_port_info, field),      \
        .struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),  \
        .field_name          = "ib_class_port_info:" #field

static const struct ib_field ib_classport_info_rec_table[] = {
        { CLASSPORTINFO_REC_FIELD(base_version),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { CLASSPORTINFO_REC_FIELD(class_version),
          .offset_words = 0,
          .offset_bits  = 8,
          .size_bits    = 8 },
        { CLASSPORTINFO_REC_FIELD(capability_mask),
          .offset_words = 0,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
          .offset_words = 1,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(redirect_gid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(redirect_lid),
          .offset_words = 7,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { CLASSPORTINFO_REC_FIELD(redirect_pkey),
          .offset_words = 7,
          .offset_bits  = 16,
          .size_bits    = 16 },

        { CLASSPORTINFO_REC_FIELD(redirect_qp),
          .offset_words = 8,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(redirect_qkey),
          .offset_words = 9,
          .offset_bits  = 0,
          .size_bits    = 32 },

        { CLASSPORTINFO_REC_FIELD(trap_gid),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
          .offset_words = 14,
          .offset_bits  = 0,
          .size_bits    = 32 },

        { CLASSPORTINFO_REC_FIELD(trap_lid),
          .offset_words = 15,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { CLASSPORTINFO_REC_FIELD(trap_pkey),
          .offset_words = 15,
          .offset_bits  = 16,
          .size_bits    = 16 },

        { CLASSPORTINFO_REC_FIELD(trap_hlqp),
          .offset_words = 16,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { CLASSPORTINFO_REC_FIELD(trap_qkey),
          .offset_words = 17,
          .offset_bits  = 0,
          .size_bits    = 32 },
};

#define OPA_CLASSPORTINFO_REC_FIELD(field) \
        .struct_offset_bytes =\
                offsetof(struct opa_class_port_info, field),    \
        .struct_size_bytes   = \
                sizeof_field(struct opa_class_port_info, field),        \
        .field_name          = "opa_class_port_info:" #field

static const struct ib_field opa_classport_info_rec_table[] = {
        { OPA_CLASSPORTINFO_REC_FIELD(base_version),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { OPA_CLASSPORTINFO_REC_FIELD(class_version),
          .offset_words = 0,
          .offset_bits  = 8,
          .size_bits    = 8 },
        { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
          .offset_words = 0,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
          .offset_words = 1,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
          .offset_words = 6,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
          .offset_words = 7,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
          .offset_words = 8,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
          .offset_words = 9,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
          .offset_words = 10,
          .offset_bits  = 0,
          .size_bits    = 128 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
          .offset_words = 14,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
          .offset_words = 15,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
          .offset_words = 16,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
          .offset_words = 17,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
          .offset_words = 18,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
          .offset_words = 18,
          .offset_bits  = 16,
          .size_bits    = 16 },
        { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
          .offset_words = 19,
          .offset_bits  = 0,
          .size_bits    = 8 },
        { RESERVED,
          .offset_words = 19,
          .offset_bits  = 8,
          .size_bits    = 24 },
};

#define GUIDINFO_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),      \
        .struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),  \
        .field_name          = "sa_guidinfo_rec:" #field

static const struct ib_field guidinfo_rec_table[] = {
        { GUIDINFO_REC_FIELD(lid),
          .offset_words = 0,
          .offset_bits  = 0,
          .size_bits    = 16 },
        { GUIDINFO_REC_FIELD(block_num),
          .offset_words = 0,
          .offset_bits  = 16,
          .size_bits    = 8 },
        { GUIDINFO_REC_FIELD(res1),
          .offset_words = 0,
          .offset_bits  = 24,
          .size_bits    = 8 },
        { GUIDINFO_REC_FIELD(res2),
          .offset_words = 1,
          .offset_bits  = 0,
          .size_bits    = 32 },
        { GUIDINFO_REC_FIELD(guid_info_list),
          .offset_words = 2,
          .offset_bits  = 0,
          .size_bits    = 512 },
};

static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
{
        query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
}

static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
{
        return (query->flags & IB_SA_CANCEL);
}

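/*
 * Translate the pending SA path query into RDMA_NL_LS netlink attributes:
 * a struct rdma_ls_resolve_header naming the device and port, followed by
 * one mandatory attribute per comp_mask bit set in the MAD.
 */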
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
                                     struct ib_sa_query *query)
{
        struct sa_path_rec *sa_rec = query->mad_buf->context[1];
        struct ib_sa_mad *mad = query->mad_buf->mad;
        ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
        u16 val16;
        u64 val64;
        struct rdma_ls_resolve_header *header;

        query->mad_buf->context[1] = NULL;

        /* Construct the family header first */
        header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
        strscpy_pad(header->device_name,
                    dev_name(&query->port->agent->device->dev),
                    LS_DEVICE_NAME_MAX);
        header->port_num = query->port->port_num;

        if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
            sa_rec->reversible != 0)
                query->path_use = LS_RESOLVE_PATH_USE_GMP;
        else
                query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
        header->path_use = query->path_use;

        /* Now build the attributes */
        if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
                val64 = be64_to_cpu(sa_rec->service_id);
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
                        sizeof(val64), &val64);
        }
        if (comp_mask & IB_SA_PATH_REC_DGID)
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
                        sizeof(sa_rec->dgid), &sa_rec->dgid);
        if (comp_mask & IB_SA_PATH_REC_SGID)
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
                        sizeof(sa_rec->sgid), &sa_rec->sgid);
        if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
                        sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);

        if (comp_mask & IB_SA_PATH_REC_PKEY) {
                val16 = be16_to_cpu(sa_rec->pkey);
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
                        sizeof(val16), &val16);
        }
        if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
                val16 = be16_to_cpu(sa_rec->qos_class);
                nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
                        sizeof(val16), &val16);
        }
}

static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
{
        int len = 0;

        if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
                len += nla_total_size(sizeof(u64));
        if (comp_mask & IB_SA_PATH_REC_DGID)
                len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
        if (comp_mask & IB_SA_PATH_REC_SGID)
                len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
        if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
                len += nla_total_size(sizeof(u8));
        if (comp_mask & IB_SA_PATH_REC_PKEY)
                len += nla_total_size(sizeof(u16));
        if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
                len += nla_total_size(sizeof(u16));

        /*
         * Make sure that at least some of the required comp_mask bits are
         * set.
         */
        if (WARN_ON(len == 0))
                return len;

        /* Add the family header */
        len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));

        return len;
}

static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
        struct sk_buff *skb = NULL;
        struct nlmsghdr *nlh;
        void *data;
        struct ib_sa_mad *mad;
        int len;
        unsigned long flags;
        unsigned long delay;
        gfp_t gfp_flag;
        int ret;

        INIT_LIST_HEAD(&query->list);
        query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);

        mad = query->mad_buf->mad;
        len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
        if (len <= 0)
                return -EMSGSIZE;

        skb = nlmsg_new(len, gfp_mask);
        if (!skb)
                return -ENOMEM;

        /* Put nlmsg header only for now */
        data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
                            RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
        if (!data) {
                nlmsg_free(skb);
                return -EMSGSIZE;
        }

        /* Add attributes */
        ib_nl_set_path_rec_attrs(skb, query);

        /* Repair the nlmsg header length */
        nlmsg_end(skb, nlh);

        gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
                GFP_NOWAIT;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);

        if (ret)
                goto out;

        /* Put the request on the list. */
        delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
        query->timeout = delay + jiffies;
        list_add_tail(&query->list, &ib_nl_request_list);
        /* Start the timeout if this is the only request */
        if (ib_nl_request_list.next == &query->list)
                queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);

out:
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);

        return ret;
}

static int ib_nl_cancel_request(struct ib_sa_query *query)
{
        unsigned long flags;
        struct ib_sa_query *wait_query;
        int found = 0;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        list_for_each_entry(wait_query, &ib_nl_request_list, list) {
                /* Let the timeout routine take care of the callback */
                if (query == wait_query) {
                        query->flags |= IB_SA_CANCEL;
                        query->timeout = jiffies;
                        list_move(&query->list, &ib_nl_request_list);
                        found = 1;
                        mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
                        break;
                }
        }
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);

        return found;
}

static void send_handler(struct ib_mad_agent *agent,
                         struct ib_mad_send_wc *mad_send_wc);

static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
                                           const struct nlmsghdr *nlh)
{
        struct ib_mad_send_wc mad_send_wc;
        struct ib_sa_mad *mad = NULL;
        const struct nlattr *head, *curr;
        struct ib_path_rec_data  *rec;
        int len, rem;
        u32 mask = 0;
        int status = -EIO;

        if (query->callback) {
                head = (const struct nlattr *) nlmsg_data(nlh);
                len = nlmsg_len(nlh);
                switch (query->path_use) {
                case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
                        mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
                        break;

                case LS_RESOLVE_PATH_USE_ALL:
                case LS_RESOLVE_PATH_USE_GMP:
                default:
                        mask = IB_PATH_PRIMARY | IB_PATH_GMP |
                                IB_PATH_BIDIRECTIONAL;
                        break;
                }
                nla_for_each_attr(curr, head, len, rem) {
                        if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
                                rec = nla_data(curr);
                                /*
                                 * Get the first one. In the future, we may
                                 * need to get up to 6 pathrecords.
                                 */
                                if ((rec->flags & mask) == mask) {
                                        mad = query->mad_buf->mad;
                                        mad->mad_hdr.method |=
                                                IB_MGMT_METHOD_RESP;
                                        memcpy(mad->data, rec->path_rec,
                                               sizeof(rec->path_rec));
                                        status = 0;
                                        break;
                                }
                        }
                }
                query->callback(query, status, mad);
        }

        mad_send_wc.send_buf = query->mad_buf;
        mad_send_wc.status = IB_WC_SUCCESS;
        send_handler(query->mad_buf->mad_agent, &mad_send_wc);
}

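/*
 * Walk the request list in submission order; entries were appended with a
 * uniform per-request delay, so the head always expires first.  Expired
 * requests fall back to a MAD send unless they were cancelled.
 */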
static void ib_nl_request_timeout(struct work_struct *work)
{
        unsigned long flags;
        struct ib_sa_query *query;
        unsigned long delay;
        struct ib_mad_send_wc mad_send_wc;
        int ret;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        while (!list_empty(&ib_nl_request_list)) {
                query = list_entry(ib_nl_request_list.next,
                                   struct ib_sa_query, list);

                if (time_after(query->timeout, jiffies)) {
                        delay = query->timeout - jiffies;
                        if ((long)delay <= 0)
                                delay = 1;
                        queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
                        break;
                }

                list_del(&query->list);
                ib_sa_disable_local_svc(query);
                /* Hold the lock to protect against query cancellation */
                if (ib_sa_query_cancelled(query))
                        ret = -1;
                else
                        ret = ib_post_send_mad(query->mad_buf, NULL);
                if (ret) {
                        mad_send_wc.send_buf = query->mad_buf;
                        mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
                        spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                        send_handler(query->port->agent, &mad_send_wc);
                        spin_lock_irqsave(&ib_nl_request_lock, flags);
                }
        }
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}

int ib_nl_handle_set_timeout(struct sk_buff *skb,
                             struct nlmsghdr *nlh,
                             struct netlink_ext_ack *extack)
{
        int timeout, delta, abs_delta;
        const struct nlattr *attr;
        unsigned long flags;
        struct ib_sa_query *query;
        long delay = 0;
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;

        if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
            !(NETLINK_CB(skb).sk))
                return -EPERM;

        ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
                                   nlmsg_len(nlh), ib_nl_policy, NULL);
        attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
        if (ret || !attr)
                goto settimeout_out;

        timeout = *(int *) nla_data(attr);
        if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
                timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
        if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
                timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;

        delta = timeout - sa_local_svc_timeout_ms;
        if (delta < 0)
                abs_delta = -delta;
        else
                abs_delta = delta;

        if (delta != 0) {
                spin_lock_irqsave(&ib_nl_request_lock, flags);
                sa_local_svc_timeout_ms = timeout;
                list_for_each_entry(query, &ib_nl_request_list, list) {
                        if (delta < 0 && abs_delta > query->timeout)
                                query->timeout = 0;
                        else
                                query->timeout += delta;

                        /* Get the new delay from the first entry */
                        if (!delay) {
                                delay = query->timeout - jiffies;
                                if (delay <= 0)
                                        delay = 1;
                        }
                }
                if (delay)
                        mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
                                         (unsigned long)delay);
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
        }

settimeout_out:
        return 0;
}

static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
{
        struct nlattr *tb[LS_NLA_TYPE_MAX];
        int ret;

        if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
                return 0;

        ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
                                   nlmsg_len(nlh), ib_nl_policy, NULL);
        if (ret)
                return 0;

        return 1;
}

int ib_nl_handle_resolve_resp(struct sk_buff *skb,
                              struct nlmsghdr *nlh,
                              struct netlink_ext_ack *extack)
{
        unsigned long flags;
        struct ib_sa_query *query;
        struct ib_mad_send_buf *send_buf;
        struct ib_mad_send_wc mad_send_wc;
        int found = 0;
        int ret;

        if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
            !(NETLINK_CB(skb).sk))
                return -EPERM;

        spin_lock_irqsave(&ib_nl_request_lock, flags);
        list_for_each_entry(query, &ib_nl_request_list, list) {
                /*
                 * If the query is cancelled, let the timeout routine
                 * take care of it.
                 */
                if (nlh->nlmsg_seq == query->seq) {
                        found = !ib_sa_query_cancelled(query);
                        if (found)
                                list_del(&query->list);
                        break;
                }
        }

        if (!found) {
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                goto resp_out;
        }

        send_buf = query->mad_buf;

        if (!ib_nl_is_good_resolve_resp(nlh)) {
                /* if the result is a failure, send out the packet via IB */
                ib_sa_disable_local_svc(query);
                ret = ib_post_send_mad(query->mad_buf, NULL);
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                if (ret) {
                        mad_send_wc.send_buf = send_buf;
                        mad_send_wc.status = IB_WC_GENERAL_ERR;
                        send_handler(query->port->agent, &mad_send_wc);
                }
        } else {
                spin_unlock_irqrestore(&ib_nl_request_lock, flags);
                ib_nl_process_good_resolve_rsp(query, nlh);
        }

resp_out:
        return 0;
}

static void free_sm_ah(struct kref *kref)
{
        struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

        rdma_destroy_ah(sm_ah->ah, 0);
        kfree(sm_ah);
}

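/*
 * SA clients are reference counted: registration sets the count to one,
 * each outstanding query takes a reference (ib_sa_client_get()/
 * ib_sa_client_put()), and ib_sa_unregister_client() drops the initial
 * reference and waits on the completion until the last query finishes.
 */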
void ib_sa_register_client(struct ib_sa_client *client)
{
        atomic_set(&client->users, 1);
        init_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_register_client);

void ib_sa_unregister_client(struct ib_sa_client *client)
{
        ib_sa_client_put(client);
        wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
 * @query:query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
        unsigned long flags;
        struct ib_mad_send_buf *mad_buf;

        xa_lock_irqsave(&queries, flags);
        if (xa_load(&queries, id) != query) {
                xa_unlock_irqrestore(&queries, flags);
                return;
        }
        mad_buf = query->mad_buf;
        xa_unlock_irqrestore(&queries, flags);

        /*
         * If the query is still on the netlink request list, schedule
         * it to be cancelled by the timeout routine. Otherwise, it has been
         * sent to the MAD layer and has to be cancelled from there.
         */
        if (!ib_nl_cancel_request(query))
                ib_cancel_mad(mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
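
/*
 * A minimal caller sketch (hypothetical variables, saved from an earlier
 * query call such as ib_sa_path_rec_get()):
 *
 *	int id = ...;                  // return value of the query call
 *	struct ib_sa_query *query = ...; // filled in via **sa_query
 *
 *	ib_sa_cancel_query(id, query);
 *
 * The query's callback then runs with status -EINTR.
 */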

static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
{
        struct ib_sa_device *sa_dev;
        struct ib_sa_port   *port;
        unsigned long flags;
        u8 src_path_mask;

        sa_dev = ib_get_client_data(device, &sa_client);
        if (!sa_dev)
                return 0x7f;

        port  = &sa_dev->port[port_num - sa_dev->start_port];
        spin_lock_irqsave(&port->ah_lock, flags);
        src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
        spin_unlock_irqrestore(&port->ah_lock, flags);

        return src_path_mask;
}

static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
                                   struct sa_path_rec *rec,
                                   struct rdma_ah_attr *ah_attr,
                                   const struct ib_gid_attr *gid_attr)
{
        enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);

        if (!gid_attr) {
                gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
                                                 port_num, NULL);
                if (IS_ERR(gid_attr))
                        return PTR_ERR(gid_attr);
        } else
                rdma_hold_gid_attr(gid_attr);

        rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
                                be32_to_cpu(rec->flow_label),
                                rec->hop_limit, rec->traffic_class,
                                gid_attr);
        return 0;
}

/**
 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
 *   an SA path record.
 * @device: Device the ah attributes are being initialized for.
 * @port_num: Port on the specified device.
 * @rec: path record entry to use for ah attributes initialization.
 * @ah_attr: address handle attributes to initialize from the path record.
 * @gid_attr: SGID attribute to consider during initialization.
 *
 * When ib_init_ah_attr_from_path() returns success:
 * (a) for the IB link layer, ah_attr optionally holds a reference to the
 * SGID attribute when a GRH is present;
 * (b) for the RoCE link layer, ah_attr always holds a reference to the
 * SGID attribute.
 * The caller must invoke rdma_destroy_ah_attr() to release the SGID
 * attribute reference taken by ib_init_ah_attr_from_path().
 */
int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
                              struct sa_path_rec *rec,
                              struct rdma_ah_attr *ah_attr,
                              const struct ib_gid_attr *gid_attr)
{
        int ret = 0;

        memset(ah_attr, 0, sizeof(*ah_attr));
        ah_attr->type = rdma_ah_find_type(device, port_num);
        rdma_ah_set_sl(ah_attr, rec->sl);
        rdma_ah_set_port_num(ah_attr, port_num);
        rdma_ah_set_static_rate(ah_attr, rec->rate);

        if (sa_path_is_roce(rec)) {
                ret = roce_resolve_route_from_path(rec, gid_attr);
                if (ret)
                        return ret;

                memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
        } else {
                rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
                if (sa_path_is_opa(rec) &&
                    rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
                        rdma_ah_set_make_grd(ah_attr, true);

                rdma_ah_set_path_bits(ah_attr,
                                      be32_to_cpu(sa_path_get_slid(rec)) &
                                      get_src_path_mask(device, port_num));
        }

        if (rec->hop_limit > 0 || sa_path_is_roce(rec))
                ret = init_ah_attr_grh_fields(device, port_num,
                                              rec, ah_attr, gid_attr);
        return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
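
/*
 * A minimal caller sketch (assuming a resolved path record "rec"):
 *
 *	struct rdma_ah_attr ah_attr;
 *
 *	if (!ib_init_ah_attr_from_path(device, port_num, rec, &ah_attr, NULL)) {
 *		... create an AH or program a QP from ah_attr ...
 *		rdma_destroy_ah_attr(&ah_attr);
 *	}
 */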

static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
        struct rdma_ah_attr ah_attr;
        unsigned long flags;

        spin_lock_irqsave(&query->port->ah_lock, flags);
        if (!query->port->sm_ah) {
                spin_unlock_irqrestore(&query->port->ah_lock, flags);
                return -EAGAIN;
        }
        kref_get(&query->port->sm_ah->ref);
        query->sm_ah = query->port->sm_ah;
        spin_unlock_irqrestore(&query->port->ah_lock, flags);

        /*
         * Always check if sm_ah has valid dlid assigned,
         * before querying for class port info
         */
        if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
            !rdma_is_valid_unicast_lid(&ah_attr)) {
                kref_put(&query->sm_ah->ref, free_sm_ah);
                return -EAGAIN;
        }
        query->mad_buf = ib_create_send_mad(query->port->agent, 1,
                                            query->sm_ah->pkey_index,
                                            0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
                                            gfp_mask,
                                            ((query->flags & IB_SA_QUERY_OPA) ?
                                             OPA_MGMT_BASE_VERSION :
                                             IB_MGMT_BASE_VERSION));
        if (IS_ERR(query->mad_buf)) {
                kref_put(&query->sm_ah->ref, free_sm_ah);
                return -ENOMEM;
        }

        query->mad_buf->ah = query->sm_ah->ah;

        return 0;
}

static void free_mad(struct ib_sa_query *query)
{
        ib_free_send_mad(query->mad_buf);
        kref_put(&query->sm_ah->ref, free_sm_ah);
}

static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
        struct ib_sa_mad *mad = query->mad_buf->mad;
        unsigned long flags;

        memset(mad, 0, sizeof *mad);

        if (query->flags & IB_SA_QUERY_OPA) {
                mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
                mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
        } else {
                mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
                mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
        }
        mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
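        /*
         * Build the 64-bit TID: the agent's hi_tid fills the upper 32 bits
         * and a globally incremented, lock-protected counter the lower 32,
         * keeping TIDs unique across agents and queries.
         */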
        spin_lock_irqsave(&tid_lock, flags);
        mad->mad_hdr.tid           =
                cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
        spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
                    gfp_t gfp_mask)
{
        unsigned long flags;
        int ret, id;
        const int nmbr_sa_query_retries = 10;

        xa_lock_irqsave(&queries, flags);
        ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
        xa_unlock_irqrestore(&queries, flags);
        if (ret < 0)
                return ret;

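        /*
         * Split the caller's total timeout across the MAD retries: e.g. a
         * 1000 ms timeout becomes 10 tries of 100 ms each.  Timeouts below
         * the retry count are handled by the 1 ms special case below.
         */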
        query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
        query->mad_buf->retries = nmbr_sa_query_retries;
        if (!query->mad_buf->timeout_ms) {
                /* Special case, very small timeout_ms */
                query->mad_buf->timeout_ms = 1;
                query->mad_buf->retries = timeout_ms;
        }
        query->mad_buf->context[0] = query;
        query->id = id;

        if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
            (!(query->flags & IB_SA_QUERY_OPA))) {
                if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
                        if (!ib_nl_make_request(query, gfp_mask))
                                return id;
                }
                ib_sa_disable_local_svc(query);
        }

        ret = ib_post_send_mad(query->mad_buf, NULL);
        if (ret) {
                xa_lock_irqsave(&queries, flags);
                __xa_erase(&queries, id);
                xa_unlock_irqrestore(&queries, flags);
        }

        /*
         * It's not safe to dereference query any more, because the
         * send may already have completed and freed the query in
         * another context.
         */
        return ret ? ret : id;
}

void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
        ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);

void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
        ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
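
/*
 * A minimal round-trip sketch: packing a path record into the on-wire SA
 * attribute layout and unpacking it again.
 *
 *	u8 wire[IB_MGMT_SA_DATA];
 *	struct sa_path_rec out;
 *
 *	ib_sa_pack_path(rec, wire);	// host struct -> network layout
 *	ib_sa_unpack_path(wire, &out);	// network layout -> host struct
 */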

static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
                                         struct ib_sa_device *sa_dev,
                                         u32 port_num)
{
        struct ib_sa_port *port;
        unsigned long flags;
        bool ret = false;

        port = &sa_dev->port[port_num - sa_dev->start_port];
        spin_lock_irqsave(&port->classport_lock, flags);
        if (!port->classport_info.valid)
                goto ret;

        if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
                ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
                        OPA_CLASS_PORT_INFO_PR_SUPPORT;
ret:
        spin_unlock_irqrestore(&port->classport_lock, flags);
        return ret;
}

enum opa_pr_supported {
        PR_NOT_SUPPORTED,
        PR_OPA_SUPPORTED,
        PR_IB_SUPPORTED
};

/*
 * opa_pr_query_possible - Check if the current PR query can be an OPA query.
 *
 * Returns PR_NOT_SUPPORTED if a path record query is not
 * possible, PR_OPA_SUPPORTED if an OPA path record query
 * is possible and PR_IB_SUPPORTED if an IB path record
 * query is possible.
 */
1397static int opa_pr_query_possible(struct ib_sa_client *client,
1398                                 struct ib_sa_device *sa_dev,
1399                                 struct ib_device *device, u32 port_num)
1400{
1401        struct ib_port_attr port_attr;
1402
1403        if (ib_query_port(device, port_num, &port_attr))
1404                return PR_NOT_SUPPORTED;
1405
1406        if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1407                return PR_OPA_SUPPORTED;
1408
1409        if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1410                return PR_NOT_SUPPORTED;
1411        else
1412                return PR_IB_SUPPORTED;
1413}
1414
1415static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1416                                    int status,
1417                                    struct ib_sa_mad *mad)
1418{
1419        struct ib_sa_path_query *query =
1420                container_of(sa_query, struct ib_sa_path_query, sa_query);
1421
1422        if (mad) {
1423                struct sa_path_rec rec;
1424
1425                if (sa_query->flags & IB_SA_QUERY_OPA) {
1426                        ib_unpack(opa_path_rec_table,
1427                                  ARRAY_SIZE(opa_path_rec_table),
1428                                  mad->data, &rec);
1429                        rec.rec_type = SA_PATH_REC_TYPE_OPA;
1430                        query->callback(status, &rec, query->context);
1431                } else {
1432                        ib_unpack(path_rec_table,
1433                                  ARRAY_SIZE(path_rec_table),
1434                                  mad->data, &rec);
1435                        rec.rec_type = SA_PATH_REC_TYPE_IB;
1436                        sa_path_set_dmac_zero(&rec);
1437
1438                        if (query->conv_pr) {
1439                                struct sa_path_rec opa;
1440
1441                                memset(&opa, 0, sizeof(struct sa_path_rec));
1442                                sa_convert_path_ib_to_opa(&opa, &rec);
1443                                query->callback(status, &opa, query->context);
1444                        } else {
1445                                query->callback(status, &rec, query->context);
1446                        }
1447                }
1448        } else
1449                query->callback(status, NULL, query->context);
1450}
1451
1452static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1453{
1454        struct ib_sa_path_query *query =
1455                container_of(sa_query, struct ib_sa_path_query, sa_query);
1456
1457        kfree(query->conv_pr);
1458        kfree(query);
1459}
1460
1461/**
1462 * ib_sa_path_rec_get - Start a Path get query
1463 * @client: SA client
1464 * @device: device to send query on
1465 * @port_num: port number to send query on
1466 * @rec: Path Record to send in query
1467 * @comp_mask: component mask to send in query
1468 * @timeout_ms: time to wait for response
1469 * @gfp_mask: GFP mask to use for internal allocations
1470 * @callback: function called when the query completes, times out, or
1471 * is canceled
1472 * @context: opaque user context passed to callback
1473 * @sa_query: query context, used to cancel query
1474 *
1475 * Send a Path Record Get query to the SA to look up a path.  The
1476 * callback function will be called when the query completes (or
1477 * fails); status is 0 for a successful response, -EINTR if the query
1478 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
1479 * occurred sending the query.  The resp parameter of the callback is
1480 * only valid if status is 0.
1481 *
1482 * If the return value of ib_sa_path_rec_get() is negative, it is an
1483 * error code.  Otherwise it is a query ID that can be used to cancel
1484 * the query.
1485 */
1486int ib_sa_path_rec_get(struct ib_sa_client *client,
1487                       struct ib_device *device, u32 port_num,
1488                       struct sa_path_rec *rec,
1489                       ib_sa_comp_mask comp_mask,
1490                       unsigned long timeout_ms, gfp_t gfp_mask,
1491                       void (*callback)(int status,
1492                                        struct sa_path_rec *resp,
1493                                        void *context),
1494                       void *context,
1495                       struct ib_sa_query **sa_query)
1496{
1497        struct ib_sa_path_query *query;
1498        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1499        struct ib_sa_port   *port;
1500        struct ib_mad_agent *agent;
1501        struct ib_sa_mad *mad;
1502        enum opa_pr_supported status;
1503        int ret;
1504
1505        if (!sa_dev)
1506                return -ENODEV;
1507
1508        if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1509            (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1510                return -EINVAL;
1511
1512        port  = &sa_dev->port[port_num - sa_dev->start_port];
1513        agent = port->agent;
1514
1515        query = kzalloc(sizeof(*query), gfp_mask);
1516        if (!query)
1517                return -ENOMEM;
1518
1519        query->sa_query.port     = port;
1520        if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1521                status = opa_pr_query_possible(client, sa_dev, device, port_num);
1522                if (status == PR_NOT_SUPPORTED) {
1523                        ret = -EINVAL;
1524                        goto err1;
1525                } else if (status == PR_OPA_SUPPORTED) {
1526                        query->sa_query.flags |= IB_SA_QUERY_OPA;
1527                } else {
1528                        query->conv_pr =
1529                                kmalloc(sizeof(*query->conv_pr), gfp_mask);
1530                        if (!query->conv_pr) {
1531                                ret = -ENOMEM;
1532                                goto err1;
1533                        }
1534                }
1535        }
1536
1537        ret = alloc_mad(&query->sa_query, gfp_mask);
1538        if (ret)
1539                goto err2;
1540
1541        ib_sa_client_get(client);
1542        query->sa_query.client = client;
1543        query->callback        = callback;
1544        query->context         = context;
1545
1546        mad = query->sa_query.mad_buf->mad;
1547        init_mad(&query->sa_query, agent);
1548
1549        query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1550        query->sa_query.release  = ib_sa_path_rec_release;
1551        mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1552        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1553        mad->sa_hdr.comp_mask    = comp_mask;
1554
1555        if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1556                ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1557                        rec, mad->data);
1558        } else if (query->conv_pr) {
1559                sa_convert_path_opa_to_ib(query->conv_pr, rec);
1560                ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1561                        query->conv_pr, mad->data);
1562        } else {
1563                ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1564                        rec, mad->data);
1565        }
1566
1567        *sa_query = &query->sa_query;
1568
1569        query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1570        query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1571                                                query->conv_pr : rec;
1572
1573        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1574        if (ret < 0)
1575                goto err3;
1576
1577        return ret;
1578
1579err3:
1580        *sa_query = NULL;
1581        ib_sa_client_put(query->sa_query.client);
1582        free_mad(&query->sa_query);
1583err2:
1584        kfree(query->conv_pr);
1585err1:
1586        kfree(query);
1587        return ret;
1588}
1589EXPORT_SYMBOL(ib_sa_path_rec_get);
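/*
 * Minimal usage sketch (not part of this file; my_sa_client, my_path_cb
 * and the GIDs below are hypothetical).  The caller must have registered
 * my_sa_client with ib_sa_register_client() first:
 *
 *	static void my_path_cb(int status, struct sa_path_rec *resp, void *ctx)
 *	{
 *		if (!status)
 *			pr_info("got path, SLID 0x%x\n",
 *				be32_to_cpu(sa_path_get_slid(resp)));
 *	}
 *
 *	static int my_lookup(struct ib_device *dev, u32 port,
 *			     union ib_gid *sgid, union ib_gid *dgid)
 *	{
 *		struct ib_sa_query *query;
 *		struct sa_path_rec rec = {};
 *		int id;
 *
 *		rec.rec_type = SA_PATH_REC_TYPE_IB;
 *		rec.sgid = *sgid;
 *		rec.dgid = *dgid;
 *		id = ib_sa_path_rec_get(&my_sa_client, dev, port, &rec,
 *					IB_SA_PATH_REC_SGID |
 *					IB_SA_PATH_REC_DGID,
 *					1000, GFP_KERNEL, my_path_cb,
 *					NULL, &query);
 *		return id < 0 ? id : 0;
 *	}
 *
 * On a negative return no callback will be invoked; otherwise the returned
 * id may be passed to ib_sa_cancel_query(id, query), in which case the
 * callback still runs, with status == -EINTR.
 */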
1590
1591static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1592                                        int status,
1593                                        struct ib_sa_mad *mad)
1594{
1595        struct ib_sa_mcmember_query *query =
1596                container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1597
1598        if (mad) {
1599                struct ib_sa_mcmember_rec rec;
1600
1601                ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1602                          mad->data, &rec);
1603                query->callback(status, &rec, query->context);
1604        } else
1605                query->callback(status, NULL, query->context);
1606}
1607
1608static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1609{
1610        kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1611}
1612
1613int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1614                             struct ib_device *device, u32 port_num,
1615                             u8 method,
1616                             struct ib_sa_mcmember_rec *rec,
1617                             ib_sa_comp_mask comp_mask,
1618                             unsigned long timeout_ms, gfp_t gfp_mask,
1619                             void (*callback)(int status,
1620                                              struct ib_sa_mcmember_rec *resp,
1621                                              void *context),
1622                             void *context,
1623                             struct ib_sa_query **sa_query)
1624{
1625        struct ib_sa_mcmember_query *query;
1626        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1627        struct ib_sa_port   *port;
1628        struct ib_mad_agent *agent;
1629        struct ib_sa_mad *mad;
1630        int ret;
1631
1632        if (!sa_dev)
1633                return -ENODEV;
1634
1635        port  = &sa_dev->port[port_num - sa_dev->start_port];
1636        agent = port->agent;
1637
1638        query = kzalloc(sizeof(*query), gfp_mask);
1639        if (!query)
1640                return -ENOMEM;
1641
1642        query->sa_query.port     = port;
1643        ret = alloc_mad(&query->sa_query, gfp_mask);
1644        if (ret)
1645                goto err1;
1646
1647        ib_sa_client_get(client);
1648        query->sa_query.client = client;
1649        query->callback        = callback;
1650        query->context         = context;
1651
1652        mad = query->sa_query.mad_buf->mad;
1653        init_mad(&query->sa_query, agent);
1654
1655        query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1656        query->sa_query.release  = ib_sa_mcmember_rec_release;
1657        mad->mad_hdr.method      = method;
1658        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1659        mad->sa_hdr.comp_mask    = comp_mask;
1660
1661        ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1662                rec, mad->data);
1663
1664        *sa_query = &query->sa_query;
1665
1666        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1667        if (ret < 0)
1668                goto err2;
1669
1670        return ret;
1671
1672err2:
1673        *sa_query = NULL;
1674        ib_sa_client_put(query->sa_query.client);
1675        free_mad(&query->sa_query);
1676
1677err1:
1678        kfree(query);
1679        return ret;
1680}
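/*
 * Usage sketch (hypothetical caller; in-tree, multicast.c drives this
 * function): a join is a Set of an MCMemberRecord keyed by MGID and port
 * GID, a leave the corresponding Delete:
 *
 *	struct ib_sa_mcmember_rec rec = {};
 *	struct ib_sa_query *query;
 *
 *	rec.mgid = mgid;
 *	rec.port_gid = port_gid;
 *	rec.join_state = 1;	-- full member
 *	ret = ib_sa_mcmember_rec_query(&my_sa_client, device, port_num,
 *				       IB_MGMT_METHOD_SET, &rec,
 *				       IB_SA_MCMEMBER_REC_MGID |
 *				       IB_SA_MCMEMBER_REC_PORT_GID |
 *				       IB_SA_MCMEMBER_REC_JOIN_STATE,
 *				       1000, GFP_KERNEL, my_mc_cb,
 *				       NULL, &query);
 *
 * Leaving uses IB_SA_METHOD_DELETE with the same record and component mask.
 */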
1681
1682/* Support GuidInfoRecord */
1683static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1684                                        int status,
1685                                        struct ib_sa_mad *mad)
1686{
1687        struct ib_sa_guidinfo_query *query =
1688                container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1689
1690        if (mad) {
1691                struct ib_sa_guidinfo_rec rec;
1692
1693                ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1694                          mad->data, &rec);
1695                query->callback(status, &rec, query->context);
1696        } else
1697                query->callback(status, NULL, query->context);
1698}
1699
1700static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1701{
1702        kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1703}
1704
1705int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1706                              struct ib_device *device, u32 port_num,
1707                              struct ib_sa_guidinfo_rec *rec,
1708                              ib_sa_comp_mask comp_mask, u8 method,
1709                              unsigned long timeout_ms, gfp_t gfp_mask,
1710                              void (*callback)(int status,
1711                                               struct ib_sa_guidinfo_rec *resp,
1712                                               void *context),
1713                              void *context,
1714                              struct ib_sa_query **sa_query)
1715{
1716        struct ib_sa_guidinfo_query *query;
1717        struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1718        struct ib_sa_port *port;
1719        struct ib_mad_agent *agent;
1720        struct ib_sa_mad *mad;
1721        int ret;
1722
1723        if (!sa_dev)
1724                return -ENODEV;
1725
1726        if (method != IB_MGMT_METHOD_GET &&
1727            method != IB_MGMT_METHOD_SET &&
1728            method != IB_SA_METHOD_DELETE) {
1729                return -EINVAL;
1730        }
1731
1732        port  = &sa_dev->port[port_num - sa_dev->start_port];
1733        agent = port->agent;
1734
1735        query = kzalloc(sizeof(*query), gfp_mask);
1736        if (!query)
1737                return -ENOMEM;
1738
1739        query->sa_query.port = port;
1740        ret = alloc_mad(&query->sa_query, gfp_mask);
1741        if (ret)
1742                goto err1;
1743
1744        ib_sa_client_get(client);
1745        query->sa_query.client = client;
1746        query->callback        = callback;
1747        query->context         = context;
1748
1749        mad = query->sa_query.mad_buf->mad;
1750        init_mad(&query->sa_query, agent);
1751
1752        query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1753        query->sa_query.release  = ib_sa_guidinfo_rec_release;
1754
1755        mad->mad_hdr.method      = method;
1756        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1757        mad->sa_hdr.comp_mask    = comp_mask;
1758
1759        ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1760                mad->data);
1761
1762        *sa_query = &query->sa_query;
1763
1764        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1765        if (ret < 0)
1766                goto err2;
1767
1768        return ret;
1769
1770err2:
1771        *sa_query = NULL;
1772        ib_sa_client_put(query->sa_query.client);
1773        free_mad(&query->sa_query);
1774
1775err1:
1776        kfree(query);
1777        return ret;
1778}
1779EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
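/*
 * Usage sketch (hypothetical caller; the mlx4 SR-IOV alias-GUID code is
 * the in-tree user): a Set pushes one 8-entry GUID block for a LID, a Get
 * reads it back.  The component mask selects which record fields are
 * significant:
 *
 *	ret = ib_sa_guid_info_rec_query(&my_sa_client, device, port_num,
 *					&rec,
 *					IB_SA_GUIDINFO_REC_LID |
 *					IB_SA_GUIDINFO_REC_BLOCK_NUM,
 *					IB_MGMT_METHOD_SET, 1000, GFP_KERNEL,
 *					my_guid_cb, NULL, &query);
 */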
1780
1781struct ib_classport_info_context {
1782        struct completion       done;
1783        struct ib_sa_query      *sa_query;
1784};
1785
1786static void ib_classportinfo_cb(void *context)
1787{
1788        struct ib_classport_info_context *cb_ctx = context;
1789
1790        complete(&cb_ctx->done);
1791}
1792
1793static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1794                                              int status,
1795                                              struct ib_sa_mad *mad)
1796{
1797        unsigned long flags;
1798        struct ib_sa_classport_info_query *query =
1799                container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1800        struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1801
1802        if (mad) {
1803                if (sa_query->flags & IB_SA_QUERY_OPA) {
1804                        struct opa_class_port_info rec;
1805
1806                        ib_unpack(opa_classport_info_rec_table,
1807                                  ARRAY_SIZE(opa_classport_info_rec_table),
1808                                  mad->data, &rec);
1809
1810                        spin_lock_irqsave(&sa_query->port->classport_lock,
1811                                          flags);
1812                        if (!status && !info->valid) {
1813                                memcpy(&info->data.opa, &rec,
1814                                       sizeof(info->data.opa));
1815
1816                                info->valid = true;
1817                                info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1818                        }
1819                        spin_unlock_irqrestore(&sa_query->port->classport_lock,
1820                                               flags);
1821
1822                } else {
1823                        struct ib_class_port_info rec;
1824
1825                        ib_unpack(ib_classport_info_rec_table,
1826                                  ARRAY_SIZE(ib_classport_info_rec_table),
1827                                  mad->data, &rec);
1828
1829                        spin_lock_irqsave(&sa_query->port->classport_lock,
1830                                          flags);
1831                        if (!status && !info->valid) {
1832                                memcpy(&info->data.ib, &rec,
1833                                       sizeof(info->data.ib));
1834
1835                                info->valid = true;
1836                                info->data.type = RDMA_CLASS_PORT_INFO_IB;
1837                        }
1838                        spin_unlock_irqrestore(&sa_query->port->classport_lock,
1839                                               flags);
1840                }
1841        }
1842        query->callback(query->context);
1843}
1844
1845static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1846{
1847        kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1848                           sa_query));
1849}
1850
1851static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1852                                          unsigned long timeout_ms,
1853                                          void (*callback)(void *context),
1854                                          void *context,
1855                                          struct ib_sa_query **sa_query)
1856{
1857        struct ib_mad_agent *agent;
1858        struct ib_sa_classport_info_query *query;
1859        struct ib_sa_mad *mad;
1860        gfp_t gfp_mask = GFP_KERNEL;
1861        int ret;
1862
1863        agent = port->agent;
1864
1865        query = kzalloc(sizeof(*query), gfp_mask);
1866        if (!query)
1867                return -ENOMEM;
1868
1869        query->sa_query.port = port;
1870        query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1871                                                 port->port_num) ?
1872                                 IB_SA_QUERY_OPA : 0;
1873        ret = alloc_mad(&query->sa_query, gfp_mask);
1874        if (ret)
1875                goto err_free;
1876
1877        query->callback = callback;
1878        query->context = context;
1879
1880        mad = query->sa_query.mad_buf->mad;
1881        init_mad(&query->sa_query, agent);
1882
1883        query->sa_query.callback = ib_sa_classport_info_rec_callback;
1884        query->sa_query.release  = ib_sa_classport_info_rec_release;
1885        mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1886        mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1887        mad->sa_hdr.comp_mask    = 0;
1888        *sa_query = &query->sa_query;
1889
1890        ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1891        if (ret < 0)
1892                goto err_free_mad;
1893
1894        return ret;
1895
1896err_free_mad:
1897        *sa_query = NULL;
1898        free_mad(&query->sa_query);
1899
1900err_free:
1901        kfree(query);
1902        return ret;
1903}
1904
1905static void update_ib_cpi(struct work_struct *work)
1906{
1907        struct ib_sa_port *port =
1908                container_of(work, struct ib_sa_port, ib_cpi_work.work);
1909        struct ib_classport_info_context *cb_context;
1910        unsigned long flags;
1911        int ret;
1912
1913        /* If the classport info is valid, nothing
1914         * to do here.
1915         */
1916        spin_lock_irqsave(&port->classport_lock, flags);
1917        if (port->classport_info.valid) {
1918                spin_unlock_irqrestore(&port->classport_lock, flags);
1919                return;
1920        }
1921        spin_unlock_irqrestore(&port->classport_lock, flags);
1922
1923        cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1924        if (!cb_context)
1925                goto err_nomem;
1926
1927        init_completion(&cb_context->done);
1928
1929        ret = ib_sa_classport_info_rec_query(port, 3000,
1930                                             ib_classportinfo_cb, cb_context,
1931                                             &cb_context->sa_query);
1932        if (ret < 0)
1933                goto free_cb_err;
1934        wait_for_completion(&cb_context->done);
1935free_cb_err:
1936        kfree(cb_context);
1937        spin_lock_irqsave(&port->classport_lock, flags);
1938
1939        /* If the classport info is still not valid, the query must have
1940         * failed for some reason. Retry issuing the query.
1941         */
1942        if (!port->classport_info.valid) {
1943                port->classport_info.retry_cnt++;
1944                if (port->classport_info.retry_cnt <=
1945                    IB_SA_CPI_MAX_RETRY_CNT) {
1946                        unsigned long delay =
1947                                msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1948
1949                        queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1950                }
1951        }
1952        spin_unlock_irqrestore(&port->classport_lock, flags);
1953
1954err_nomem:
1955        return;
1956}
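/*
 * Worst-case timing note for the above: each ClassPortInfo query waits up
 * to 3000 msecs for a response; on failure the work item requeues itself
 * up to IB_SA_CPI_MAX_RETRY_CNT (3) more times, IB_SA_CPI_RETRY_WAIT
 * (1000) msecs apart, after which the cache stays invalid until a port
 * event in ib_sa_event() resets retry_cnt and requeues this work.
 */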
1957
1958static void send_handler(struct ib_mad_agent *agent,
1959                         struct ib_mad_send_wc *mad_send_wc)
1960{
1961        struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1962        unsigned long flags;
1963
1964        if (query->callback)
1965                switch (mad_send_wc->status) {
1966                case IB_WC_SUCCESS:
1967                        /* No callback here -- the recv handler already invoked it */
1968                        break;
1969                case IB_WC_RESP_TIMEOUT_ERR:
1970                        query->callback(query, -ETIMEDOUT, NULL);
1971                        break;
1972                case IB_WC_WR_FLUSH_ERR:
1973                        query->callback(query, -EINTR, NULL);
1974                        break;
1975                default:
1976                        query->callback(query, -EIO, NULL);
1977                        break;
1978                }
1979
1980        xa_lock_irqsave(&queries, flags);
1981        __xa_erase(&queries, query->id);
1982        xa_unlock_irqrestore(&queries, flags);
1983
1984        free_mad(query);
1985        if (query->client)
1986                ib_sa_client_put(query->client);
1987        query->release(query);
1988}
1989
1990static void recv_handler(struct ib_mad_agent *mad_agent,
1991                         struct ib_mad_send_buf *send_buf,
1992                         struct ib_mad_recv_wc *mad_recv_wc)
1993{
1994        struct ib_sa_query *query;
1995
1996        if (!send_buf)
1997                return;
1998
1999        query = send_buf->context[0];
2000        if (query->callback) {
2001                if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2002                        query->callback(query,
2003                                        mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2004                                        -EINVAL : 0,
2005                                        (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2006                else
2007                        query->callback(query, -EIO, NULL);
2008        }
2009
2010        ib_free_recv_mad(mad_recv_wc);
2011}
2012
2013static void update_sm_ah(struct work_struct *work)
2014{
2015        struct ib_sa_port *port =
2016                container_of(work, struct ib_sa_port, update_task);
2017        struct ib_sa_sm_ah *new_ah;
2018        struct ib_port_attr port_attr;
2019        struct rdma_ah_attr   ah_attr;
2020        bool grh_required;
2021
2022        if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2023                pr_warn("Couldn't query port\n");
2024                return;
2025        }
2026
2027        new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2028        if (!new_ah)
2029                return;
2030
2031        kref_init(&new_ah->ref);
2032        new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2033
2034        new_ah->pkey_index = 0;
2035        if (ib_find_pkey(port->agent->device, port->port_num,
2036                         IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2037                pr_err("Couldn't find index for default PKey\n");
2038
2039        memset(&ah_attr, 0, sizeof(ah_attr));
2040        ah_attr.type = rdma_ah_find_type(port->agent->device,
2041                                         port->port_num);
2042        rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2043        rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2044        rdma_ah_set_port_num(&ah_attr, port->port_num);
2045
2046        grh_required = rdma_is_grh_required(port->agent->device,
2047                                            port->port_num);
2048
2049        /*
2050         * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2051         * differentiated from a permissive LID of 0xFFFF.  We set the
2052         * grh_required flag here so the SA can program the DGID in the
2053         * address handle appropriately.
2054         */
2055        if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2056            (grh_required ||
2057             port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2058                rdma_ah_set_make_grd(&ah_attr, true);
2059
2060        if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2061                rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2062                rdma_ah_set_subnet_prefix(&ah_attr,
2063                                          cpu_to_be64(port_attr.subnet_prefix));
2064                rdma_ah_set_interface_id(&ah_attr,
2065                                         cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2066        }
2067
2068        new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2069                                    RDMA_CREATE_AH_SLEEPABLE);
2070        if (IS_ERR(new_ah->ah)) {
2071                pr_warn("Couldn't create new SM AH\n");
2072                kfree(new_ah);
2073                return;
2074        }
2075
2076        spin_lock_irq(&port->ah_lock);
2077        if (port->sm_ah)
2078                kref_put(&port->sm_ah->ref, free_sm_ah);
2079        port->sm_ah = new_ah;
2080        spin_unlock_irq(&port->ah_lock);
2081}
2082
2083static void ib_sa_event(struct ib_event_handler *handler,
2084                        struct ib_event *event)
2085{
2086        if (event->event == IB_EVENT_PORT_ERR    ||
2087            event->event == IB_EVENT_PORT_ACTIVE ||
2088            event->event == IB_EVENT_LID_CHANGE  ||
2089            event->event == IB_EVENT_PKEY_CHANGE ||
2090            event->event == IB_EVENT_SM_CHANGE   ||
2091            event->event == IB_EVENT_CLIENT_REREGISTER) {
2092                unsigned long flags;
2093                struct ib_sa_device *sa_dev =
2094                        container_of(handler, typeof(*sa_dev), event_handler);
2095                u32 port_num = event->element.port_num - sa_dev->start_port;
2096                struct ib_sa_port *port = &sa_dev->port[port_num];
2097
2098                if (!rdma_cap_ib_sa(handler->device, port->port_num))
2099                        return;
2100
2101                spin_lock_irqsave(&port->ah_lock, flags);
2102                if (port->sm_ah)
2103                        kref_put(&port->sm_ah->ref, free_sm_ah);
2104                port->sm_ah = NULL;
2105                spin_unlock_irqrestore(&port->ah_lock, flags);
2106
2107                if (event->event == IB_EVENT_SM_CHANGE ||
2108                    event->event == IB_EVENT_CLIENT_REREGISTER ||
2109                    event->event == IB_EVENT_LID_CHANGE ||
2110                    event->event == IB_EVENT_PORT_ACTIVE) {
2111                        unsigned long delay =
2112                                msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2113
2114                        spin_lock_irqsave(&port->classport_lock, flags);
2115                        port->classport_info.valid = false;
2116                        port->classport_info.retry_cnt = 0;
2117                        spin_unlock_irqrestore(&port->classport_lock, flags);
2118                        queue_delayed_work(ib_wq,
2119                                           &port->ib_cpi_work, delay);
2120                }
2121                queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2122        }
2123}
2124
2125static int ib_sa_add_one(struct ib_device *device)
2126{
2127        struct ib_sa_device *sa_dev;
2128        int s, e, i;
2129        int count = 0;
2130        int ret;
2131
2132        s = rdma_start_port(device);
2133        e = rdma_end_port(device);
2134
2135        sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2136        if (!sa_dev)
2137                return -ENOMEM;
2138
2139        sa_dev->start_port = s;
2140        sa_dev->end_port   = e;
2141
2142        for (i = 0; i <= e - s; ++i) {
2143                spin_lock_init(&sa_dev->port[i].ah_lock);
2144                if (!rdma_cap_ib_sa(device, i + 1))
2145                        continue;
2146
2147                sa_dev->port[i].sm_ah    = NULL;
2148                sa_dev->port[i].port_num = i + s;
2149
2150                spin_lock_init(&sa_dev->port[i].classport_lock);
2151                sa_dev->port[i].classport_info.valid = false;
2152
2153                sa_dev->port[i].agent =
2154                        ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2155                                              NULL, 0, send_handler,
2156                                              recv_handler, sa_dev, 0);
2157                if (IS_ERR(sa_dev->port[i].agent)) {
2158                        ret = PTR_ERR(sa_dev->port[i].agent);
2159                        goto err;
2160                }
2161
2162                INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2163                INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2164                                  update_ib_cpi);
2165
2166                count++;
2167        }
2168
2169        if (!count) {
2170                ret = -EOPNOTSUPP;
2171                goto free;
2172        }
2173
2174        ib_set_client_data(device, &sa_client, sa_dev);
2175
2176        /*
2177         * We register our event handler after everything is set up,
2178         * and then update our cached info after the event handler is
2179         * registered to avoid any problems if a port changes state
2180         * during our initialization.
2181         */
2182
2183        INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2184        ib_register_event_handler(&sa_dev->event_handler);
2185
2186        for (i = 0; i <= e - s; ++i) {
2187                if (rdma_cap_ib_sa(device, i + 1))
2188                        update_sm_ah(&sa_dev->port[i].update_task);
2189        }
2190
2191        return 0;
2192
2193err:
2194        while (--i >= 0) {
2195                if (rdma_cap_ib_sa(device, i + 1))
2196                        ib_unregister_mad_agent(sa_dev->port[i].agent);
2197        }
2198free:
2199        kfree(sa_dev);
2200        return ret;
2201}
2202
2203static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2204{
2205        struct ib_sa_device *sa_dev = client_data;
2206        int i;
2207
2208        ib_unregister_event_handler(&sa_dev->event_handler);
2209        flush_workqueue(ib_wq);
2210
2211        for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2212                if (rdma_cap_ib_sa(device, i + 1)) {
2213                        cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2214                        ib_unregister_mad_agent(sa_dev->port[i].agent);
2215                        if (sa_dev->port[i].sm_ah)
2216                                kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2217                }
2218
2219        }
2220
2221        kfree(sa_dev);
2222}
2223
2224int ib_sa_init(void)
2225{
2226        int ret;
2227
2228        get_random_bytes(&tid, sizeof(tid));
2229
2230        atomic_set(&ib_nl_sa_request_seq, 0);
2231
2232        ret = ib_register_client(&sa_client);
2233        if (ret) {
2234                pr_err("Couldn't register ib_sa client\n");
2235                goto err1;
2236        }
2237
2238        ret = mcast_init();
2239        if (ret) {
2240                pr_err("Couldn't initialize multicast handling\n");
2241                goto err2;
2242        }
2243
2244        ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2245        if (!ib_nl_wq) {
2246                ret = -ENOMEM;
2247                goto err3;
2248        }
2249
2250        INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2251
2252        return 0;
2253
2254err3:
2255        mcast_cleanup();
2256err2:
2257        ib_unregister_client(&sa_client);
2258err1:
2259        return ret;
2260}
2261
2262void ib_sa_cleanup(void)
2263{
2264        cancel_delayed_work(&ib_nl_timed_work);
2265        flush_workqueue(ib_nl_wq);
2266        destroy_workqueue(ib_nl_wq);
2267        mcast_cleanup();
2268        ib_unregister_client(&sa_client);
2269        WARN_ON(!xa_empty(&queries));
2270}
2271