linux/include/rdma/ib_verbs.h
   1/*
   2 * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
   3 * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
   4 * Copyright (c) 2004 Intel Corporation.  All rights reserved.
   5 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
   6 * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
   7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
   8 * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
   9 *
  10 * This software is available to you under a choice of one of two
  11 * licenses.  You may choose to be licensed under the terms of the GNU
  12 * General Public License (GPL) Version 2, available from the file
  13 * COPYING in the main directory of this source tree, or the
  14 * OpenIB.org BSD license below:
  15 *
  16 *     Redistribution and use in source and binary forms, with or
  17 *     without modification, are permitted provided that the following
  18 *     conditions are met:
  19 *
  20 *      - Redistributions of source code must retain the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer.
  23 *
  24 *      - Redistributions in binary form must reproduce the above
  25 *        copyright notice, this list of conditions and the following
  26 *        disclaimer in the documentation and/or other materials
  27 *        provided with the distribution.
  28 *
  29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  30 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  31 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  32 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  33 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  36 * SOFTWARE.
  37 */
  38
  39#if !defined(IB_VERBS_H)
  40#define IB_VERBS_H
  41
  42#include <linux/types.h>
  43#include <linux/device.h>
  44#include <linux/dma-mapping.h>
  45#include <linux/kref.h>
  46#include <linux/list.h>
  47#include <linux/rwsem.h>
  48#include <linux/workqueue.h>
  49#include <linux/irq_poll.h>
  50#include <uapi/linux/if_ether.h>
  51#include <net/ipv6.h>
  52#include <net/ip.h>
  53#include <linux/string.h>
  54#include <linux/slab.h>
  55#include <linux/netdevice.h>
  56#include <linux/refcount.h>
  57#include <linux/if_link.h>
  58#include <linux/atomic.h>
  59#include <linux/mmu_notifier.h>
  60#include <linux/uaccess.h>
  61#include <linux/cgroup_rdma.h>
  62#include <linux/irqflags.h>
  63#include <linux/preempt.h>
  64#include <uapi/rdma/ib_user_verbs.h>
  65#include <rdma/restrack.h>
  66#include <uapi/rdma/rdma_user_ioctl.h>
  67#include <uapi/rdma/ib_user_ioctl_verbs.h>
  68
  69#define IB_FW_VERSION_NAME_MAX  ETHTOOL_FWVERS_LEN
  70
  71struct ib_umem_odp;
  72
  73extern struct workqueue_struct *ib_wq;
  74extern struct workqueue_struct *ib_comp_wq;
  75extern struct workqueue_struct *ib_comp_unbound_wq;
  76
  77__printf(3, 4) __cold
  78void ibdev_printk(const char *level, const struct ib_device *ibdev,
  79                  const char *format, ...);
  80__printf(2, 3) __cold
  81void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
  82__printf(2, 3) __cold
  83void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
  84__printf(2, 3) __cold
  85void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
  86__printf(2, 3) __cold
  87void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
  88__printf(2, 3) __cold
  89void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
  90__printf(2, 3) __cold
  91void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
  92__printf(2, 3) __cold
  93void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
  94
  95#if defined(CONFIG_DYNAMIC_DEBUG)
  96#define ibdev_dbg(__dev, format, args...)                       \
  97        dynamic_ibdev_dbg(__dev, format, ##args)
  98#elif defined(DEBUG)
  99#define ibdev_dbg(__dev, format, args...)                       \
 100        ibdev_printk(KERN_DEBUG, __dev, format, ##args)
 101#else
 102__printf(2, 3) __cold
 103static inline
 104void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
 105#endif
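
/*
 * Example (sketch): how a driver might use the device-aware printk helpers
 * above.  The function name and error path are hypothetical; only
 * ibdev_dbg() and ibdev_err() come from this header.
 *
 *	static int example_enable_port(struct ib_device *ibdev, u8 port)
 *	{
 *		int err = -ENODEV;	// placeholder outcome
 *
 *		ibdev_dbg(ibdev, "enabling port %u\n", port);
 *		if (err)
 *			ibdev_err(ibdev, "port %u enable failed: %d\n", port, err);
 *		return err;
 *	}
 */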
 106
 107union ib_gid {
 108        u8      raw[16];
 109        struct {
 110                __be64  subnet_prefix;
 111                __be64  interface_id;
 112        } global;
 113};
 114
 115extern union ib_gid zgid;
 116
 117enum ib_gid_type {
 118        /* If link layer is Ethernet, this is RoCE V1 */
 119        IB_GID_TYPE_IB        = 0,
 120        IB_GID_TYPE_ROCE      = 0,
 121        IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
 122        IB_GID_TYPE_SIZE
 123};
 124
 125#define ROCE_V2_UDP_DPORT      4791
 126struct ib_gid_attr {
 127        struct net_device __rcu *ndev;
 128        struct ib_device        *device;
 129        union ib_gid            gid;
 130        enum ib_gid_type        gid_type;
 131        u16                     index;
 132        u8                      port_num;
 133};
 134
 135enum rdma_node_type {
 136        /* IB values map to NodeInfo:NodeType. */
 137        RDMA_NODE_IB_CA         = 1,
 138        RDMA_NODE_IB_SWITCH,
 139        RDMA_NODE_IB_ROUTER,
 140        RDMA_NODE_RNIC,
 141        RDMA_NODE_USNIC,
 142        RDMA_NODE_USNIC_UDP,
 143        RDMA_NODE_UNSPECIFIED,
 144};
 145
 146enum {
  147        /* set the locally administered indication */
 148        IB_SA_WELL_KNOWN_GUID   = BIT_ULL(57) | 2,
 149};
 150
 151enum rdma_transport_type {
 152        RDMA_TRANSPORT_IB,
 153        RDMA_TRANSPORT_IWARP,
 154        RDMA_TRANSPORT_USNIC,
 155        RDMA_TRANSPORT_USNIC_UDP,
 156        RDMA_TRANSPORT_UNSPECIFIED,
 157};
 158
 159enum rdma_protocol_type {
 160        RDMA_PROTOCOL_IB,
 161        RDMA_PROTOCOL_IBOE,
 162        RDMA_PROTOCOL_IWARP,
 163        RDMA_PROTOCOL_USNIC_UDP
 164};
 165
 166__attribute_const__ enum rdma_transport_type
 167rdma_node_get_transport(enum rdma_node_type node_type);
 168
 169enum rdma_network_type {
 170        RDMA_NETWORK_IB,
 171        RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
 172        RDMA_NETWORK_IPV4,
 173        RDMA_NETWORK_IPV6
 174};
 175
 176static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
 177{
 178        if (network_type == RDMA_NETWORK_IPV4 ||
 179            network_type == RDMA_NETWORK_IPV6)
 180                return IB_GID_TYPE_ROCE_UDP_ENCAP;
 181
 182        /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
 183        return IB_GID_TYPE_IB;
 184}
 185
 186static inline enum rdma_network_type
 187rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
 188{
 189        if (attr->gid_type == IB_GID_TYPE_IB)
 190                return RDMA_NETWORK_IB;
 191
 192        if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
 193                return RDMA_NETWORK_IPV4;
 194        else
 195                return RDMA_NETWORK_IPV6;
 196}
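
/*
 * Example (sketch): using rdma_gid_attr_network_type() to decide how a
 * resolved GID will be carried on the wire.  Only the helper and enums
 * above are from this header; the string mapping is illustrative.
 *
 *	static const char *example_net_type_str(const struct ib_gid_attr *attr)
 *	{
 *		switch (rdma_gid_attr_network_type(attr)) {
 *		case RDMA_NETWORK_IB:	return "IB or RoCEv1 (GRH)";
 *		case RDMA_NETWORK_IPV4:	return "RoCEv2 over IPv4";
 *		case RDMA_NETWORK_IPV6:	return "RoCEv2 over IPv6";
 *		default:		return "unknown";
 *		}
 *	}
 */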
 197
 198enum rdma_link_layer {
 199        IB_LINK_LAYER_UNSPECIFIED,
 200        IB_LINK_LAYER_INFINIBAND,
 201        IB_LINK_LAYER_ETHERNET,
 202};
 203
 204enum ib_device_cap_flags {
 205        IB_DEVICE_RESIZE_MAX_WR                 = (1 << 0),
 206        IB_DEVICE_BAD_PKEY_CNTR                 = (1 << 1),
 207        IB_DEVICE_BAD_QKEY_CNTR                 = (1 << 2),
 208        IB_DEVICE_RAW_MULTI                     = (1 << 3),
 209        IB_DEVICE_AUTO_PATH_MIG                 = (1 << 4),
 210        IB_DEVICE_CHANGE_PHY_PORT               = (1 << 5),
 211        IB_DEVICE_UD_AV_PORT_ENFORCE            = (1 << 6),
 212        IB_DEVICE_CURR_QP_STATE_MOD             = (1 << 7),
 213        IB_DEVICE_SHUTDOWN_PORT                 = (1 << 8),
 214        /* Not in use, former INIT_TYPE         = (1 << 9),*/
 215        IB_DEVICE_PORT_ACTIVE_EVENT             = (1 << 10),
 216        IB_DEVICE_SYS_IMAGE_GUID                = (1 << 11),
 217        IB_DEVICE_RC_RNR_NAK_GEN                = (1 << 12),
 218        IB_DEVICE_SRQ_RESIZE                    = (1 << 13),
 219        IB_DEVICE_N_NOTIFY_CQ                   = (1 << 14),
 220
 221        /*
 222         * This device supports a per-device lkey or stag that can be
 223         * used without performing a memory registration for the local
 224         * memory.  Note that ULPs should never check this flag, but
  225         * instead use the local_dma_lkey field in the ib_pd structure,
 226         * which will always contain a usable lkey.
 227         */
 228        IB_DEVICE_LOCAL_DMA_LKEY                = (1 << 15),
 229        /* Reserved, old SEND_W_INV             = (1 << 16),*/
 230        IB_DEVICE_MEM_WINDOW                    = (1 << 17),
 231        /*
  232         * Devices should set IB_DEVICE_UD_IP_CSUM if they support
  233         * insertion of UDP and TCP checksums on outgoing UD IPoIB
  234         * messages and can verify the validity of checksums on
  235         * incoming messages.  Setting this flag implies that the
 236         * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
 237         */
 238        IB_DEVICE_UD_IP_CSUM                    = (1 << 18),
 239        IB_DEVICE_UD_TSO                        = (1 << 19),
 240        IB_DEVICE_XRC                           = (1 << 20),
 241
 242        /*
 243         * This device supports the IB "base memory management extension",
 244         * which includes support for fast registrations (IB_WR_REG_MR,
 245         * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
 246         * also be set by any iWarp device which must support FRs to comply
  247         * with the iWarp verbs spec.  iWarp devices also support the
 248         * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
 249         * stag.
 250         */
 251        IB_DEVICE_MEM_MGT_EXTENSIONS            = (1 << 21),
 252        IB_DEVICE_BLOCK_MULTICAST_LOOPBACK      = (1 << 22),
 253        IB_DEVICE_MEM_WINDOW_TYPE_2A            = (1 << 23),
 254        IB_DEVICE_MEM_WINDOW_TYPE_2B            = (1 << 24),
 255        IB_DEVICE_RC_IP_CSUM                    = (1 << 25),
 256        /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
 257        IB_DEVICE_RAW_IP_CSUM                   = (1 << 26),
 258        /*
 259         * Devices should set IB_DEVICE_CROSS_CHANNEL if they
 260         * support execution of WQEs that involve synchronization
  261         * of I/O operations with a single completion queue managed
 262         * by hardware.
 263         */
 264        IB_DEVICE_CROSS_CHANNEL                 = (1 << 27),
 265        IB_DEVICE_MANAGED_FLOW_STEERING         = (1 << 29),
 266        IB_DEVICE_SIGNATURE_HANDOVER            = (1 << 30),
 267        IB_DEVICE_ON_DEMAND_PAGING              = (1ULL << 31),
 268        IB_DEVICE_SG_GAPS_REG                   = (1ULL << 32),
 269        IB_DEVICE_VIRTUAL_FUNCTION              = (1ULL << 33),
 270        /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
 271        IB_DEVICE_RAW_SCATTER_FCS               = (1ULL << 34),
 272        IB_DEVICE_RDMA_NETDEV_OPA_VNIC          = (1ULL << 35),
 273        /* The device supports padding incoming writes to cacheline. */
 274        IB_DEVICE_PCI_WRITE_END_PADDING         = (1ULL << 36),
 275        IB_DEVICE_ALLOW_USER_UNREG              = (1ULL << 37),
 276};
 277
 278enum ib_signature_prot_cap {
 279        IB_PROT_T10DIF_TYPE_1 = 1,
 280        IB_PROT_T10DIF_TYPE_2 = 1 << 1,
 281        IB_PROT_T10DIF_TYPE_3 = 1 << 2,
 282};
 283
 284enum ib_signature_guard_cap {
 285        IB_GUARD_T10DIF_CRC     = 1,
 286        IB_GUARD_T10DIF_CSUM    = 1 << 1,
 287};
 288
 289enum ib_atomic_cap {
 290        IB_ATOMIC_NONE,
 291        IB_ATOMIC_HCA,
 292        IB_ATOMIC_GLOB
 293};
 294
 295enum ib_odp_general_cap_bits {
 296        IB_ODP_SUPPORT          = 1 << 0,
 297        IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
 298};
 299
 300enum ib_odp_transport_cap_bits {
 301        IB_ODP_SUPPORT_SEND     = 1 << 0,
 302        IB_ODP_SUPPORT_RECV     = 1 << 1,
 303        IB_ODP_SUPPORT_WRITE    = 1 << 2,
 304        IB_ODP_SUPPORT_READ     = 1 << 3,
 305        IB_ODP_SUPPORT_ATOMIC   = 1 << 4,
 306        IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
 307};
 308
 309struct ib_odp_caps {
 310        uint64_t general_caps;
 311        struct {
 312                uint32_t  rc_odp_caps;
 313                uint32_t  uc_odp_caps;
 314                uint32_t  ud_odp_caps;
 315                uint32_t  xrc_odp_caps;
 316        } per_transport_caps;
 317};
 318
 319struct ib_rss_caps {
 320        /* Corresponding bit will be set if qp type from
 321         * 'enum ib_qp_type' is supported, e.g.
 322         * supported_qpts |= 1 << IB_QPT_UD
 323         */
 324        u32 supported_qpts;
 325        u32 max_rwq_indirection_tables;
 326        u32 max_rwq_indirection_table_size;
 327};
 328
 329enum ib_tm_cap_flags {
 330        /*  Support tag matching on RC transport */
 331        IB_TM_CAP_RC                = 1 << 0,
 332};
 333
 334struct ib_tm_caps {
 335        /* Max size of RNDV header */
 336        u32 max_rndv_hdr_size;
 337        /* Max number of entries in tag matching list */
 338        u32 max_num_tags;
 339        /* From enum ib_tm_cap_flags */
 340        u32 flags;
 341        /* Max number of outstanding list operations */
 342        u32 max_ops;
 343        /* Max number of SGE in tag matching entry */
 344        u32 max_sge;
 345};
 346
 347struct ib_cq_init_attr {
 348        unsigned int    cqe;
 349        int             comp_vector;
 350        u32             flags;
 351};
 352
 353enum ib_cq_attr_mask {
 354        IB_CQ_MODERATE = 1 << 0,
 355};
 356
 357struct ib_cq_caps {
 358        u16     max_cq_moderation_count;
 359        u16     max_cq_moderation_period;
 360};
 361
 362struct ib_dm_mr_attr {
 363        u64             length;
 364        u64             offset;
 365        u32             access_flags;
 366};
 367
 368struct ib_dm_alloc_attr {
 369        u64     length;
 370        u32     alignment;
 371        u32     flags;
 372};
 373
 374struct ib_device_attr {
 375        u64                     fw_ver;
 376        __be64                  sys_image_guid;
 377        u64                     max_mr_size;
 378        u64                     page_size_cap;
 379        u32                     vendor_id;
 380        u32                     vendor_part_id;
 381        u32                     hw_ver;
 382        int                     max_qp;
 383        int                     max_qp_wr;
 384        u64                     device_cap_flags;
 385        int                     max_send_sge;
 386        int                     max_recv_sge;
 387        int                     max_sge_rd;
 388        int                     max_cq;
 389        int                     max_cqe;
 390        int                     max_mr;
 391        int                     max_pd;
 392        int                     max_qp_rd_atom;
 393        int                     max_ee_rd_atom;
 394        int                     max_res_rd_atom;
 395        int                     max_qp_init_rd_atom;
 396        int                     max_ee_init_rd_atom;
 397        enum ib_atomic_cap      atomic_cap;
 398        enum ib_atomic_cap      masked_atomic_cap;
 399        int                     max_ee;
 400        int                     max_rdd;
 401        int                     max_mw;
 402        int                     max_raw_ipv6_qp;
 403        int                     max_raw_ethy_qp;
 404        int                     max_mcast_grp;
 405        int                     max_mcast_qp_attach;
 406        int                     max_total_mcast_qp_attach;
 407        int                     max_ah;
 408        int                     max_fmr;
 409        int                     max_map_per_fmr;
 410        int                     max_srq;
 411        int                     max_srq_wr;
 412        int                     max_srq_sge;
 413        unsigned int            max_fast_reg_page_list_len;
 414        u16                     max_pkeys;
 415        u8                      local_ca_ack_delay;
 416        int                     sig_prot_cap;
 417        int                     sig_guard_cap;
 418        struct ib_odp_caps      odp_caps;
 419        uint64_t                timestamp_mask;
  420        uint64_t                hca_core_clock; /* in kHz */
 421        struct ib_rss_caps      rss_caps;
 422        u32                     max_wq_type_rq;
 423        u32                     raw_packet_caps; /* Use ib_raw_packet_caps enum */
 424        struct ib_tm_caps       tm_caps;
 425        struct ib_cq_caps       cq_caps;
 426        u64                     max_dm_size;
 427};
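
/*
 * Example (sketch): a ULP checking device_cap_flags before relying on fast
 * memory registration.  struct ib_device_attr and the IB_DEVICE_* bits are
 * from this header; how the attribute structure is obtained (for instance
 * from the ib_device's cached attributes) is assumed.
 *
 *	static bool example_supports_fast_reg(const struct ib_device_attr *attrs)
 *	{
 *		return attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS;
 *	}
 */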
 428
 429enum ib_mtu {
 430        IB_MTU_256  = 1,
 431        IB_MTU_512  = 2,
 432        IB_MTU_1024 = 3,
 433        IB_MTU_2048 = 4,
 434        IB_MTU_4096 = 5
 435};
 436
 437static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
 438{
 439        switch (mtu) {
 440        case IB_MTU_256:  return  256;
 441        case IB_MTU_512:  return  512;
 442        case IB_MTU_1024: return 1024;
 443        case IB_MTU_2048: return 2048;
 444        case IB_MTU_4096: return 4096;
 445        default:          return -1;
 446        }
 447}
 448
 449static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
 450{
 451        if (mtu >= 4096)
 452                return IB_MTU_4096;
 453        else if (mtu >= 2048)
 454                return IB_MTU_2048;
 455        else if (mtu >= 1024)
 456                return IB_MTU_1024;
 457        else if (mtu >= 512)
 458                return IB_MTU_512;
 459        else
 460                return IB_MTU_256;
 461}
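
/*
 * Example (sketch): clamping a requested path MTU to what a port currently
 * runs at, using the two conversion helpers above.  min() is the usual
 * kernel macro; the calling convention is hypothetical.
 *
 *	static enum ib_mtu example_clamp_mtu(enum ib_mtu active_mtu, int wanted)
 *	{
 *		return ib_mtu_int_to_enum(min(ib_mtu_enum_to_int(active_mtu),
 *					      wanted));
 *	}
 */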
 462
 463enum ib_port_state {
 464        IB_PORT_NOP             = 0,
 465        IB_PORT_DOWN            = 1,
 466        IB_PORT_INIT            = 2,
 467        IB_PORT_ARMED           = 3,
 468        IB_PORT_ACTIVE          = 4,
 469        IB_PORT_ACTIVE_DEFER    = 5
 470};
 471
 472enum ib_port_width {
 473        IB_WIDTH_1X     = 1,
 474        IB_WIDTH_2X     = 16,
 475        IB_WIDTH_4X     = 2,
 476        IB_WIDTH_8X     = 4,
 477        IB_WIDTH_12X    = 8
 478};
 479
 480static inline int ib_width_enum_to_int(enum ib_port_width width)
 481{
 482        switch (width) {
 483        case IB_WIDTH_1X:  return  1;
 484        case IB_WIDTH_2X:  return  2;
 485        case IB_WIDTH_4X:  return  4;
 486        case IB_WIDTH_8X:  return  8;
 487        case IB_WIDTH_12X: return 12;
 488        default:          return -1;
 489        }
 490}
 491
 492enum ib_port_speed {
 493        IB_SPEED_SDR    = 1,
 494        IB_SPEED_DDR    = 2,
 495        IB_SPEED_QDR    = 4,
 496        IB_SPEED_FDR10  = 8,
 497        IB_SPEED_FDR    = 16,
 498        IB_SPEED_EDR    = 32,
 499        IB_SPEED_HDR    = 64
 500};
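
/*
 * Example (sketch): estimating raw link bandwidth in Gb/s from the width and
 * speed a port reports (see active_width/active_speed in struct ib_port_attr
 * below).  The per-lane figures are the nominal rates, rounded down for SDR;
 * the helper itself is illustrative.
 *
 *	static int example_link_gbps(u8 active_width, u8 active_speed)
 *	{
 *		int lanes = ib_width_enum_to_int(active_width);
 *		int per_lane;
 *
 *		switch (active_speed) {
 *		case IB_SPEED_SDR:	per_lane = 2;	break;	// 2.5 Gb/s
 *		case IB_SPEED_DDR:	per_lane = 5;	break;
 *		case IB_SPEED_QDR:
 *		case IB_SPEED_FDR10:	per_lane = 10;	break;
 *		case IB_SPEED_FDR:	per_lane = 14;	break;
 *		case IB_SPEED_EDR:	per_lane = 25;	break;
 *		case IB_SPEED_HDR:	per_lane = 50;	break;
 *		default:		return -EINVAL;
 *		}
 *		return lanes > 0 ? lanes * per_lane : -EINVAL;
 *	}
 */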
 501
 502/**
 503 * struct rdma_hw_stats
 504 * @lock - Mutex to protect parallel write access to lifespan and values
  505 *    of counters, which are 64 bits and not guaranteed to be written
  506 *    atomically on 32-bit systems.
 507 * @timestamp - Used by the core code to track when the last update was
 508 * @lifespan - Used by the core code to determine how old the counters
 509 *   should be before being updated again.  Stored in jiffies, defaults
  510 *   to 10 milliseconds; drivers can override the default by specifying
 511 *   their own value during their allocation routine.
  512 * @names - Array of pointers to static names used for the counters in the
  513 *   sysfs directory.
  514 * @num_counters - How many hardware counters there are.  If @names is
  515 *   shorter than this number, a kernel oops will result.  Driver authors
  516 *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@names) < num_counters)
 517 *   in their code to prevent this.
 518 * @value - Array of u64 counters that are accessed by the sysfs code and
  519 *   filled in by the driver's get_stats routine.
 520 */
 521struct rdma_hw_stats {
 522        struct mutex    lock; /* Protect lifespan and values[] */
 523        unsigned long   timestamp;
 524        unsigned long   lifespan;
 525        const char * const *names;
 526        int             num_counters;
 527        u64             value[];
 528};
 529
 530#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
 531/**
 532 * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
 533 *   for drivers.
 534 * @names - Array of static const char *
 535 * @num_counters - How many elements in array
 536 * @lifespan - How many milliseconds between updates
 537 */
 538static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 539                const char * const *names, int num_counters,
 540                unsigned long lifespan)
 541{
 542        struct rdma_hw_stats *stats;
 543
 544        stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
 545                        GFP_KERNEL);
 546        if (!stats)
 547                return NULL;
 548        stats->names = names;
 549        stats->num_counters = num_counters;
 550        stats->lifespan = msecs_to_jiffies(lifespan);
 551
 552        return stats;
 553}
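
/*
 * Example (sketch): how a driver might allocate its rdma_hw_stats, keeping
 * the BUILD_BUG_ON suggested in the struct rdma_hw_stats comment above.  The
 * counter names and the helper shape are hypothetical.
 *
 *	enum { EXAMPLE_NUM_COUNTERS = 2 };
 *
 *	static const char * const example_counter_names[] = {
 *		"rx_packets",
 *		"tx_packets",
 *	};
 *
 *	static struct rdma_hw_stats *example_alloc_hw_stats(void)
 *	{
 *		BUILD_BUG_ON(ARRAY_SIZE(example_counter_names) < EXAMPLE_NUM_COUNTERS);
 *		return rdma_alloc_hw_stats_struct(example_counter_names,
 *						  EXAMPLE_NUM_COUNTERS,
 *						  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 *	}
 */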
 554
 555
  556/* Define bits for the various kinds of port functionality that need to be
  557 * supported by the core.
 558 */
 559/* Management                           0x00000FFF */
 560#define RDMA_CORE_CAP_IB_MAD            0x00000001
 561#define RDMA_CORE_CAP_IB_SMI            0x00000002
 562#define RDMA_CORE_CAP_IB_CM             0x00000004
 563#define RDMA_CORE_CAP_IW_CM             0x00000008
 564#define RDMA_CORE_CAP_IB_SA             0x00000010
 565#define RDMA_CORE_CAP_OPA_MAD           0x00000020
 566
 567/* Address format                       0x000FF000 */
 568#define RDMA_CORE_CAP_AF_IB             0x00001000
 569#define RDMA_CORE_CAP_ETH_AH            0x00002000
 570#define RDMA_CORE_CAP_OPA_AH            0x00004000
 571#define RDMA_CORE_CAP_IB_GRH_REQUIRED   0x00008000
 572
 573/* Protocol                             0xFFF00000 */
 574#define RDMA_CORE_CAP_PROT_IB           0x00100000
 575#define RDMA_CORE_CAP_PROT_ROCE         0x00200000
 576#define RDMA_CORE_CAP_PROT_IWARP        0x00400000
 577#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
 578#define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
 579#define RDMA_CORE_CAP_PROT_USNIC        0x02000000
 580
 581#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
 582                                        | RDMA_CORE_CAP_PROT_ROCE     \
 583                                        | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
 584
 585#define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
 586                                        | RDMA_CORE_CAP_IB_MAD \
 587                                        | RDMA_CORE_CAP_IB_SMI \
 588                                        | RDMA_CORE_CAP_IB_CM  \
 589                                        | RDMA_CORE_CAP_IB_SA  \
 590                                        | RDMA_CORE_CAP_AF_IB)
 591#define RDMA_CORE_PORT_IBA_ROCE        (RDMA_CORE_CAP_PROT_ROCE \
 592                                        | RDMA_CORE_CAP_IB_MAD  \
 593                                        | RDMA_CORE_CAP_IB_CM   \
 594                                        | RDMA_CORE_CAP_AF_IB   \
 595                                        | RDMA_CORE_CAP_ETH_AH)
 596#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP                       \
 597                                        (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
 598                                        | RDMA_CORE_CAP_IB_MAD  \
 599                                        | RDMA_CORE_CAP_IB_CM   \
 600                                        | RDMA_CORE_CAP_AF_IB   \
 601                                        | RDMA_CORE_CAP_ETH_AH)
 602#define RDMA_CORE_PORT_IWARP           (RDMA_CORE_CAP_PROT_IWARP \
 603                                        | RDMA_CORE_CAP_IW_CM)
 604#define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
 605                                        | RDMA_CORE_CAP_OPA_MAD)
 606
 607#define RDMA_CORE_PORT_RAW_PACKET       (RDMA_CORE_CAP_PROT_RAW_PACKET)
 608
 609#define RDMA_CORE_PORT_USNIC            (RDMA_CORE_CAP_PROT_USNIC)
 610
 611struct ib_port_attr {
 612        u64                     subnet_prefix;
 613        enum ib_port_state      state;
 614        enum ib_mtu             max_mtu;
 615        enum ib_mtu             active_mtu;
 616        int                     gid_tbl_len;
 617        unsigned int            ip_gids:1;
 618        /* This is the value from PortInfo CapabilityMask, defined by IBA */
 619        u32                     port_cap_flags;
 620        u32                     max_msg_sz;
 621        u32                     bad_pkey_cntr;
 622        u32                     qkey_viol_cntr;
 623        u16                     pkey_tbl_len;
 624        u32                     sm_lid;
 625        u32                     lid;
 626        u8                      lmc;
 627        u8                      max_vl_num;
 628        u8                      sm_sl;
 629        u8                      subnet_timeout;
 630        u8                      init_type_reply;
 631        u8                      active_width;
 632        u8                      active_speed;
 633        u8                      phys_state;
 634        u16                     port_cap_flags2;
 635};
 636
 637enum ib_device_modify_flags {
 638        IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0,
 639        IB_DEVICE_MODIFY_NODE_DESC      = 1 << 1
 640};
 641
 642#define IB_DEVICE_NODE_DESC_MAX 64
 643
 644struct ib_device_modify {
 645        u64     sys_image_guid;
 646        char    node_desc[IB_DEVICE_NODE_DESC_MAX];
 647};
 648
 649enum ib_port_modify_flags {
 650        IB_PORT_SHUTDOWN                = 1,
 651        IB_PORT_INIT_TYPE               = (1<<2),
 652        IB_PORT_RESET_QKEY_CNTR         = (1<<3),
 653        IB_PORT_OPA_MASK_CHG            = (1<<4)
 654};
 655
 656struct ib_port_modify {
 657        u32     set_port_cap_mask;
 658        u32     clr_port_cap_mask;
 659        u8      init_type;
 660};
 661
 662enum ib_event_type {
 663        IB_EVENT_CQ_ERR,
 664        IB_EVENT_QP_FATAL,
 665        IB_EVENT_QP_REQ_ERR,
 666        IB_EVENT_QP_ACCESS_ERR,
 667        IB_EVENT_COMM_EST,
 668        IB_EVENT_SQ_DRAINED,
 669        IB_EVENT_PATH_MIG,
 670        IB_EVENT_PATH_MIG_ERR,
 671        IB_EVENT_DEVICE_FATAL,
 672        IB_EVENT_PORT_ACTIVE,
 673        IB_EVENT_PORT_ERR,
 674        IB_EVENT_LID_CHANGE,
 675        IB_EVENT_PKEY_CHANGE,
 676        IB_EVENT_SM_CHANGE,
 677        IB_EVENT_SRQ_ERR,
 678        IB_EVENT_SRQ_LIMIT_REACHED,
 679        IB_EVENT_QP_LAST_WQE_REACHED,
 680        IB_EVENT_CLIENT_REREGISTER,
 681        IB_EVENT_GID_CHANGE,
 682        IB_EVENT_WQ_FATAL,
 683};
 684
 685const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
 686
 687struct ib_event {
 688        struct ib_device        *device;
 689        union {
 690                struct ib_cq    *cq;
 691                struct ib_qp    *qp;
 692                struct ib_srq   *srq;
 693                struct ib_wq    *wq;
 694                u8              port_num;
 695        } element;
 696        enum ib_event_type      event;
 697};
 698
 699struct ib_event_handler {
 700        struct ib_device *device;
 701        void            (*handler)(struct ib_event_handler *, struct ib_event *);
 702        struct list_head  list;
 703};
 704
 705#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler)          \
 706        do {                                                    \
 707                (_ptr)->device  = _device;                      \
 708                (_ptr)->handler = _handler;                     \
 709                INIT_LIST_HEAD(&(_ptr)->list);                  \
 710        } while (0)
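
/*
 * Example (sketch): initializing and registering an asynchronous event
 * handler with the macro above.  ib_register_event_handler() is declared
 * further down in this header; the handler body is illustrative.
 *
 *	static void example_event(struct ib_event_handler *handler,
 *				  struct ib_event *event)
 *	{
 *		if (event->event == IB_EVENT_PORT_ERR)
 *			ibdev_warn(event->device, "port %u went down\n",
 *				   event->element.port_num);
 *	}
 *
 *	static void example_watch_ports(struct ib_device *ibdev,
 *					struct ib_event_handler *handler)
 *	{
 *		INIT_IB_EVENT_HANDLER(handler, ibdev, example_event);
 *		ib_register_event_handler(handler);
 *	}
 */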
 711
 712struct ib_global_route {
 713        const struct ib_gid_attr *sgid_attr;
 714        union ib_gid    dgid;
 715        u32             flow_label;
 716        u8              sgid_index;
 717        u8              hop_limit;
 718        u8              traffic_class;
 719};
 720
 721struct ib_grh {
 722        __be32          version_tclass_flow;
 723        __be16          paylen;
 724        u8              next_hdr;
 725        u8              hop_limit;
 726        union ib_gid    sgid;
 727        union ib_gid    dgid;
 728};
 729
 730union rdma_network_hdr {
 731        struct ib_grh ibgrh;
 732        struct {
  733                /* The IB spec states that if it's IPv4, the IPv4 header
  734                 * is located in the last 20 bytes of the GRH space.
 735                 */
 736                u8              reserved[20];
 737                struct iphdr    roce4grh;
 738        };
 739};
 740
 741#define IB_QPN_MASK             0xFFFFFF
 742
 743enum {
 744        IB_MULTICAST_QPN = 0xffffff
 745};
 746
 747#define IB_LID_PERMISSIVE       cpu_to_be16(0xFFFF)
 748#define IB_MULTICAST_LID_BASE   cpu_to_be16(0xC000)
 749
 750enum ib_ah_flags {
 751        IB_AH_GRH       = 1
 752};
 753
 754enum ib_rate {
 755        IB_RATE_PORT_CURRENT = 0,
 756        IB_RATE_2_5_GBPS = 2,
 757        IB_RATE_5_GBPS   = 5,
 758        IB_RATE_10_GBPS  = 3,
 759        IB_RATE_20_GBPS  = 6,
 760        IB_RATE_30_GBPS  = 4,
 761        IB_RATE_40_GBPS  = 7,
 762        IB_RATE_60_GBPS  = 8,
 763        IB_RATE_80_GBPS  = 9,
 764        IB_RATE_120_GBPS = 10,
 765        IB_RATE_14_GBPS  = 11,
 766        IB_RATE_56_GBPS  = 12,
 767        IB_RATE_112_GBPS = 13,
 768        IB_RATE_168_GBPS = 14,
 769        IB_RATE_25_GBPS  = 15,
 770        IB_RATE_100_GBPS = 16,
 771        IB_RATE_200_GBPS = 17,
 772        IB_RATE_300_GBPS = 18,
 773        IB_RATE_28_GBPS  = 19,
 774        IB_RATE_50_GBPS  = 20,
 775        IB_RATE_400_GBPS = 21,
 776        IB_RATE_600_GBPS = 22,
 777};
 778
 779/**
 780 * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
 781 * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
 782 * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
 783 * @rate: rate to convert.
 784 */
 785__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
 786
 787/**
 788 * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
 789 * For example, IB_RATE_2_5_GBPS will be converted to 2500.
 790 * @rate: rate to convert.
 791 */
 792__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
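
/*
 * Example (sketch): the two helpers above express the same static rate in
 * different units, e.g. IB_RATE_10_GBPS yields a multiple of 4 and
 * 10000 Mbps.  The logging wrapper is hypothetical.
 *
 *	static void example_print_rate(struct ib_device *ibdev, enum ib_rate rate)
 *	{
 *		ibdev_info(ibdev, "static rate: %d x 2.5 Gb/s (%d Mbps)\n",
 *			   ib_rate_to_mult(rate), ib_rate_to_mbps(rate));
 *	}
 */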
 793
 794
 795/**
 796 * enum ib_mr_type - memory region type
 797 * @IB_MR_TYPE_MEM_REG:       memory region that is used for
 798 *                            normal registration
 799 * @IB_MR_TYPE_SIGNATURE:     memory region that is used for
 800 *                            signature operations (data-integrity
 801 *                            capable regions)
 802 * @IB_MR_TYPE_SG_GAPS:       memory region that is capable to
 803 *                            register any arbitrary sg lists (without
 804 *                            the normal mr constraints - see
 805 *                            ib_map_mr_sg)
 806 */
 807enum ib_mr_type {
 808        IB_MR_TYPE_MEM_REG,
 809        IB_MR_TYPE_SIGNATURE,
 810        IB_MR_TYPE_SG_GAPS,
 811};
 812
 813/**
 814 * Signature types
 815 * IB_SIG_TYPE_NONE: Unprotected.
 816 * IB_SIG_TYPE_T10_DIF: Type T10-DIF
 817 */
 818enum ib_signature_type {
 819        IB_SIG_TYPE_NONE,
 820        IB_SIG_TYPE_T10_DIF,
 821};
 822
 823/**
 824 * Signature T10-DIF block-guard types
 825 * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
 826 * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
 827 */
 828enum ib_t10_dif_bg_type {
 829        IB_T10DIF_CRC,
 830        IB_T10DIF_CSUM
 831};
 832
 833/**
 834 * struct ib_t10_dif_domain - Parameters specific for T10-DIF
 835 *     domain.
 836 * @bg_type: T10-DIF block guard type (CRC|CSUM)
 837 * @pi_interval: protection information interval.
 838 * @bg: seed of guard computation.
 839 * @app_tag: application tag of guard block
 840 * @ref_tag: initial guard block reference tag.
  841 * @ref_remap: Indicate whether the reftag increments with each block
 842 * @app_escape: Indicate to skip block check if apptag=0xffff
 843 * @ref_escape: Indicate to skip block check if reftag=0xffffffff
 844 * @apptag_check_mask: check bitmask of application tag.
 845 */
 846struct ib_t10_dif_domain {
 847        enum ib_t10_dif_bg_type bg_type;
 848        u16                     pi_interval;
 849        u16                     bg;
 850        u16                     app_tag;
 851        u32                     ref_tag;
 852        bool                    ref_remap;
 853        bool                    app_escape;
 854        bool                    ref_escape;
 855        u16                     apptag_check_mask;
 856};
 857
 858/**
 859 * struct ib_sig_domain - Parameters for signature domain
  860 * @sig_type: specific signature type
 861 * @sig: union of all signature domain attributes that may
 862 *     be used to set domain layout.
 863 */
 864struct ib_sig_domain {
 865        enum ib_signature_type sig_type;
 866        union {
 867                struct ib_t10_dif_domain dif;
 868        } sig;
 869};
 870
 871/**
 872 * struct ib_sig_attrs - Parameters for signature handover operation
 873 * @check_mask: bitmask for signature byte check (8 bytes)
  874 * @mem: memory domain layout descriptor.
  875 * @wire: wire domain layout descriptor.
 876 */
 877struct ib_sig_attrs {
 878        u8                      check_mask;
 879        struct ib_sig_domain    mem;
 880        struct ib_sig_domain    wire;
 881};
 882
 883enum ib_sig_err_type {
 884        IB_SIG_BAD_GUARD,
 885        IB_SIG_BAD_REFTAG,
 886        IB_SIG_BAD_APPTAG,
 887};
 888
 889/**
 890 * Signature check masks (8 bytes in total) according to the T10-PI standard:
 891 *  -------- -------- ------------
 892 * | GUARD  | APPTAG |   REFTAG   |
 893 * |  2B    |  2B    |    4B      |
 894 *  -------- -------- ------------
 895 */
 896enum {
 897        IB_SIG_CHECK_GUARD      = 0xc0,
 898        IB_SIG_CHECK_APPTAG     = 0x30,
 899        IB_SIG_CHECK_REFTAG     = 0x0f,
 900};
 901
 902/**
 903 * struct ib_sig_err - signature error descriptor
 904 */
 905struct ib_sig_err {
 906        enum ib_sig_err_type    err_type;
 907        u32                     expected;
 908        u32                     actual;
 909        u64                     sig_err_offset;
 910        u32                     key;
 911};
 912
 913enum ib_mr_status_check {
 914        IB_MR_CHECK_SIG_STATUS = 1,
 915};
 916
 917/**
 918 * struct ib_mr_status - Memory region status container
 919 *
 920 * @fail_status: Bitmask of MR checks status. For each
 921 *     failed check a corresponding status bit is set.
  922 * @sig_err: Additional info for IB_MR_CHECK_SIG_STATUS
 923 *     failure.
 924 */
 925struct ib_mr_status {
 926        u32                 fail_status;
 927        struct ib_sig_err   sig_err;
 928};
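
/*
 * Example (sketch): checking a signature-enabled MR after I/O completes.
 * IB_MR_CHECK_SIG_STATUS and struct ib_mr_status are defined above;
 * ib_check_mr_status() is declared later in this header.  The error
 * handling is illustrative.
 *
 *	static int example_check_sig(struct ib_mr *sig_mr)
 *	{
 *		struct ib_mr_status status;
 *		int ret;
 *
 *		ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &status);
 *		if (ret)
 *			return ret;
 *		if (status.fail_status & IB_MR_CHECK_SIG_STATUS)
 *			return -EIO;	// inspect status.sig_err for details
 *		return 0;
 *	}
 */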
 929
 930/**
 931 * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
 932 * enum.
 933 * @mult: multiple to convert.
 934 */
 935__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
 936
 937enum rdma_ah_attr_type {
 938        RDMA_AH_ATTR_TYPE_UNDEFINED,
 939        RDMA_AH_ATTR_TYPE_IB,
 940        RDMA_AH_ATTR_TYPE_ROCE,
 941        RDMA_AH_ATTR_TYPE_OPA,
 942};
 943
 944struct ib_ah_attr {
 945        u16                     dlid;
 946        u8                      src_path_bits;
 947};
 948
 949struct roce_ah_attr {
 950        u8                      dmac[ETH_ALEN];
 951};
 952
 953struct opa_ah_attr {
 954        u32                     dlid;
 955        u8                      src_path_bits;
 956        bool                    make_grd;
 957};
 958
 959struct rdma_ah_attr {
 960        struct ib_global_route  grh;
 961        u8                      sl;
 962        u8                      static_rate;
 963        u8                      port_num;
 964        u8                      ah_flags;
 965        enum rdma_ah_attr_type type;
 966        union {
 967                struct ib_ah_attr ib;
 968                struct roce_ah_attr roce;
 969                struct opa_ah_attr opa;
 970        };
 971};
 972
 973enum ib_wc_status {
 974        IB_WC_SUCCESS,
 975        IB_WC_LOC_LEN_ERR,
 976        IB_WC_LOC_QP_OP_ERR,
 977        IB_WC_LOC_EEC_OP_ERR,
 978        IB_WC_LOC_PROT_ERR,
 979        IB_WC_WR_FLUSH_ERR,
 980        IB_WC_MW_BIND_ERR,
 981        IB_WC_BAD_RESP_ERR,
 982        IB_WC_LOC_ACCESS_ERR,
 983        IB_WC_REM_INV_REQ_ERR,
 984        IB_WC_REM_ACCESS_ERR,
 985        IB_WC_REM_OP_ERR,
 986        IB_WC_RETRY_EXC_ERR,
 987        IB_WC_RNR_RETRY_EXC_ERR,
 988        IB_WC_LOC_RDD_VIOL_ERR,
 989        IB_WC_REM_INV_RD_REQ_ERR,
 990        IB_WC_REM_ABORT_ERR,
 991        IB_WC_INV_EECN_ERR,
 992        IB_WC_INV_EEC_STATE_ERR,
 993        IB_WC_FATAL_ERR,
 994        IB_WC_RESP_TIMEOUT_ERR,
 995        IB_WC_GENERAL_ERR
 996};
 997
 998const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
 999
1000enum ib_wc_opcode {
1001        IB_WC_SEND,
1002        IB_WC_RDMA_WRITE,
1003        IB_WC_RDMA_READ,
1004        IB_WC_COMP_SWAP,
1005        IB_WC_FETCH_ADD,
1006        IB_WC_LSO,
1007        IB_WC_LOCAL_INV,
1008        IB_WC_REG_MR,
1009        IB_WC_MASKED_COMP_SWAP,
1010        IB_WC_MASKED_FETCH_ADD,
1011/*
1012 * Set value of IB_WC_RECV so consumers can test if a completion is a
1013 * receive by testing (opcode & IB_WC_RECV).
1014 */
1015        IB_WC_RECV                      = 1 << 7,
1016        IB_WC_RECV_RDMA_WITH_IMM
1017};
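
/*
 * Example (sketch): classifying completions the way the IB_WC_RECV comment
 * above suggests; struct ib_wc is defined just below.
 *
 *	static bool example_wc_is_recv(const struct ib_wc *wc)
 *	{
 *		return wc->opcode & IB_WC_RECV;
 *	}
 */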
1018
1019enum ib_wc_flags {
1020        IB_WC_GRH               = 1,
1021        IB_WC_WITH_IMM          = (1<<1),
1022        IB_WC_WITH_INVALIDATE   = (1<<2),
1023        IB_WC_IP_CSUM_OK        = (1<<3),
1024        IB_WC_WITH_SMAC         = (1<<4),
1025        IB_WC_WITH_VLAN         = (1<<5),
1026        IB_WC_WITH_NETWORK_HDR_TYPE     = (1<<6),
1027};
1028
1029struct ib_wc {
1030        union {
1031                u64             wr_id;
1032                struct ib_cqe   *wr_cqe;
1033        };
1034        enum ib_wc_status       status;
1035        enum ib_wc_opcode       opcode;
1036        u32                     vendor_err;
1037        u32                     byte_len;
1038        struct ib_qp           *qp;
1039        union {
1040                __be32          imm_data;
1041                u32             invalidate_rkey;
1042        } ex;
1043        u32                     src_qp;
1044        u32                     slid;
1045        int                     wc_flags;
1046        u16                     pkey_index;
1047        u8                      sl;
1048        u8                      dlid_path_bits;
1049        u8                      port_num;       /* valid only for DR SMPs on switches */
1050        u8                      smac[ETH_ALEN];
1051        u16                     vlan_id;
1052        u8                      network_hdr_type;
1053};
1054
1055enum ib_cq_notify_flags {
1056        IB_CQ_SOLICITED                 = 1 << 0,
1057        IB_CQ_NEXT_COMP                 = 1 << 1,
1058        IB_CQ_SOLICITED_MASK            = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
1059        IB_CQ_REPORT_MISSED_EVENTS      = 1 << 2,
1060};
1061
1062enum ib_srq_type {
1063        IB_SRQT_BASIC,
1064        IB_SRQT_XRC,
1065        IB_SRQT_TM,
1066};
1067
1068static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
1069{
1070        return srq_type == IB_SRQT_XRC ||
1071               srq_type == IB_SRQT_TM;
1072}
1073
1074enum ib_srq_attr_mask {
1075        IB_SRQ_MAX_WR   = 1 << 0,
1076        IB_SRQ_LIMIT    = 1 << 1,
1077};
1078
1079struct ib_srq_attr {
1080        u32     max_wr;
1081        u32     max_sge;
1082        u32     srq_limit;
1083};
1084
1085struct ib_srq_init_attr {
1086        void                  (*event_handler)(struct ib_event *, void *);
1087        void                   *srq_context;
1088        struct ib_srq_attr      attr;
1089        enum ib_srq_type        srq_type;
1090
1091        struct {
1092                struct ib_cq   *cq;
1093                union {
1094                        struct {
1095                                struct ib_xrcd *xrcd;
1096                        } xrc;
1097
1098                        struct {
1099                                u32             max_num_tags;
1100                        } tag_matching;
1101                };
1102        } ext;
1103};
1104
1105struct ib_qp_cap {
1106        u32     max_send_wr;
1107        u32     max_recv_wr;
1108        u32     max_send_sge;
1109        u32     max_recv_sge;
1110        u32     max_inline_data;
1111
1112        /*
1113         * Maximum number of rdma_rw_ctx structures in flight at a time.
 1114         * ib_create_qp() will calculate the right number of needed WRs
1115         * and MRs based on this.
1116         */
1117        u32     max_rdma_ctxs;
1118};
1119
1120enum ib_sig_type {
1121        IB_SIGNAL_ALL_WR,
1122        IB_SIGNAL_REQ_WR
1123};
1124
1125enum ib_qp_type {
1126        /*
1127         * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
1128         * here (and in that order) since the MAD layer uses them as
1129         * indices into a 2-entry table.
1130         */
1131        IB_QPT_SMI,
1132        IB_QPT_GSI,
1133
1134        IB_QPT_RC,
1135        IB_QPT_UC,
1136        IB_QPT_UD,
1137        IB_QPT_RAW_IPV6,
1138        IB_QPT_RAW_ETHERTYPE,
1139        IB_QPT_RAW_PACKET = 8,
1140        IB_QPT_XRC_INI = 9,
1141        IB_QPT_XRC_TGT,
1142        IB_QPT_MAX,
1143        IB_QPT_DRIVER = 0xFF,
1144        /* Reserve a range for qp types internal to the low level driver.
1145         * These qp types will not be visible at the IB core layer, so the
1146         * IB_QPT_MAX usages should not be affected in the core layer
1147         */
1148        IB_QPT_RESERVED1 = 0x1000,
1149        IB_QPT_RESERVED2,
1150        IB_QPT_RESERVED3,
1151        IB_QPT_RESERVED4,
1152        IB_QPT_RESERVED5,
1153        IB_QPT_RESERVED6,
1154        IB_QPT_RESERVED7,
1155        IB_QPT_RESERVED8,
1156        IB_QPT_RESERVED9,
1157        IB_QPT_RESERVED10,
1158};
1159
1160enum ib_qp_create_flags {
1161        IB_QP_CREATE_IPOIB_UD_LSO               = 1 << 0,
1162        IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
1163        IB_QP_CREATE_CROSS_CHANNEL              = 1 << 2,
1164        IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
1165        IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
1166        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
1167        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
1168        /* FREE                                 = 1 << 7, */
1169        IB_QP_CREATE_SCATTER_FCS                = 1 << 8,
1170        IB_QP_CREATE_CVLAN_STRIPPING            = 1 << 9,
1171        IB_QP_CREATE_SOURCE_QPN                 = 1 << 10,
1172        IB_QP_CREATE_PCI_WRITE_END_PADDING      = 1 << 11,
1173        /* reserve bits 26-31 for low level drivers' internal use */
1174        IB_QP_CREATE_RESERVED_START             = 1 << 26,
1175        IB_QP_CREATE_RESERVED_END               = 1 << 31,
1176};
1177
1178/*
1179 * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
1180 * callback to destroy the passed in QP.
1181 */
1182
1183struct ib_qp_init_attr {
1184        /* Consumer's event_handler callback must not block */
1185        void                  (*event_handler)(struct ib_event *, void *);
1186
1187        void                   *qp_context;
1188        struct ib_cq           *send_cq;
1189        struct ib_cq           *recv_cq;
1190        struct ib_srq          *srq;
1191        struct ib_xrcd         *xrcd;     /* XRC TGT QPs only */
1192        struct ib_qp_cap        cap;
1193        enum ib_sig_type        sq_sig_type;
1194        enum ib_qp_type         qp_type;
1195        u32                     create_flags;
1196
1197        /*
1198         * Only needed for special QP types, or when using the RW API.
1199         */
1200        u8                      port_num;
1201        struct ib_rwq_ind_table *rwq_ind_tbl;
1202        u32                     source_qpn;
1203};
1204
1205struct ib_qp_open_attr {
1206        void                  (*event_handler)(struct ib_event *, void *);
1207        void                   *qp_context;
1208        u32                     qp_num;
1209        enum ib_qp_type         qp_type;
1210};
1211
1212enum ib_rnr_timeout {
1213        IB_RNR_TIMER_655_36 =  0,
1214        IB_RNR_TIMER_000_01 =  1,
1215        IB_RNR_TIMER_000_02 =  2,
1216        IB_RNR_TIMER_000_03 =  3,
1217        IB_RNR_TIMER_000_04 =  4,
1218        IB_RNR_TIMER_000_06 =  5,
1219        IB_RNR_TIMER_000_08 =  6,
1220        IB_RNR_TIMER_000_12 =  7,
1221        IB_RNR_TIMER_000_16 =  8,
1222        IB_RNR_TIMER_000_24 =  9,
1223        IB_RNR_TIMER_000_32 = 10,
1224        IB_RNR_TIMER_000_48 = 11,
1225        IB_RNR_TIMER_000_64 = 12,
1226        IB_RNR_TIMER_000_96 = 13,
1227        IB_RNR_TIMER_001_28 = 14,
1228        IB_RNR_TIMER_001_92 = 15,
1229        IB_RNR_TIMER_002_56 = 16,
1230        IB_RNR_TIMER_003_84 = 17,
1231        IB_RNR_TIMER_005_12 = 18,
1232        IB_RNR_TIMER_007_68 = 19,
1233        IB_RNR_TIMER_010_24 = 20,
1234        IB_RNR_TIMER_015_36 = 21,
1235        IB_RNR_TIMER_020_48 = 22,
1236        IB_RNR_TIMER_030_72 = 23,
1237        IB_RNR_TIMER_040_96 = 24,
1238        IB_RNR_TIMER_061_44 = 25,
1239        IB_RNR_TIMER_081_92 = 26,
1240        IB_RNR_TIMER_122_88 = 27,
1241        IB_RNR_TIMER_163_84 = 28,
1242        IB_RNR_TIMER_245_76 = 29,
1243        IB_RNR_TIMER_327_68 = 30,
1244        IB_RNR_TIMER_491_52 = 31
1245};
1246
1247enum ib_qp_attr_mask {
1248        IB_QP_STATE                     = 1,
1249        IB_QP_CUR_STATE                 = (1<<1),
1250        IB_QP_EN_SQD_ASYNC_NOTIFY       = (1<<2),
1251        IB_QP_ACCESS_FLAGS              = (1<<3),
1252        IB_QP_PKEY_INDEX                = (1<<4),
1253        IB_QP_PORT                      = (1<<5),
1254        IB_QP_QKEY                      = (1<<6),
1255        IB_QP_AV                        = (1<<7),
1256        IB_QP_PATH_MTU                  = (1<<8),
1257        IB_QP_TIMEOUT                   = (1<<9),
1258        IB_QP_RETRY_CNT                 = (1<<10),
1259        IB_QP_RNR_RETRY                 = (1<<11),
1260        IB_QP_RQ_PSN                    = (1<<12),
1261        IB_QP_MAX_QP_RD_ATOMIC          = (1<<13),
1262        IB_QP_ALT_PATH                  = (1<<14),
1263        IB_QP_MIN_RNR_TIMER             = (1<<15),
1264        IB_QP_SQ_PSN                    = (1<<16),
1265        IB_QP_MAX_DEST_RD_ATOMIC        = (1<<17),
1266        IB_QP_PATH_MIG_STATE            = (1<<18),
1267        IB_QP_CAP                       = (1<<19),
1268        IB_QP_DEST_QPN                  = (1<<20),
1269        IB_QP_RESERVED1                 = (1<<21),
1270        IB_QP_RESERVED2                 = (1<<22),
1271        IB_QP_RESERVED3                 = (1<<23),
1272        IB_QP_RESERVED4                 = (1<<24),
1273        IB_QP_RATE_LIMIT                = (1<<25),
1274};
1275
1276enum ib_qp_state {
1277        IB_QPS_RESET,
1278        IB_QPS_INIT,
1279        IB_QPS_RTR,
1280        IB_QPS_RTS,
1281        IB_QPS_SQD,
1282        IB_QPS_SQE,
1283        IB_QPS_ERR
1284};
1285
1286enum ib_mig_state {
1287        IB_MIG_MIGRATED,
1288        IB_MIG_REARM,
1289        IB_MIG_ARMED
1290};
1291
1292enum ib_mw_type {
1293        IB_MW_TYPE_1 = 1,
1294        IB_MW_TYPE_2 = 2
1295};
1296
1297struct ib_qp_attr {
1298        enum ib_qp_state        qp_state;
1299        enum ib_qp_state        cur_qp_state;
1300        enum ib_mtu             path_mtu;
1301        enum ib_mig_state       path_mig_state;
1302        u32                     qkey;
1303        u32                     rq_psn;
1304        u32                     sq_psn;
1305        u32                     dest_qp_num;
1306        int                     qp_access_flags;
1307        struct ib_qp_cap        cap;
1308        struct rdma_ah_attr     ah_attr;
1309        struct rdma_ah_attr     alt_ah_attr;
1310        u16                     pkey_index;
1311        u16                     alt_pkey_index;
1312        u8                      en_sqd_async_notify;
1313        u8                      sq_draining;
1314        u8                      max_rd_atomic;
1315        u8                      max_dest_rd_atomic;
1316        u8                      min_rnr_timer;
1317        u8                      port_num;
1318        u8                      timeout;
1319        u8                      retry_cnt;
1320        u8                      rnr_retry;
1321        u8                      alt_port_num;
1322        u8                      alt_timeout;
1323        u32                     rate_limit;
1324};
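
/*
 * Example (sketch): moving a QP to the error state so outstanding work
 * requests complete with IB_WC_WR_FLUSH_ERR.  Only the IB_QP_STATE mask bit
 * is needed for this transition; ib_modify_qp() is declared later in this
 * header.
 *
 *	static int example_flush_qp(struct ib_qp *qp)
 *	{
 *		struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 *
 *		return ib_modify_qp(qp, &attr, IB_QP_STATE);
 *	}
 */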
1325
1326enum ib_wr_opcode {
1327        /* These are shared with userspace */
1328        IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
1329        IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
1330        IB_WR_SEND = IB_UVERBS_WR_SEND,
1331        IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
1332        IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
1333        IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
1334        IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
1335        IB_WR_LSO = IB_UVERBS_WR_TSO,
1336        IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
1337        IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
1338        IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
1339        IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
1340                IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
1341        IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
1342                IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
1343
1344        /* These are kernel only and can not be issued by userspace */
1345        IB_WR_REG_MR = 0x20,
1346        IB_WR_REG_SIG_MR,
1347
1348        /* reserve values for low level drivers' internal use.
1349         * These values will not be used at all in the ib core layer.
1350         */
1351        IB_WR_RESERVED1 = 0xf0,
1352        IB_WR_RESERVED2,
1353        IB_WR_RESERVED3,
1354        IB_WR_RESERVED4,
1355        IB_WR_RESERVED5,
1356        IB_WR_RESERVED6,
1357        IB_WR_RESERVED7,
1358        IB_WR_RESERVED8,
1359        IB_WR_RESERVED9,
1360        IB_WR_RESERVED10,
1361};
1362
1363enum ib_send_flags {
1364        IB_SEND_FENCE           = 1,
1365        IB_SEND_SIGNALED        = (1<<1),
1366        IB_SEND_SOLICITED       = (1<<2),
1367        IB_SEND_INLINE          = (1<<3),
1368        IB_SEND_IP_CSUM         = (1<<4),
1369
1370        /* reserve bits 26-31 for low level drivers' internal use */
1371        IB_SEND_RESERVED_START  = (1 << 26),
1372        IB_SEND_RESERVED_END    = (1 << 31),
1373};
1374
1375struct ib_sge {
1376        u64     addr;
1377        u32     length;
1378        u32     lkey;
1379};
1380
1381struct ib_cqe {
1382        void (*done)(struct ib_cq *cq, struct ib_wc *wc);
1383};
1384
1385struct ib_send_wr {
1386        struct ib_send_wr      *next;
1387        union {
1388                u64             wr_id;
1389                struct ib_cqe   *wr_cqe;
1390        };
1391        struct ib_sge          *sg_list;
1392        int                     num_sge;
1393        enum ib_wr_opcode       opcode;
1394        int                     send_flags;
1395        union {
1396                __be32          imm_data;
1397                u32             invalidate_rkey;
1398        } ex;
1399};
1400
1401struct ib_rdma_wr {
1402        struct ib_send_wr       wr;
1403        u64                     remote_addr;
1404        u32                     rkey;
1405};
1406
1407static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
1408{
1409        return container_of(wr, struct ib_rdma_wr, wr);
1410}
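
/*
 * Example (sketch): filling in an RDMA WRITE work request.  All structures
 * and flags are from this header; the SGE, remote address and rkey are
 * assumed to have been set up by the caller, and posting happens elsewhere
 * (ib_post_send(), declared later in this header).
 *
 *	static void example_build_write(struct ib_rdma_wr *wr, struct ib_cqe *cqe,
 *					struct ib_sge *sge, u64 raddr, u32 rkey)
 *	{
 *		memset(wr, 0, sizeof(*wr));
 *		wr->wr.wr_cqe = cqe;
 *		wr->wr.sg_list = sge;
 *		wr->wr.num_sge = 1;
 *		wr->wr.opcode = IB_WR_RDMA_WRITE;
 *		wr->wr.send_flags = IB_SEND_SIGNALED;
 *		wr->remote_addr = raddr;
 *		wr->rkey = rkey;
 *	}
 */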
1411
1412struct ib_atomic_wr {
1413        struct ib_send_wr       wr;
1414        u64                     remote_addr;
1415        u64                     compare_add;
1416        u64                     swap;
1417        u64                     compare_add_mask;
1418        u64                     swap_mask;
1419        u32                     rkey;
1420};
1421
1422static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
1423{
1424        return container_of(wr, struct ib_atomic_wr, wr);
1425}
1426
1427struct ib_ud_wr {
1428        struct ib_send_wr       wr;
1429        struct ib_ah            *ah;
1430        void                    *header;
1431        int                     hlen;
1432        int                     mss;
1433        u32                     remote_qpn;
1434        u32                     remote_qkey;
1435        u16                     pkey_index; /* valid for GSI only */
1436        u8                      port_num;   /* valid for DR SMPs on switch only */
1437};
1438
1439static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
1440{
1441        return container_of(wr, struct ib_ud_wr, wr);
1442}
1443
1444struct ib_reg_wr {
1445        struct ib_send_wr       wr;
1446        struct ib_mr            *mr;
1447        u32                     key;
1448        int                     access;
1449};
1450
1451static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
1452{
1453        return container_of(wr, struct ib_reg_wr, wr);
1454}
1455
1456struct ib_sig_handover_wr {
1457        struct ib_send_wr       wr;
1458        struct ib_sig_attrs    *sig_attrs;
1459        struct ib_mr           *sig_mr;
1460        int                     access_flags;
1461        struct ib_sge          *prot;
1462};
1463
1464static inline const struct ib_sig_handover_wr *
1465sig_handover_wr(const struct ib_send_wr *wr)
1466{
1467        return container_of(wr, struct ib_sig_handover_wr, wr);
1468}
1469
1470struct ib_recv_wr {
1471        struct ib_recv_wr      *next;
1472        union {
1473                u64             wr_id;
1474                struct ib_cqe   *wr_cqe;
1475        };
1476        struct ib_sge          *sg_list;
1477        int                     num_sge;
1478};
1479
1480enum ib_access_flags {
1481        IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
1482        IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
1483        IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
1484        IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
1485        IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
1486        IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
1487        IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
1488        IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
1489
1490        IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
1491};
1492
1493/*
1494 * XXX: these are apparently used for ->rereg_user_mr, no idea why they
1495 * are hidden here instead of a uapi header!
1496 */
1497enum ib_mr_rereg_flags {
1498        IB_MR_REREG_TRANS       = 1,
1499        IB_MR_REREG_PD          = (1<<1),
1500        IB_MR_REREG_ACCESS      = (1<<2),
1501        IB_MR_REREG_SUPPORTED   = ((IB_MR_REREG_ACCESS << 1) - 1)
1502};
1503
1504struct ib_fmr_attr {
1505        int     max_pages;
1506        int     max_maps;
1507        u8      page_shift;
1508};
1509
1510struct ib_umem;
1511
1512enum rdma_remove_reason {
1513        /*
1514         * Userspace requested uobject deletion or initial try
1515         * to remove uobject via cleanup. Call could fail
1516         */
1517        RDMA_REMOVE_DESTROY,
1518        /* Context deletion. This call should delete the actual object itself */
1519        RDMA_REMOVE_CLOSE,
1520        /* Driver is being hot-unplugged. This call should delete the actual object itself */
1521        RDMA_REMOVE_DRIVER_REMOVE,
1522        /* uobj is being cleaned-up before being committed */
1523        RDMA_REMOVE_ABORT,
1524};
1525
1526struct ib_rdmacg_object {
1527#ifdef CONFIG_CGROUP_RDMA
1528        struct rdma_cgroup      *cg;            /* owner rdma cgroup */
1529#endif
1530};
1531
1532struct ib_ucontext {
1533        struct ib_device       *device;
1534        struct ib_uverbs_file  *ufile;
1535        /*
 1536         * 'closing' can be read by the driver only during a destroy callback;
1537         * it is set when we are closing the file descriptor and indicates
1538         * that mm_sem may be locked.
1539         */
1540        bool closing;
1541
1542        bool cleanup_retryable;
1543
1544        void (*invalidate_range)(struct ib_umem_odp *umem_odp,
1545                                 unsigned long start, unsigned long end);
1546        struct mutex per_mm_list_lock;
1547        struct list_head per_mm_list;
1548
1549        struct ib_rdmacg_object cg_obj;
1550        /*
1551         * Implementation details of the RDMA core, don't use in drivers:
1552         */
1553        struct rdma_restrack_entry res;
1554};
1555
1556struct ib_uobject {
1557        u64                     user_handle;    /* handle given to us by userspace */
1558        /* ufile & ucontext owning this object */
1559        struct ib_uverbs_file  *ufile;
1560        /* FIXME, save memory: ufile->context == context */
1561        struct ib_ucontext     *context;        /* associated user context */
1562        void                   *object;         /* containing object */
1563        struct list_head        list;           /* link to context's list */
1564        struct ib_rdmacg_object cg_obj;         /* rdmacg object */
1565        int                     id;             /* index into kernel idr */
1566        struct kref             ref;
1567        atomic_t                usecnt;         /* protects exclusive access */
1568        struct rcu_head         rcu;            /* kfree_rcu() overhead */
1569
1570        const struct uverbs_api_object *uapi_object;
1571};
1572
1573struct ib_udata {
1574        const void __user *inbuf;
1575        void __user *outbuf;
1576        size_t       inlen;
1577        size_t       outlen;
1578};
1579
1580struct ib_pd {
1581        u32                     local_dma_lkey;
1582        u32                     flags;
1583        struct ib_device       *device;
1584        struct ib_uobject      *uobject;
1585        atomic_t                usecnt; /* count all resources */
1586
1587        u32                     unsafe_global_rkey;
1588
1589        /*
1590         * Implementation details of the RDMA core, don't use in drivers:
1591         */
1592        struct ib_mr           *__internal_mr;
1593        struct rdma_restrack_entry res;
1594};
1595
1596struct ib_xrcd {
1597        struct ib_device       *device;
1598        atomic_t                usecnt; /* count all exposed resources */
1599        struct inode           *inode;
1600
1601        struct mutex            tgt_qp_mutex;
1602        struct list_head        tgt_qp_list;
1603};
1604
1605struct ib_ah {
1606        struct ib_device        *device;
1607        struct ib_pd            *pd;
1608        struct ib_uobject       *uobject;
1609        const struct ib_gid_attr *sgid_attr;
1610        enum rdma_ah_attr_type  type;
1611};
1612
1613typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
1614
1615enum ib_poll_context {
1616        IB_POLL_DIRECT,            /* caller context, no hw completions */
1617        IB_POLL_SOFTIRQ,           /* poll from softirq context */
1618        IB_POLL_WORKQUEUE,         /* poll from workqueue */
1619        IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
1620};
1621
1622struct ib_cq {
1623        struct ib_device       *device;
1624        struct ib_uobject      *uobject;
1625        ib_comp_handler         comp_handler;
1626        void                  (*event_handler)(struct ib_event *, void *);
1627        void                   *cq_context;
1628        int                     cqe;
1629        atomic_t                usecnt; /* count number of work queues */
1630        enum ib_poll_context    poll_ctx;
1631        struct ib_wc            *wc;
1632        union {
1633                struct irq_poll         iop;
1634                struct work_struct      work;
1635        };
1636        struct workqueue_struct *comp_wq;
1637        /*
1638         * Implementation details of the RDMA core, don't use in drivers:
1639         */
1640        struct rdma_restrack_entry res;
1641};
1642
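/*
 * Usage sketch (editorial addition): ULPs normally obtain a CQ via the
 * ib_alloc_cq() helper (declared elsewhere in this header) and pick one
 * of the ib_poll_context modes above.  "dev", "priv" and "nr_cqe" are
 * hypothetical.
 *
 *	struct ib_cq *cq;
 *
 *	cq = ib_alloc_cq(dev, priv, nr_cqe, 0, IB_POLL_WORKQUEUE);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *	...
 *	ib_free_cq(cq);
 */
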
1643struct ib_srq {
1644        struct ib_device       *device;
1645        struct ib_pd           *pd;
1646        struct ib_uobject      *uobject;
1647        void                  (*event_handler)(struct ib_event *, void *);
1648        void                   *srq_context;
1649        enum ib_srq_type        srq_type;
1650        atomic_t                usecnt;
1651
1652        struct {
1653                struct ib_cq   *cq;
1654                union {
1655                        struct {
1656                                struct ib_xrcd *xrcd;
1657                                u32             srq_num;
1658                        } xrc;
1659                };
1660        } ext;
1661};
1662
1663enum ib_raw_packet_caps {
1664        /* Stripping the cvlan from an incoming packet and reporting it in the
1665         * matching work completion is supported.
1666         */
1667        IB_RAW_PACKET_CAP_CVLAN_STRIPPING       = (1 << 0),
1668        /* Scattering the FCS field of an incoming packet to host memory is supported.
1669         */
1670        IB_RAW_PACKET_CAP_SCATTER_FCS           = (1 << 1),
1671        /* Checksum offloads are supported (for both send and receive). */
1672        IB_RAW_PACKET_CAP_IP_CSUM               = (1 << 2),
1673        /* When a packet is received for an RQ with no receive WQEs, the
1674         * packet processing is delayed.
1675         */
1676        IB_RAW_PACKET_CAP_DELAY_DROP            = (1 << 3),
1677};
1678
1679enum ib_wq_type {
1680        IB_WQT_RQ
1681};
1682
1683enum ib_wq_state {
1684        IB_WQS_RESET,
1685        IB_WQS_RDY,
1686        IB_WQS_ERR
1687};
1688
1689struct ib_wq {
1690        struct ib_device       *device;
1691        struct ib_uobject      *uobject;
1692        void                *wq_context;
1693        void                (*event_handler)(struct ib_event *, void *);
1694        struct ib_pd           *pd;
1695        struct ib_cq           *cq;
1696        u32             wq_num;
1697        enum ib_wq_state       state;
1698        enum ib_wq_type wq_type;
1699        atomic_t                usecnt;
1700};
1701
1702enum ib_wq_flags {
1703        IB_WQ_FLAGS_CVLAN_STRIPPING     = 1 << 0,
1704        IB_WQ_FLAGS_SCATTER_FCS         = 1 << 1,
1705        IB_WQ_FLAGS_DELAY_DROP          = 1 << 2,
1706        IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3,
1707};
1708
1709struct ib_wq_init_attr {
1710        void                   *wq_context;
1711        enum ib_wq_type wq_type;
1712        u32             max_wr;
1713        u32             max_sge;
1714        struct  ib_cq          *cq;
1715        void                (*event_handler)(struct ib_event *, void *);
1716        u32             create_flags; /* Use enum ib_wq_flags */
1717};
1718
1719enum ib_wq_attr_mask {
1720        IB_WQ_STATE             = 1 << 0,
1721        IB_WQ_CUR_STATE         = 1 << 1,
1722        IB_WQ_FLAGS             = 1 << 2,
1723};
1724
1725struct ib_wq_attr {
1726        enum    ib_wq_state     wq_state;
1727        enum    ib_wq_state     curr_wq_state;
1728        u32                     flags; /* Use enum ib_wq_flags */
1729        u32                     flags_mask; /* Use enum ib_wq_flags */
1730};
1731
1732struct ib_rwq_ind_table {
1733        struct ib_device        *device;
1734        struct ib_uobject      *uobject;
1735        atomic_t                usecnt;
1736        u32             ind_tbl_num;
1737        u32             log_ind_tbl_size;
1738        struct ib_wq    **ind_tbl;
1739};
1740
1741struct ib_rwq_ind_table_init_attr {
1742        u32             log_ind_tbl_size;
1743        /* Each entry is a pointer to Receive Work Queue */
1744        struct ib_wq    **ind_tbl;
1745};
1746
1747enum port_pkey_state {
1748        IB_PORT_PKEY_NOT_VALID = 0,
1749        IB_PORT_PKEY_VALID = 1,
1750        IB_PORT_PKEY_LISTED = 2,
1751};
1752
1753struct ib_qp_security;
1754
1755struct ib_port_pkey {
1756        enum port_pkey_state    state;
1757        u16                     pkey_index;
1758        u8                      port_num;
1759        struct list_head        qp_list;
1760        struct list_head        to_error_list;
1761        struct ib_qp_security  *sec;
1762};
1763
1764struct ib_ports_pkeys {
1765        struct ib_port_pkey     main;
1766        struct ib_port_pkey     alt;
1767};
1768
1769struct ib_qp_security {
1770        struct ib_qp           *qp;
1771        struct ib_device       *dev;
1772        /* Hold this mutex when changing port and pkey settings. */
1773        struct mutex            mutex;
1774        struct ib_ports_pkeys  *ports_pkeys;
1775        /* A list of all open shared QP handles.  Required to enforce security
1776         * properly for all users of a shared QP.
1777         */
1778        struct list_head        shared_qp_list;
1779        void                   *security;
1780        bool                    destroying;
1781        atomic_t                error_list_count;
1782        struct completion       error_complete;
1783        int                     error_comps_pending;
1784};
1785
1786/*
1787 * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
1788 * @max_read_sge:  Maximum SGE elements per RDMA READ request.
1789 */
1790struct ib_qp {
1791        struct ib_device       *device;
1792        struct ib_pd           *pd;
1793        struct ib_cq           *send_cq;
1794        struct ib_cq           *recv_cq;
1795        spinlock_t              mr_lock;
1796        int                     mrs_used;
1797        struct list_head        rdma_mrs;
1798        struct list_head        sig_mrs;
1799        struct ib_srq          *srq;
1800        struct ib_xrcd         *xrcd; /* XRC TGT QPs only */
1801        struct list_head        xrcd_list;
1802
1803        /* count times opened, mcast attaches, flow attaches */
1804        atomic_t                usecnt;
1805        struct list_head        open_list;
1806        struct ib_qp           *real_qp;
1807        struct ib_uobject      *uobject;
1808        void                  (*event_handler)(struct ib_event *, void *);
1809        void                   *qp_context;
1810        /* sgid_attrs associated with the AV's */
1811        const struct ib_gid_attr *av_sgid_attr;
1812        const struct ib_gid_attr *alt_path_sgid_attr;
1813        u32                     qp_num;
1814        u32                     max_write_sge;
1815        u32                     max_read_sge;
1816        enum ib_qp_type         qp_type;
1817        struct ib_rwq_ind_table *rwq_ind_tbl;
1818        struct ib_qp_security  *qp_sec;
1819        u8                      port;
1820
1821        /*
1822         * Implementation details of the RDMA core, don't use in drivers:
1823         */
1824        struct rdma_restrack_entry     res;
1825};
1826
1827struct ib_dm {
1828        struct ib_device  *device;
1829        u32                length;
1830        u32                flags;
1831        struct ib_uobject *uobject;
1832        atomic_t           usecnt;
1833};
1834
1835struct ib_mr {
1836        struct ib_device  *device;
1837        struct ib_pd      *pd;
1838        u32                lkey;
1839        u32                rkey;
1840        u64                iova;
1841        u64                length;
1842        unsigned int       page_size;
1843        bool               need_inval;
1844        union {
1845                struct ib_uobject       *uobject;       /* user */
1846                struct list_head        qp_entry;       /* FR */
1847        };
1848
1849        struct ib_dm      *dm;
1850
1851        /*
1852         * Implementation details of the RDMA core, don't use in drivers:
1853         */
1854        struct rdma_restrack_entry res;
1855};
1856
1857struct ib_mw {
1858        struct ib_device        *device;
1859        struct ib_pd            *pd;
1860        struct ib_uobject       *uobject;
1861        u32                     rkey;
1862        enum ib_mw_type         type;
1863};
1864
1865struct ib_fmr {
1866        struct ib_device        *device;
1867        struct ib_pd            *pd;
1868        struct list_head        list;
1869        u32                     lkey;
1870        u32                     rkey;
1871};
1872
1873/* Supported steering options */
1874enum ib_flow_attr_type {
1875        /* steering according to rule specifications */
1876        IB_FLOW_ATTR_NORMAL             = 0x0,
1877        /* default unicast and multicast rule -
1878         * receive all Eth traffic which isn't steered to any QP
1879         */
1880        IB_FLOW_ATTR_ALL_DEFAULT        = 0x1,
1881        /* default multicast rule -
1882         * receive all Eth multicast traffic which isn't steered to any QP
1883         */
1884        IB_FLOW_ATTR_MC_DEFAULT         = 0x2,
1885        /* sniffer rule - receive all port traffic */
1886        IB_FLOW_ATTR_SNIFFER            = 0x3
1887};
1888
1889/* Supported steering header types */
1890enum ib_flow_spec_type {
1891        /* L2 headers*/
1892        IB_FLOW_SPEC_ETH                = 0x20,
1893        IB_FLOW_SPEC_IB                 = 0x22,
1894        /* L3 header*/
1895        IB_FLOW_SPEC_IPV4               = 0x30,
1896        IB_FLOW_SPEC_IPV6               = 0x31,
1897        IB_FLOW_SPEC_ESP                = 0x34,
1898        /* L4 headers*/
1899        IB_FLOW_SPEC_TCP                = 0x40,
1900        IB_FLOW_SPEC_UDP                = 0x41,
1901        IB_FLOW_SPEC_VXLAN_TUNNEL       = 0x50,
1902        IB_FLOW_SPEC_GRE                = 0x51,
1903        IB_FLOW_SPEC_MPLS               = 0x60,
1904        IB_FLOW_SPEC_INNER              = 0x100,
1905        /* Actions */
1906        IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
1907        IB_FLOW_SPEC_ACTION_DROP        = 0x1001,
1908        IB_FLOW_SPEC_ACTION_HANDLE      = 0x1002,
1909        IB_FLOW_SPEC_ACTION_COUNT       = 0x1003,
1910};
1911#define IB_FLOW_SPEC_LAYER_MASK 0xF0
1912#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
1913
1914/* Flow steering rule priority is set according to its domain.
1915 * Lower domain value means higher priority.
1916 */
1917enum ib_flow_domain {
1918        IB_FLOW_DOMAIN_USER,
1919        IB_FLOW_DOMAIN_ETHTOOL,
1920        IB_FLOW_DOMAIN_RFS,
1921        IB_FLOW_DOMAIN_NIC,
1922        IB_FLOW_DOMAIN_NUM /* Must be last */
1923};
1924
1925enum ib_flow_flags {
1926        IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
1927        IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
1928        IB_FLOW_ATTR_FLAGS_RESERVED  = 1UL << 3  /* Must be last */
1929};
1930
1931struct ib_flow_eth_filter {
1932        u8      dst_mac[6];
1933        u8      src_mac[6];
1934        __be16  ether_type;
1935        __be16  vlan_tag;
1936        /* Must be last */
1937        u8      real_sz[0];
1938};
1939
1940struct ib_flow_spec_eth {
1941        u32                       type;
1942        u16                       size;
1943        struct ib_flow_eth_filter val;
1944        struct ib_flow_eth_filter mask;
1945};
1946
1947struct ib_flow_ib_filter {
1948        __be16 dlid;
1949        __u8   sl;
1950        /* Must be last */
1951        u8      real_sz[0];
1952};
1953
1954struct ib_flow_spec_ib {
1955        u32                      type;
1956        u16                      size;
1957        struct ib_flow_ib_filter val;
1958        struct ib_flow_ib_filter mask;
1959};
1960
1961/* IPv4 header flags */
1962enum ib_ipv4_flags {
1963        IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */
1964        IB_IPV4_MORE_FRAG = 0x4  /* All fragmented packets except the
1965                                    last have this flag set */
1966};
1967
1968struct ib_flow_ipv4_filter {
1969        __be32  src_ip;
1970        __be32  dst_ip;
1971        u8      proto;
1972        u8      tos;
1973        u8      ttl;
1974        u8      flags;
1975        /* Must be last */
1976        u8      real_sz[0];
1977};
1978
1979struct ib_flow_spec_ipv4 {
1980        u32                        type;
1981        u16                        size;
1982        struct ib_flow_ipv4_filter val;
1983        struct ib_flow_ipv4_filter mask;
1984};
1985
1986struct ib_flow_ipv6_filter {
1987        u8      src_ip[16];
1988        u8      dst_ip[16];
1989        __be32  flow_label;
1990        u8      next_hdr;
1991        u8      traffic_class;
1992        u8      hop_limit;
1993        /* Must be last */
1994        u8      real_sz[0];
1995};
1996
1997struct ib_flow_spec_ipv6 {
1998        u32                        type;
1999        u16                        size;
2000        struct ib_flow_ipv6_filter val;
2001        struct ib_flow_ipv6_filter mask;
2002};
2003
2004struct ib_flow_tcp_udp_filter {
2005        __be16  dst_port;
2006        __be16  src_port;
2007        /* Must be last */
2008        u8      real_sz[0];
2009};
2010
2011struct ib_flow_spec_tcp_udp {
2012        u32                           type;
2013        u16                           size;
2014        struct ib_flow_tcp_udp_filter val;
2015        struct ib_flow_tcp_udp_filter mask;
2016};
2017
2018struct ib_flow_tunnel_filter {
2019        __be32  tunnel_id;
2020        u8      real_sz[0];
2021};
2022
2023/* ib_flow_spec_tunnel describes the VXLAN tunnel;
2024 * the tunnel_id in val holds the VNI value.
2025 */
2026struct ib_flow_spec_tunnel {
2027        u32                           type;
2028        u16                           size;
2029        struct ib_flow_tunnel_filter  val;
2030        struct ib_flow_tunnel_filter  mask;
2031};
2032
2033struct ib_flow_esp_filter {
2034        __be32  spi;
2035        __be32  seq;
2036        /* Must be last */
2037        u8      real_sz[0];
2038};
2039
2040struct ib_flow_spec_esp {
2041        u32                           type;
2042        u16                           size;
2043        struct ib_flow_esp_filter     val;
2044        struct ib_flow_esp_filter     mask;
2045};
2046
2047struct ib_flow_gre_filter {
2048        __be16 c_ks_res0_ver;
2049        __be16 protocol;
2050        __be32 key;
2051        /* Must be last */
2052        u8      real_sz[0];
2053};
2054
2055struct ib_flow_spec_gre {
2056        u32                           type;
2057        u16                           size;
2058        struct ib_flow_gre_filter     val;
2059        struct ib_flow_gre_filter     mask;
2060};
2061
2062struct ib_flow_mpls_filter {
2063        __be32 tag;
2064        /* Must be last */
2065        u8      real_sz[0];
2066};
2067
2068struct ib_flow_spec_mpls {
2069        u32                           type;
2070        u16                           size;
2071        struct ib_flow_mpls_filter     val;
2072        struct ib_flow_mpls_filter     mask;
2073};
2074
2075struct ib_flow_spec_action_tag {
2076        enum ib_flow_spec_type        type;
2077        u16                           size;
2078        u32                           tag_id;
2079};
2080
2081struct ib_flow_spec_action_drop {
2082        enum ib_flow_spec_type        type;
2083        u16                           size;
2084};
2085
2086struct ib_flow_spec_action_handle {
2087        enum ib_flow_spec_type        type;
2088        u16                           size;
2089        struct ib_flow_action        *act;
2090};
2091
2092enum ib_counters_description {
2093        IB_COUNTER_PACKETS,
2094        IB_COUNTER_BYTES,
2095};
2096
2097struct ib_flow_spec_action_count {
2098        enum ib_flow_spec_type type;
2099        u16 size;
2100        struct ib_counters *counters;
2101};
2102
2103union ib_flow_spec {
2104        struct {
2105                u32                     type;
2106                u16                     size;
2107        };
2108        struct ib_flow_spec_eth         eth;
2109        struct ib_flow_spec_ib          ib;
2110        struct ib_flow_spec_ipv4        ipv4;
2111        struct ib_flow_spec_tcp_udp     tcp_udp;
2112        struct ib_flow_spec_ipv6        ipv6;
2113        struct ib_flow_spec_tunnel      tunnel;
2114        struct ib_flow_spec_esp         esp;
2115        struct ib_flow_spec_gre         gre;
2116        struct ib_flow_spec_mpls        mpls;
2117        struct ib_flow_spec_action_tag  flow_tag;
2118        struct ib_flow_spec_action_drop drop;
2119        struct ib_flow_spec_action_handle action;
2120        struct ib_flow_spec_action_count flow_count;
2121};
2122
2123struct ib_flow_attr {
2124        enum ib_flow_attr_type type;
2125        u16          size;
2126        u16          priority;
2127        u32          flags;
2128        u8           num_of_specs;
2129        u8           port;
2130        union ib_flow_spec flows[];
2131};
2132
2133struct ib_flow {
2134        struct ib_qp            *qp;
2135        struct ib_device        *device;
2136        struct ib_uobject       *uobject;
2137};
2138
2139enum ib_flow_action_type {
2140        IB_FLOW_ACTION_UNSPECIFIED,
2141        IB_FLOW_ACTION_ESP = 1,
2142};
2143
2144struct ib_flow_action_attrs_esp_keymats {
2145        enum ib_uverbs_flow_action_esp_keymat                   protocol;
2146        union {
2147                struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm;
2148        } keymat;
2149};
2150
2151struct ib_flow_action_attrs_esp_replays {
2152        enum ib_uverbs_flow_action_esp_replay                   protocol;
2153        union {
2154                struct ib_uverbs_flow_action_esp_replay_bmp     bmp;
2155        } replay;
2156};
2157
2158enum ib_flow_action_attrs_esp_flags {
2159        /* All user-space flags at the top: use enum ib_uverbs_flow_action_esp_flags.
2160         * This is done in order to share the same flags between user-space and
2161         * the kernel and spare an unnecessary translation.
2162         */
2163
2164        /* Kernel flags */
2165        IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED  = 1ULL << 32,
2166        IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS  = 1ULL << 33,
2167};
2168
2169struct ib_flow_spec_list {
2170        struct ib_flow_spec_list        *next;
2171        union ib_flow_spec              spec;
2172};
2173
2174struct ib_flow_action_attrs_esp {
2175        struct ib_flow_action_attrs_esp_keymats         *keymat;
2176        struct ib_flow_action_attrs_esp_replays         *replay;
2177        struct ib_flow_spec_list                        *encap;
2178        /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled.
2179         * A value of 0 is valid.
2180         */
2181        u32                                             esn;
2182        u32                                             spi;
2183        u32                                             seq;
2184        u32                                             tfc_pad;
2185        /* Use enum ib_flow_action_attrs_esp_flags */
2186        u64                                             flags;
2187        u64                                             hard_limit_pkts;
2188};
2189
2190struct ib_flow_action {
2191        struct ib_device                *device;
2192        struct ib_uobject               *uobject;
2193        enum ib_flow_action_type        type;
2194        atomic_t                        usecnt;
2195};
2196
2197struct ib_mad_hdr;
2198struct ib_grh;
2199
2200enum ib_process_mad_flags {
2201        IB_MAD_IGNORE_MKEY      = 1,
2202        IB_MAD_IGNORE_BKEY      = 2,
2203        IB_MAD_IGNORE_ALL       = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
2204};
2205
2206enum ib_mad_result {
2207        IB_MAD_RESULT_FAILURE  = 0,      /* (!SUCCESS is the important flag) */
2208        IB_MAD_RESULT_SUCCESS  = 1 << 0, /* MAD was successfully processed   */
2209        IB_MAD_RESULT_REPLY    = 1 << 1, /* Reply packet needs to be sent    */
2210        IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */
2211};
2212
2213struct ib_port_cache {
2214        u64                   subnet_prefix;
2215        struct ib_pkey_cache  *pkey;
2216        struct ib_gid_table   *gid;
2217        u8                     lmc;
2218        enum ib_port_state     port_state;
2219};
2220
2221struct ib_cache {
2222        rwlock_t                lock;
2223        struct ib_event_handler event_handler;
2224};
2225
2226struct ib_port_immutable {
2227        int                           pkey_tbl_len;
2228        int                           gid_tbl_len;
2229        u32                           core_cap_flags;
2230        u32                           max_mad_size;
2231};
2232
2233struct ib_port_data {
2234        struct ib_device *ib_dev;
2235
2236        struct ib_port_immutable immutable;
2237
2238        spinlock_t pkey_list_lock;
2239        struct list_head pkey_list;
2240
2241        struct ib_port_cache cache;
2242
2243        spinlock_t netdev_lock;
2244        struct net_device __rcu *netdev;
2245        struct hlist_node ndev_hash_link;
2246};
2247
2248/* rdma netdev type - specifies protocol type */
2249enum rdma_netdev_t {
2250        RDMA_NETDEV_OPA_VNIC,
2251        RDMA_NETDEV_IPOIB,
2252};
2253
2254/**
2255 * struct rdma_netdev - rdma netdev
2256 * For cases where netstack interfacing is required.
2257 */
2258struct rdma_netdev {
2259        void              *clnt_priv;
2260        struct ib_device  *hca;
2261        u8                 port_num;
2262
2263        /*
2264         * cleanup function must be specified.
2265         * FIXME: This is only used for OPA_VNIC and that usage should be
2266         * removed too.
2267         */
2268        void (*free_rdma_netdev)(struct net_device *netdev);
2269
2270        /* control functions */
2271        void (*set_id)(struct net_device *netdev, int id);
2272        /* send packet */
2273        int (*send)(struct net_device *dev, struct sk_buff *skb,
2274                    struct ib_ah *address, u32 dqpn);
2275        /* multicast */
2276        int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
2277                            union ib_gid *gid, u16 mlid,
2278                            int set_qkey, u32 qkey);
2279        int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
2280                            union ib_gid *gid, u16 mlid);
2281};
2282
2283struct rdma_netdev_alloc_params {
2284        size_t sizeof_priv;
2285        unsigned int txqs;
2286        unsigned int rxqs;
2287        void *param;
2288
2289        int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
2290                                      struct net_device *netdev, void *param);
2291};
2292
2293struct ib_counters {
2294        struct ib_device        *device;
2295        struct ib_uobject       *uobject;
2296        /* num of objects attached */
2297        atomic_t        usecnt;
2298};
2299
2300struct ib_counters_read_attr {
2301        u64     *counters_buff;
2302        u32     ncounters;
2303        u32     flags; /* use enum ib_read_counters_flags */
2304};
2305
2306struct uverbs_attr_bundle;
2307struct iw_cm_id;
2308struct iw_cm_conn_param;
2309
2310#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member)                      \
2311        .size_##ib_struct =                                                    \
2312                (sizeof(struct drv_struct) +                                   \
2313                 BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) +      \
2314                 BUILD_BUG_ON_ZERO(                                            \
2315                         !__same_type(((struct drv_struct *)NULL)->member,     \
2316                                      struct ib_struct)))
2317
2318#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp)                         \
2319        ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
2320
2321#define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
2322        rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
2323
2324#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
2325
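/*
 * Usage sketch (editorial addition): a provider embeds the core object
 * at offset zero of its private structure and reports the total size via
 * INIT_RDMA_OBJ_SIZE() so the core can allocate it with
 * rdma_zalloc_drv_obj().  "my_pd" and "my_alloc_pd" are hypothetical
 * driver names.
 *
 *	struct my_pd {
 *		struct ib_pd	ibpd;
 *		u32		pdn;
 *	};
 *
 *	static const struct ib_device_ops my_dev_ops = {
 *		.alloc_pd = my_alloc_pd,
 *		INIT_RDMA_OBJ_SIZE(ib_pd, my_pd, ibpd),
 *	};
 */
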
2326/**
2327 * struct ib_device_ops - InfiniBand device operations
2328 * This structure defines all the InfiniBand device operations; providers
2329 * need to define the supported operations, otherwise they will be set to NULL.
2330 */
2331struct ib_device_ops {
2332        int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
2333                         const struct ib_send_wr **bad_send_wr);
2334        int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
2335                         const struct ib_recv_wr **bad_recv_wr);
2336        void (*drain_rq)(struct ib_qp *qp);
2337        void (*drain_sq)(struct ib_qp *qp);
2338        int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
2339        int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
2340        int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
2341        int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
2342        int (*post_srq_recv)(struct ib_srq *srq,
2343                             const struct ib_recv_wr *recv_wr,
2344                             const struct ib_recv_wr **bad_recv_wr);
2345        int (*process_mad)(struct ib_device *device, int process_mad_flags,
2346                           u8 port_num, const struct ib_wc *in_wc,
2347                           const struct ib_grh *in_grh,
2348                           const struct ib_mad_hdr *in_mad, size_t in_mad_size,
2349                           struct ib_mad_hdr *out_mad, size_t *out_mad_size,
2350                           u16 *out_mad_pkey_index);
2351        int (*query_device)(struct ib_device *device,
2352                            struct ib_device_attr *device_attr,
2353                            struct ib_udata *udata);
2354        int (*modify_device)(struct ib_device *device, int device_modify_mask,
2355                             struct ib_device_modify *device_modify);
2356        void (*get_dev_fw_str)(struct ib_device *device, char *str);
2357        const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
2358                                                     int comp_vector);
2359        int (*query_port)(struct ib_device *device, u8 port_num,
2360                          struct ib_port_attr *port_attr);
2361        int (*modify_port)(struct ib_device *device, u8 port_num,
2362                           int port_modify_mask,
2363                           struct ib_port_modify *port_modify);
2364        /**
2365         * The following mandatory functions are used only at device
2366         * registration.  Keep functions such as these at the end of this
2367         * structure to avoid cache line misses when accessing struct ib_device
2368         * in fast paths.
2369         */
2370        int (*get_port_immutable)(struct ib_device *device, u8 port_num,
2371                                  struct ib_port_immutable *immutable);
2372        enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
2373                                               u8 port_num);
2374        /**
2375         * When calling get_netdev, the HW vendor's driver should return the
2376         * net device of device @device at port @port_num or NULL if such
2377         * a net device doesn't exist. The vendor driver should call dev_hold
2378         * on this net device. The HW vendor's device driver must guarantee
2379         * that this function returns NULL before the net device has finished
2380         * NETDEV_UNREGISTER state.
2381         */
2382        struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
2383        /**
2384         * rdma netdev operation
2385         *
2386         * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
2387         * must return -EOPNOTSUPP if it doesn't support the specified type.
2388         */
2389        struct net_device *(*alloc_rdma_netdev)(
2390                struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
2391                const char *name, unsigned char name_assign_type,
2392                void (*setup)(struct net_device *));
2393
2394        int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
2395                                      enum rdma_netdev_t type,
2396                                      struct rdma_netdev_alloc_params *params);
2397        /**
2398         * query_gid should return the GID value for @device when the @port_num
2399         * link layer is either IB or iWarp. It is a no-op if the @port_num port
2400         * uses the RoCE link layer.
2401         */
2402        int (*query_gid)(struct ib_device *device, u8 port_num, int index,
2403                         union ib_gid *gid);
2404        /**
2405         * When calling add_gid, the HW vendor's driver should add the GID
2406         * of the device's port at the GID index available in @attr. Meta-info
2407         * of that GID (for example, the network device related to this GID) is
2408         * available in @attr. @context allows the HW vendor driver to store
2409         * extra information together with a GID entry. The HW vendor driver may
2410         * allocate memory to hold this information and store it in @context
2411         * when a new GID entry is written. Params are consistent until the
2412         * next call of add_gid or delete_gid. The function should return 0 on
2413         * success or error otherwise. The function could be called
2414         * concurrently for different ports. This function is only called when
2415         * roce_gid_table is used.
2416         */
2417        int (*add_gid)(const struct ib_gid_attr *attr, void **context);
2418        /**
2419         * When calling del_gid, the HW vendor's driver should delete the
2420         * gid of device @device at gid index gid_index of port port_num
2421         * available in @attr.
2422         * Upon the deletion of a GID entry, the HW vendor must free any
2423         * allocated memory. The caller will clear @context afterwards.
2424         * This function is only called when roce_gid_table is used.
2425         */
2426        int (*del_gid)(const struct ib_gid_attr *attr, void **context);
2427        int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
2428                          u16 *pkey);
2429        int (*alloc_ucontext)(struct ib_ucontext *context,
2430                              struct ib_udata *udata);
2431        void (*dealloc_ucontext)(struct ib_ucontext *context);
2432        int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
2433        void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
2434        int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
2435        void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
2436        int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
2437                         u32 flags, struct ib_udata *udata);
2438        int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
2439        int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
2440        void (*destroy_ah)(struct ib_ah *ah, u32 flags);
2441        int (*create_srq)(struct ib_srq *srq,
2442                          struct ib_srq_init_attr *srq_init_attr,
2443                          struct ib_udata *udata);
2444        int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
2445                          enum ib_srq_attr_mask srq_attr_mask,
2446                          struct ib_udata *udata);
2447        int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
2448        void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
2449        struct ib_qp *(*create_qp)(struct ib_pd *pd,
2450                                   struct ib_qp_init_attr *qp_init_attr,
2451                                   struct ib_udata *udata);
2452        int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
2453                         int qp_attr_mask, struct ib_udata *udata);
2454        int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
2455                        int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
2456        int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
2457        struct ib_cq *(*create_cq)(struct ib_device *device,
2458                                   const struct ib_cq_init_attr *attr,
2459                                   struct ib_udata *udata);
2460        int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
2461        int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
2462        int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
2463        struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
2464        struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
2465                                     u64 virt_addr, int mr_access_flags,
2466                                     struct ib_udata *udata);
2467        int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
2468                             u64 virt_addr, int mr_access_flags,
2469                             struct ib_pd *pd, struct ib_udata *udata);
2470        int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
2471        struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
2472                                  u32 max_num_sg, struct ib_udata *udata);
2473        int (*advise_mr)(struct ib_pd *pd,
2474                         enum ib_uverbs_advise_mr_advice advice, u32 flags,
2475                         struct ib_sge *sg_list, u32 num_sge,
2476                         struct uverbs_attr_bundle *attrs);
2477        int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
2478                         unsigned int *sg_offset);
2479        int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
2480                               struct ib_mr_status *mr_status);
2481        struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
2482                                  struct ib_udata *udata);
2483        int (*dealloc_mw)(struct ib_mw *mw);
2484        struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
2485                                    struct ib_fmr_attr *fmr_attr);
2486        int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
2487                            u64 iova);
2488        int (*unmap_fmr)(struct list_head *fmr_list);
2489        int (*dealloc_fmr)(struct ib_fmr *fmr);
2490        int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
2491        int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
2492        struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
2493                                      struct ib_udata *udata);
2494        int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
2495        struct ib_flow *(*create_flow)(struct ib_qp *qp,
2496                                       struct ib_flow_attr *flow_attr,
2497                                       int domain, struct ib_udata *udata);
2498        int (*destroy_flow)(struct ib_flow *flow_id);
2499        struct ib_flow_action *(*create_flow_action_esp)(
2500                struct ib_device *device,
2501                const struct ib_flow_action_attrs_esp *attr,
2502                struct uverbs_attr_bundle *attrs);
2503        int (*destroy_flow_action)(struct ib_flow_action *action);
2504        int (*modify_flow_action_esp)(
2505                struct ib_flow_action *action,
2506                const struct ib_flow_action_attrs_esp *attr,
2507                struct uverbs_attr_bundle *attrs);
2508        int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
2509                                 int state);
2510        int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
2511                             struct ifla_vf_info *ivf);
2512        int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
2513                            struct ifla_vf_stats *stats);
2514        int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
2515                           int type);
2516        struct ib_wq *(*create_wq)(struct ib_pd *pd,
2517                                   struct ib_wq_init_attr *init_attr,
2518                                   struct ib_udata *udata);
2519        int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
2520        int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
2521                         u32 wq_attr_mask, struct ib_udata *udata);
2522        struct ib_rwq_ind_table *(*create_rwq_ind_table)(
2523                struct ib_device *device,
2524                struct ib_rwq_ind_table_init_attr *init_attr,
2525                struct ib_udata *udata);
2526        int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
2527        struct ib_dm *(*alloc_dm)(struct ib_device *device,
2528                                  struct ib_ucontext *context,
2529                                  struct ib_dm_alloc_attr *attr,
2530                                  struct uverbs_attr_bundle *attrs);
2531        int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
2532        struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
2533                                   struct ib_dm_mr_attr *attr,
2534                                   struct uverbs_attr_bundle *attrs);
2535        struct ib_counters *(*create_counters)(
2536                struct ib_device *device, struct uverbs_attr_bundle *attrs);
2537        int (*destroy_counters)(struct ib_counters *counters);
2538        int (*read_counters)(struct ib_counters *counters,
2539                             struct ib_counters_read_attr *counters_read_attr,
2540                             struct uverbs_attr_bundle *attrs);
2541        /**
2542         * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
2543         *   driver initialized data.  The struct is kfree()'ed by the sysfs
2544         *   core when the device is removed.  A lifespan of -1 in the return
2545         *   struct tells the core to set a default lifespan.
2546         */
2547        struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
2548                                                u8 port_num);
2549        /**
2550         * get_hw_stats - Fill in the counter value(s) in the stats struct.
2551         * @index - The index in the value array we wish to have updated, or
2552         *   num_counters if we want all stats updated
2553         * Return codes -
2554         *   < 0 - Error, no counters updated
2555         *   index - Updated the single counter pointed to by index
2556         *   num_counters - Updated all counters (will reset the timestamp
2557         *     and prevent further calls for lifespan milliseconds)
2558         * Drivers are allowed to update all counters in lieu of just the
2559         *   one given in index at their option
2560         */
2561        int (*get_hw_stats)(struct ib_device *device,
2562                            struct rdma_hw_stats *stats, u8 port, int index);
2563        /*
2564         * This function is called once for each port when an ib device is
2565         * registered.
2566         */
2567        int (*init_port)(struct ib_device *device, u8 port_num,
2568                         struct kobject *port_sysfs);
2569        /**
2570         * Allows rdma drivers to add their own restrack attributes.
2571         */
2572        int (*fill_res_entry)(struct sk_buff *msg,
2573                              struct rdma_restrack_entry *entry);
2574
2575        /* Device lifecycle callbacks */
2576        /*
2577         * Called after the device becomes registered, before clients are
2578         * attached
2579         */
2580        int (*enable_driver)(struct ib_device *dev);
2581        /*
2582         * This is called as part of ib_dealloc_device().
2583         */
2584        void (*dealloc_driver)(struct ib_device *dev);
2585
2586        /* iWarp CM callbacks */
2587        void (*iw_add_ref)(struct ib_qp *qp);
2588        void (*iw_rem_ref)(struct ib_qp *qp);
2589        struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
2590        int (*iw_connect)(struct iw_cm_id *cm_id,
2591                          struct iw_cm_conn_param *conn_param);
2592        int (*iw_accept)(struct iw_cm_id *cm_id,
2593                         struct iw_cm_conn_param *conn_param);
2594        int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
2595                         u8 pdata_len);
2596        int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
2597        int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
2598
2599        DECLARE_RDMA_OBJ_SIZE(ib_ah);
2600        DECLARE_RDMA_OBJ_SIZE(ib_pd);
2601        DECLARE_RDMA_OBJ_SIZE(ib_srq);
2602        DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
2603};
2604
2605struct ib_core_device {
2606        /* device must be the first element in the structure as long as the
2607         * union of ib_core_device and device exists in ib_device.
2608         */
2609        struct device dev;
2610        possible_net_t rdma_net;
2611        struct kobject *ports_kobj;
2612        struct list_head port_list;
2613        struct ib_device *owner; /* reach back to owner ib_device */
2614};
2615
2616struct rdma_restrack_root;
2617struct ib_device {
2618        /* Do not access @dma_device directly from ULP nor from HW drivers. */
2619        struct device                *dma_device;
2620        struct ib_device_ops         ops;
2621        char                          name[IB_DEVICE_NAME_MAX];
2622        struct rcu_head rcu_head;
2623
2624        struct list_head              event_handler_list;
2625        spinlock_t                    event_handler_lock;
2626
2627        struct rw_semaphore           client_data_rwsem;
2628        struct xarray                 client_data;
2629        struct mutex                  unregistration_lock;
2630
2631        struct ib_cache               cache;
2632        /**
2633         * port_data is indexed by port number
2634         */
2635        struct ib_port_data *port_data;
2636
2637        int                           num_comp_vectors;
2638
2639        struct module               *owner;
2640        union {
2641                struct device           dev;
2642                struct ib_core_device   coredev;
2643        };
2644
2645        /* First group is for device attributes,
2646         * second group is for driver-provided attributes (optional).
2647         * It is a NULL-terminated array.
2648         */
2649        const struct attribute_group    *groups[3];
2650
2651        int                          uverbs_abi_ver;
2652        u64                          uverbs_cmd_mask;
2653        u64                          uverbs_ex_cmd_mask;
2654
2655        char                         node_desc[IB_DEVICE_NODE_DESC_MAX];
2656        __be64                       node_guid;
2657        u32                          local_dma_lkey;
2658        u16                          is_switch:1;
2659        /* Indicates kernel verbs support, should not be used in drivers */
2660        u16                          kverbs_provider:1;
2661        u8                           node_type;
2662        u8                           phys_port_cnt;
2663        struct ib_device_attr        attrs;
2664        struct attribute_group       *hw_stats_ag;
2665        struct rdma_hw_stats         *hw_stats;
2666
2667#ifdef CONFIG_CGROUP_RDMA
2668        struct rdmacg_device         cg_device;
2669#endif
2670
2671        u32                          index;
2672        struct rdma_restrack_root *res;
2673
2674        const struct uapi_definition   *driver_def;
2675        enum rdma_driver_id             driver_id;
2676
2677        /*
2678         * Positive refcount indicates that the device is currently
2679         * registered and cannot be unregistered.
2680         */
2681        refcount_t refcount;
2682        struct completion unreg_completion;
2683        struct work_struct unregistration_work;
2684
2685        const struct rdma_link_ops *link_ops;
2686
2687        /* Protects compat_devs xarray modifications */
2688        struct mutex compat_devs_mutex;
2689        /* Maintains compat devices for each net namespace */
2690        struct xarray compat_devs;
2691
2692        /* Used by iWarp CM */
2693        char iw_ifname[IFNAMSIZ];
2694        u32 iw_driver_flags;
2695};
2696
2697struct ib_client {
2698        const char *name;
2699        void (*add)   (struct ib_device *);
2700        void (*remove)(struct ib_device *, void *client_data);
2701        void (*rename)(struct ib_device *dev, void *client_data);
2702
2703        /* Returns the net_dev belonging to this ib_client and matching the
2704         * given parameters.
2705         * @dev:         An RDMA device that the net_dev uses for communication.
2706         * @port:        A physical port number on the RDMA device.
2707         * @pkey:        P_Key that the net_dev uses if applicable.
2708         * @gid:         A GID that the net_dev uses to communicate.
2709         * @addr:        An IP address the net_dev is configured with.
2710         * @client_data: The device's client data set by ib_set_client_data().
2711         *
2712         * An ib_client that implements a net_dev on top of RDMA devices
2713         * (such as IP over IB) should implement this callback, allowing the
2714         * rdma_cm module to find the right net_dev for a given request.
2715         *
2716         * The caller is responsible for calling dev_put on the returned
2717         * netdev. */
2718        struct net_device *(*get_net_dev_by_params)(
2719                        struct ib_device *dev,
2720                        u8 port,
2721                        u16 pkey,
2722                        const union ib_gid *gid,
2723                        const struct sockaddr *addr,
2724                        void *client_data);
2725        struct list_head list;
2726        u32 client_id;
2727
2728        /* kverbs are not required by the client */
2729        u8 no_kverbs_req:1;
2730};
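
/*
 * Usage sketch (editorial addition): a minimal ULP client registered with
 * ib_register_client() (declared below).  "my_ulp", "my_add_one" and
 * "my_remove_one" are hypothetical; per-device data attached with
 * ib_set_client_data() in the add callback is handed back to remove().
 *
 *	static struct ib_client my_client = {
 *		.name   = "my_ulp",
 *		.add    = my_add_one,
 *		.remove = my_remove_one,
 *	};
 *
 *	ret = ib_register_client(&my_client);
 *	...
 *	ib_unregister_client(&my_client);
 */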
2731
2732/*
2733 * IB block DMA iterator
2734 *
2735 * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
2736 * to a HW supported page size.
2737 */
2738struct ib_block_iter {
2739        /* internal states */
2740        struct scatterlist *__sg;       /* sg holding the current aligned block */
2741        dma_addr_t __dma_addr;          /* unaligned DMA address of this block */
2742        unsigned int __sg_nents;        /* number of SG entries */
2743        unsigned int __sg_advance;      /* number of bytes to advance in sg in next step */
2744        unsigned int __pg_bit;          /* alignment of current block */
2745};
2746
2747struct ib_device *_ib_alloc_device(size_t size);
2748#define ib_alloc_device(drv_struct, member)                                    \
2749        container_of(_ib_alloc_device(sizeof(struct drv_struct) +              \
2750                                      BUILD_BUG_ON_ZERO(offsetof(              \
2751                                              struct drv_struct, member))),    \
2752                     struct drv_struct, member)
2753
2754void ib_dealloc_device(struct ib_device *device);
2755
2756void ib_get_device_fw_str(struct ib_device *device, char *str);
2757
2758int ib_register_device(struct ib_device *device, const char *name);
2759void ib_unregister_device(struct ib_device *device);
2760void ib_unregister_driver(enum rdma_driver_id driver_id);
2761void ib_unregister_device_and_put(struct ib_device *device);
2762void ib_unregister_device_queued(struct ib_device *ib_dev);
2763
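/*
 * Usage sketch (editorial addition): a hardware driver wraps struct
 * ib_device in its private structure, allocates it with ib_alloc_device()
 * and registers it.  "my_hca" and "my_dev_ops" are hypothetical; error
 * handling is abbreviated.
 *
 *	struct my_hca {
 *		struct ib_device	ibdev;
 *		...
 *	};
 *
 *	struct my_hca *hca = ib_alloc_device(my_hca, ibdev);
 *
 *	ib_set_device_ops(&hca->ibdev, &my_dev_ops);
 *	ret = ib_register_device(&hca->ibdev, "my%d");
 *	if (ret)
 *		ib_dealloc_device(&hca->ibdev);
 */
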
2764int ib_register_client   (struct ib_client *client);
2765void ib_unregister_client(struct ib_client *client);
2766
2767void __rdma_block_iter_start(struct ib_block_iter *biter,
2768                             struct scatterlist *sglist,
2769                             unsigned int nents,
2770                             unsigned long pgsz);
2771bool __rdma_block_iter_next(struct ib_block_iter *biter);
2772
2773/**
2774 * rdma_block_iter_dma_address - get the aligned dma address of the current
2775 * block held by the block iterator.
2776 * @biter: block iterator holding the memory block
2777 */
2778static inline dma_addr_t
2779rdma_block_iter_dma_address(struct ib_block_iter *biter)
2780{
2781        return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
2782}
2783
2784/**
2785 * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
2786 * @sglist: sglist to iterate over
2787 * @biter: block iterator holding the memory block
2788 * @nents: maximum number of sg entries to iterate over
2789 * @pgsz: best HW supported page size to use
2790 *
2791 * Callers may use rdma_block_iter_dma_address() to get each
2792 * block's aligned DMA address.
2793 */
2794#define rdma_for_each_block(sglist, biter, nents, pgsz)         \
2795        for (__rdma_block_iter_start(biter, sglist, nents,      \
2796                                     pgsz);                     \
2797             __rdma_block_iter_next(biter);)
2798
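/*
 * Usage sketch (editorial addition): walking a DMA-mapped scatterlist in
 * blocks aligned to a supported page size and programming each block's
 * address.  "sgl", "nents" and "set_page_entry" are hypothetical.
 *
 *	struct ib_block_iter biter;
 *
 *	rdma_for_each_block(sgl, &biter, nents, PAGE_SIZE)
 *		set_page_entry(dev, rdma_block_iter_dma_address(&biter));
 */
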
2799/**
2800 * ib_get_client_data - Get IB client context
2801 * @device:Device to get context for
2802 * @client:Client to get context for
2803 *
2804 * ib_get_client_data() returns the client context data set with
2805 * ib_set_client_data(). This can only be called while the client is
2806 * registered to the device; once the ib_client remove() callback returns,
2807 * this cannot be called.
2808 */
2809static inline void *ib_get_client_data(struct ib_device *device,
2810                                       struct ib_client *client)
2811{
2812        return xa_load(&device->client_data, client->client_id);
2813}
2814void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
2815                         void *data);
2816void ib_set_device_ops(struct ib_device *device,
2817                       const struct ib_device_ops *ops);
2818
2819#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
2820int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
2821                      unsigned long pfn, unsigned long size, pgprot_t prot);
2822#else
2823static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
2824                                    struct vm_area_struct *vma,
2825                                    unsigned long pfn, unsigned long size,
2826                                    pgprot_t prot)
2827{
2828        return -EINVAL;
2829}
2830#endif
2831
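/*
 * Usage sketch (editorial addition): a driver ->mmap() handler typically
 * maps a doorbell or UAR page to user space through rdma_user_mmap_io()
 * so the core can account for the mapping.  "db_phys" is a hypothetical
 * device register address.
 *
 *	static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 *	{
 *		return rdma_user_mmap_io(uctx, vma, db_phys >> PAGE_SHIFT,
 *					 PAGE_SIZE,
 *					 pgprot_noncached(vma->vm_page_prot));
 *	}
 */
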
2832static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
2833{
2834        return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
2835}
2836
2837static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
2838{
2839        return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
2840}
2841
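/*
 * Usage sketch (editorial addition): a verb implementation exchanges its
 * private command/response structures with user space through ib_udata.
 * "my_cmd" and "my_resp" are hypothetical uapi structures.
 *
 *	struct my_cmd cmd = {};
 *	struct my_resp resp = {};
 *
 *	if (udata->inlen &&
 *	    ib_copy_from_udata(&cmd, udata, min(sizeof(cmd), udata->inlen)))
 *		return -EFAULT;
 *	...
 *	if (udata->outlen &&
 *	    ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)))
 *		return -EFAULT;
 */
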
2842static inline bool ib_is_buffer_cleared(const void __user *p,
2843                                        size_t len)
2844{
2845        bool ret;
2846        u8 *buf;
2847
2848        if (len > USHRT_MAX)
2849                return false;
2850
2851        buf = memdup_user(p, len);
2852        if (IS_ERR(buf))
2853                return false;
2854
2855        ret = !memchr_inv(buf, 0, len);
2856        kfree(buf);
2857        return ret;
2858}
2859
2860static inline bool ib_is_udata_cleared(struct ib_udata *udata,
2861                                       size_t offset,
2862                                       size_t len)
2863{
2864        return ib_is_buffer_cleared(udata->inbuf + offset, len);
2865}
2866
2867/**
2868 * ib_is_destroy_retryable - Check whether the uobject destruction
2869 * is retryable.
2870 * @ret: The initial destruction return code
2871 * @why: remove reason
2872 * @uobj: The uobject that is destroyed
2873 *
2874 * This function is a helper that the IB layer and low-level drivers
2875 * can use to decide whether the destruction of the given uobject is
2876 * retryable.
2877 * It checks the original return code; if it wasn't success, the destruction
2878 * is retryable depending on the ucontext state (i.e. cleanup_retryable) and
2879 * the remove reason (i.e. why).
2880 * Must be called with the object locked for destroy.
2881 */
2882static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
2883                                           struct ib_uobject *uobj)
2884{
2885        return ret && (why == RDMA_REMOVE_DESTROY ||
2886                       uobj->context->cleanup_retryable);
2887}
2888
2889/**
2890 * ib_destroy_usecnt - Called during destruction to check the usecnt
2891 * @usecnt: The usecnt atomic
2892 * @why: remove reason
2893 * @uobj: The uobject that is destroyed
2894 *
2895 * Non-zero usecnts will block destruction unless destruction was triggered by
2896 * a ucontext cleanup.
2897 */
2898static inline int ib_destroy_usecnt(atomic_t *usecnt,
2899                                    enum rdma_remove_reason why,
2900                                    struct ib_uobject *uobj)
2901{
2902        if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
2903                return -EBUSY;
2904        return 0;
2905}
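
/*
 * Illustrative sketch (not part of this header): a driver destroy path for a
 * reference-counted object might combine the two helpers above.  "my_obj" and
 * its usecnt member are hypothetical.
 *
 *	static int my_destroy_obj(struct my_obj *obj,
 *				  enum rdma_remove_reason why,
 *				  struct ib_uobject *uobj)
 *	{
 *		int ret;
 *
 *		// -EBUSY while other objects still hold a reference, unless
 *		// the ucontext cleanup rules say destruction is not retryable
 *		ret = ib_destroy_usecnt(&obj->usecnt, why, uobj);
 *		if (ret)
 *			return ret;
 *
 *		// safe to release the hardware resources here
 *		return 0;
 *	}
 */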
2906
2907/**
2908 * ib_modify_qp_is_ok - Check that the supplied attribute mask
2909 * contains all required attributes and no attributes not allowed for
2910 * the given QP state transition.
2911 * @cur_state: Current QP state
2912 * @next_state: Next QP state
2913 * @type: QP type
2914 * @mask: Mask of supplied QP attributes
2915 *
2916 * This function is a helper function that a low-level driver's
2917 * modify_qp method can use to validate the consumer's input.  It
2918 * checks that cur_state and next_state are valid QP states, that a
2919 * transition from cur_state to next_state is allowed by the IB spec,
2920 * and that the attribute mask supplied is allowed for the transition.
2921 */
2922bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
2923                        enum ib_qp_type type, enum ib_qp_attr_mask mask);
2924
2925void ib_register_event_handler(struct ib_event_handler *event_handler);
2926void ib_unregister_event_handler(struct ib_event_handler *event_handler);
2927void ib_dispatch_event(struct ib_event *event);
2928
2929int ib_query_port(struct ib_device *device,
2930                  u8 port_num, struct ib_port_attr *port_attr);
2931
2932enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
2933                                               u8 port_num);
2934
2935/**
2936 * rdma_cap_ib_switch - Check if the device is IB switch
2937 * @device: Device to check
2938 *
2939 * The device driver is responsible for setting the is_switch bit in the
2940 * ib_device structure at init time.
2941 *
2942 * Return: true if the device is IB switch.
2943 */
2944static inline bool rdma_cap_ib_switch(const struct ib_device *device)
2945{
2946        return device->is_switch;
2947}
2948
2949/**
2950 * rdma_start_port - Return the first valid port number for the device
2951 * specified
2952 *
2953 * @device: Device to be checked
2954 *
2955 * Return start port number
2956 */
2957static inline u8 rdma_start_port(const struct ib_device *device)
2958{
2959        return rdma_cap_ib_switch(device) ? 0 : 1;
2960}
2961
2962/**
2963 * rdma_for_each_port - Iterate over all valid port numbers of the IB device
2964 * @device: The struct ib_device * to iterate over
2965 * @iter: The unsigned int to store the port number
2966 */
2967#define rdma_for_each_port(device, iter)                                       \
2968        for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type(   \
2969                                                     unsigned int, iter)));    \
2970             iter <= rdma_end_port(device); (iter)++)
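
/*
 * Illustrative sketch (not part of this header): walking every valid port of
 * a device and testing a per-port capability; "device" is assumed to be a
 * registered struct ib_device pointer.
 *
 *	unsigned int port;
 *	bool any_ib = false;
 *
 *	rdma_for_each_port(device, port)
 *		if (rdma_protocol_ib(device, port))
 *			any_ib = true;
 */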
2971
2972/**
2973 * rdma_end_port - Return the last valid port number for the device
2974 * specified
2975 *
2976 * @device: Device to be checked
2977 *
2978 * Return last port number
2979 */
2980static inline u8 rdma_end_port(const struct ib_device *device)
2981{
2982        return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
2983}
2984
2985static inline int rdma_is_port_valid(const struct ib_device *device,
2986                                     unsigned int port)
2987{
2988        return (port >= rdma_start_port(device) &&
2989                port <= rdma_end_port(device));
2990}
2991
2992static inline bool rdma_is_grh_required(const struct ib_device *device,
2993                                        u8 port_num)
2994{
2995        return device->port_data[port_num].immutable.core_cap_flags &
2996               RDMA_CORE_PORT_IB_GRH_REQUIRED;
2997}
2998
2999static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
3000{
3001        return device->port_data[port_num].immutable.core_cap_flags &
3002               RDMA_CORE_CAP_PROT_IB;
3003}
3004
3005static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
3006{
3007        return device->port_data[port_num].immutable.core_cap_flags &
3008               (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
3009}
3010
3011static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
3012{
3013        return device->port_data[port_num].immutable.core_cap_flags &
3014               RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
3015}
3016
3017static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
3018{
3019        return device->port_data[port_num].immutable.core_cap_flags &
3020               RDMA_CORE_CAP_PROT_ROCE;
3021}
3022
3023static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
3024{
3025        return device->port_data[port_num].immutable.core_cap_flags &
3026               RDMA_CORE_CAP_PROT_IWARP;
3027}
3028
3029static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
3030{
3031        return rdma_protocol_ib(device, port_num) ||
3032                rdma_protocol_roce(device, port_num);
3033}
3034
3035static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
3036{
3037        return device->port_data[port_num].immutable.core_cap_flags &
3038               RDMA_CORE_CAP_PROT_RAW_PACKET;
3039}
3040
3041static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
3042{
3043        return device->port_data[port_num].immutable.core_cap_flags &
3044               RDMA_CORE_CAP_PROT_USNIC;
3045}
3046
3047/**
3048 * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
3049 * Management Datagrams.
3050 * @device: Device to check
3051 * @port_num: Port number to check
3052 *
3053 * Management Datagrams (MAD) are a required part of the InfiniBand
3054 * specification and are supported on all InfiniBand devices.  A slightly
3055 * extended version is also supported on OPA interfaces.
3056 *
3057 * Return: true if the port supports sending/receiving of MAD packets.
3058 */
3059static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
3060{
3061        return device->port_data[port_num].immutable.core_cap_flags &
3062               RDMA_CORE_CAP_IB_MAD;
3063}
3064
3065/**
3066 * rdma_cap_opa_mad - Check if the port of device provides support for OPA
3067 * Management Datagrams.
3068 * @device: Device to check
3069 * @port_num: Port number to check
3070 *
3071 * Intel OmniPath devices extend and/or replace the InfiniBand Management
3072 * datagrams with their own versions.  These OPA MADs share many but not all of
3073 * the characteristics of InfiniBand MADs.
3074 *
3075 * OPA MADs differ in the following ways:
3076 *
3077 *    1) MADs are variable size up to 2K
3078 *       IBTA defined MADs remain fixed at 256 bytes
3079 *    2) OPA SMPs must carry valid PKeys
3080 *    3) OPA SMP packets are a different format
3081 *
3082 * Return: true if the port supports OPA MAD packet formats.
3083 */
3084static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
3085{
3086        return device->port_data[port_num].immutable.core_cap_flags &
3087                RDMA_CORE_CAP_OPA_MAD;
3088}
3089
3090/**
3091 * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband
3092 * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI).
3093 * @device: Device to check
3094 * @port_num: Port number to check
3095 *
3096 * Each InfiniBand node is required to provide a Subnet Management Agent
3097 * that the subnet manager can access.  Prior to the fabric being fully
3098 * configured by the subnet manager, the SMA is accessed via a well known
3099 * interface called the Subnet Management Interface (SMI).  This interface
3100 * uses directed route packets to communicate with the SM to get around the
3101 * chicken and egg problem of the SM needing to know what's on the fabric
3102 * in order to configure the fabric, and needing to configure the fabric in
3103 * order to send packets to the devices on the fabric.  These directed
3104 * route packets do not need the fabric fully configured in order to reach
3105 * their destination.  The SMI is the only method allowed to send
3106 * directed route packets on an InfiniBand fabric.
3107 *
3108 * Return: true if the port provides an SMI.
3109 */
3110static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
3111{
3112        return device->port_data[port_num].immutable.core_cap_flags &
3113               RDMA_CORE_CAP_IB_SMI;
3114}
3115
3116/**
3117 * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband
3118 * Communication Manager.
3119 * @device: Device to check
3120 * @port_num: Port number to check
3121 *
3122 * The InfiniBand Communication Manager is one of many pre-defined General
3123 * Service Agents (GSA) that are accessed via the General Service
3124 * Interface (GSI).  Its role is to facilitate establishment of connections
3125 * between nodes as well as other management related tasks for established
3126 * connections.
3127 *
3128 * Return: true if the port supports an IB CM (this does not guarantee that
3129 * a CM is actually running however).
3130 */
3131static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
3132{
3133        return device->port_data[port_num].immutable.core_cap_flags &
3134               RDMA_CORE_CAP_IB_CM;
3135}
3136
3137/**
3138 * rdma_cap_iw_cm - Check if the port of device has the capability IWARP
3139 * Communication Manager.
3140 * @device: Device to check
3141 * @port_num: Port number to check
3142 *
3143 * Similar to above, but specific to iWARP connections, which have a
3144 * different management protocol than InfiniBand.
3145 *
3146 * Return: true if the port supports an iWARP CM (this does not guarantee that
3147 * a CM is actually running however).
3148 */
3149static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
3150{
3151        return device->port_data[port_num].immutable.core_cap_flags &
3152               RDMA_CORE_CAP_IW_CM;
3153}
3154
3155/**
3156 * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband
3157 * Subnet Administration.
3158 * @device: Device to check
3159 * @port_num: Port number to check
3160 *
3161 * An InfiniBand Subnet Administration (SA) service is a pre-defined General
3162 * Service Agent (GSA) provided by the Subnet Manager (SM).  On InfiniBand
3163 * fabrics, devices should resolve routes to other hosts by contacting the
3164 * SA to query the proper route.
3165 *
3166 * Return: true if the port should act as a client to the fabric Subnet
3167 * Administration interface.  This does not imply that the SA service is
3168 * running locally.
3169 */
3170static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
3171{
3172        return device->port_data[port_num].immutable.core_cap_flags &
3173               RDMA_CORE_CAP_IB_SA;
3174}
3175
3176/**
3177 * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband
3178 * Multicast.
3179 * @device: Device to check
3180 * @port_num: Port number to check
3181 *
3182 * InfiniBand multicast registration is more complex than normal IPv4 or
3183 * IPv6 multicast registration.  Each Host Channel Adapter must register
3184 * with the Subnet Manager when it wishes to join a multicast group.  It
3185 * should do so only once regardless of how many queue pairs it subscribes
3186 * to this group.  And it should leave the group only after all queue pairs
3187 * attached to the group have been detached.
3188 *
3189 * Return: true if the port must undertake the additional administrative
3190 * overhead of registering/unregistering with the SM and tracking of the
3191 * total number of queue pairs attached to the multicast group.
3192 */
3193static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
3194{
3195        return rdma_cap_ib_sa(device, port_num);
3196}
3197
3198/**
3199 * rdma_cap_af_ib - Check if the port of device has the capability
3200 * Native Infiniband Address.
3201 * @device: Device to check
3202 * @port_num: Port number to check
3203 *
3204 * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default
3205 * GID.  RoCE uses a different mechanism, but still generates a GID via
3206 * a prescribed mechanism and port specific data.
3207 *
3208 * Return: true if the port uses a GID address to identify devices on the
3209 * network.
3210 */
3211static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
3212{
3213        return device->port_data[port_num].immutable.core_cap_flags &
3214               RDMA_CORE_CAP_AF_IB;
3215}
3216
3217/**
3218 * rdma_cap_eth_ah - Check if the port of device has the capability
3219 * Ethernet Address Handle.
3220 * @device: Device to check
3221 * @port_num: Port number to check
3222 *
3223 * RoCE is InfiniBand over Ethernet, and it uses a well defined technique
3224 * to fabricate GIDs over Ethernet/IP specific addresses native to the
3225 * port.  Normally, packet headers are generated by the sending host
3226 * adapter, but when sending connectionless datagrams, we must manually
3227 * inject the proper headers for the fabric we are communicating over.
3228 *
3229 * Return: true if we are running as a RoCE port and must force the
3230 * addition of a Global Route Header built from our Ethernet Address
3231 * Handle into our header list for connectionless packets.
3232 */
3233static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
3234{
3235        return device->port_data[port_num].immutable.core_cap_flags &
3236               RDMA_CORE_CAP_ETH_AH;
3237}
3238
3239/**
3240 * rdma_cap_opa_ah - Check if the port of device supports
3241 * OPA Address handles
3242 * @device: Device to check
3243 * @port_num: Port number to check
3244 *
3245 * Return: true if we are running on an OPA device which supports
3246 * the extended OPA addressing.
3247 */
3248static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
3249{
3250        return (device->port_data[port_num].immutable.core_cap_flags &
3251                RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
3252}
3253
3254/**
3255 * rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
3256 *
3257 * @device: Device
3258 * @port_num: Port number
3259 *
3260 * This MAD size includes the MAD headers and MAD payload.  No other headers
3261 * are included.
3262 *
3263 * Return the max MAD size required by the Port.  Will return 0 if the port
3264 * does not support MADs.
3265 */
3266static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
3267{
3268        return device->port_data[port_num].immutable.max_mad_size;
3269}
3270
3271/**
3272 * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table
3273 * @device: Device to check
3274 * @port_num: Port number to check
3275 *
3276 * RoCE GID table mechanism manages the various GIDs for a device.
3277 *
3278 * NOTE: if allocating the port's GID table has failed, this call will still
3279 * return true, but any RoCE GID table API will fail.
3280 *
3281 * Return: true if the port uses RoCE GID table mechanism in order to manage
3282 * its GIDs.
3283 */
3284static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
3285                                           u8 port_num)
3286{
3287        return rdma_protocol_roce(device, port_num) &&
3288                device->ops.add_gid && device->ops.del_gid;
3289}
3290
3291/*
3292 * Check if the device supports READ W/ INVALIDATE.
3293 */
3294static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
3295{
3296        /*
3297         * iWarp drivers must support READ W/ INVALIDATE.  No other protocol
3298         * has support for it yet.
3299         */
3300        return rdma_protocol_iwarp(dev, port_num);
3301}
3302
3303/**
3304 * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
3305 *
3306 * @addr: address
3307 * @pgsz_bitmap: bitmap of HW supported page sizes
3308 */
3309static inline unsigned int rdma_find_pg_bit(unsigned long addr,
3310                                            unsigned long pgsz_bitmap)
3311{
3312        unsigned long align;
3313        unsigned long pgsz;
3314
3315        align = addr & -addr;
3316
3317        /* Find page bit such that addr is aligned to the highest supported
3318         * HW page size
3319         */
3320        pgsz = pgsz_bitmap & ~(-align << 1);
3321        if (!pgsz)
3322                return __ffs(pgsz_bitmap);
3323
3324        return __fls(pgsz);
3325}
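
/*
 * Illustrative sketch (not part of this header): choosing the largest HW page
 * size usable for a region starting at "start" (a hypothetical unsigned long
 * address); the supported-size bitmap below is made up.
 *
 *	unsigned long pgsz_bitmap = SZ_4K | SZ_2M;
 *	unsigned int pg_bit = rdma_find_pg_bit(start, pgsz_bitmap);
 *	unsigned long pg_size = 1UL << pg_bit;	// 2M only if start is 2M aligned
 */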
3326
3327int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
3328                         int state);
3329int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
3330                     struct ifla_vf_info *info);
3331int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
3332                    struct ifla_vf_stats *stats);
3333int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
3334                   int type);
3335
3336int ib_query_pkey(struct ib_device *device,
3337                  u8 port_num, u16 index, u16 *pkey);
3338
3339int ib_modify_device(struct ib_device *device,
3340                     int device_modify_mask,
3341                     struct ib_device_modify *device_modify);
3342
3343int ib_modify_port(struct ib_device *device,
3344                   u8 port_num, int port_modify_mask,
3345                   struct ib_port_modify *port_modify);
3346
3347int ib_find_gid(struct ib_device *device, union ib_gid *gid,
3348                u8 *port_num, u16 *index);
3349
3350int ib_find_pkey(struct ib_device *device,
3351                 u8 port_num, u16 pkey, u16 *index);
3352
3353enum ib_pd_flags {
3354        /*
3355         * Create a memory registration for all memory in the system and place
3356         * the rkey for it into pd->unsafe_global_rkey.  This can be used by
3357         * ULPs to avoid the overhead of dynamic MRs.
3358         *
3359         * This flag is generally considered unsafe and must only be used in
3360 * extremely trusted environments.  Every use of it will log a warning
3361         * in the kernel log.
3362         */
3363        IB_PD_UNSAFE_GLOBAL_RKEY        = 0x01,
3364};
3365
3366struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
3367                const char *caller);
3368
3369#define ib_alloc_pd(device, flags) \
3370        __ib_alloc_pd((device), (flags), KBUILD_MODNAME)
3371
3372/**
3373 * ib_dealloc_pd_user - Deallocate kernel/user PD
3374 * @pd: The protection domain
3375 * @udata: Valid user data or NULL for kernel objects
3376 */
3377void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
3378
3379/**
3380 * ib_dealloc_pd - Deallocate kernel PD
3381 * @pd: The protection domain
3382 *
3383 * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
3384 */
3385static inline void ib_dealloc_pd(struct ib_pd *pd)
3386{
3387        ib_dealloc_pd_user(pd, NULL);
3388}
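
/*
 * Illustrative sketch (not part of this header): typical kernel ULP
 * protection domain lifetime.
 *
 *	struct ib_pd *pd;
 *
 *	pd = ib_alloc_pd(device, 0);
 *	if (IS_ERR(pd))
 *		return PTR_ERR(pd);
 *
 *	// create QPs, SRQs and MRs under this PD ...
 *
 *	ib_dealloc_pd(pd);
 */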
3389
3390enum rdma_create_ah_flags {
3391        /* In a sleepable context */
3392        RDMA_CREATE_AH_SLEEPABLE = BIT(0),
3393};
3394
3395/**
3396 * rdma_create_ah - Creates an address handle for the given address vector.
3397 * @pd: The protection domain associated with the address handle.
3398 * @ah_attr: The attributes of the address vector.
3399 * @flags: Create address handle flags (see enum rdma_create_ah_flags).
3400 *
3401 * The address handle is used to reference a local or global destination
3402 * in all UD QP post sends.
3403 */
3404struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
3405                             u32 flags);
3406
3407/**
3408 * rdma_create_user_ah - Creates an address handle for the given address vector.
3409 * It also resolves the destination MAC address for ah attributes of RoCE type.
3410 * @pd: The protection domain associated with the address handle.
3411 * @ah_attr: The attributes of the address vector.
3412 * @udata: pointer to user's input/output buffer information needed by
3413 *         the provider driver.
3414 *
3415 * It returns a valid address handle on success and an ERR_PTR on failure.
3416 * The address handle is used to reference a local or global destination
3417 * in all UD QP post sends.
3418 */
3419struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
3420                                  struct rdma_ah_attr *ah_attr,
3421                                  struct ib_udata *udata);
3422/**
3423 * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header
3424 *   work completion.
3425 * @hdr: the L3 header to parse
3426 * @net_type: type of header to parse
3427 * @sgid: place to store source gid
3428 * @dgid: place to store destination gid
3429 */
3430int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
3431                              enum rdma_network_type net_type,
3432                              union ib_gid *sgid, union ib_gid *dgid);
3433
3434/**
3435 * ib_get_rdma_header_version - Get the header version
3436 * @hdr: the L3 header to parse
3437 */
3438int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
3439
3440/**
3441 * ib_init_ah_attr_from_wc - Initializes address handle attributes from a
3442 *   work completion.
3443 * @device: Device on which the received message arrived.
3444 * @port_num: Port on which the received message arrived.
3445 * @wc: Work completion associated with the received message.
3446 * @grh: References the received global route header.  This parameter is
3447 *   ignored unless the work completion indicates that the GRH is valid.
3448 * @ah_attr: Returned attributes that can be used when creating an address
3449 *   handle for replying to the message.
3450 * When ib_init_ah_attr_from_wc() returns success,
3451 * (a) for the IB link layer it optionally contains a reference to the SGID
3452 * attribute when a GRH is present.
3453 * (b) for the RoCE link layer it contains a reference to the SGID attribute.
3454 * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID
3455 * attributes which are initialized using ib_init_ah_attr_from_wc().
3456 *
3457 */
3458int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
3459                            const struct ib_wc *wc, const struct ib_grh *grh,
3460                            struct rdma_ah_attr *ah_attr);
3461
3462/**
3463 * ib_create_ah_from_wc - Creates an address handle associated with the
3464 *   sender of the specified work completion.
3465 * @pd: The protection domain associated with the address handle.
3466 * @wc: Work completion information associated with a received message.
3467 * @grh: References the received global route header.  This parameter is
3468 *   ignored unless the work completion indicates that the GRH is valid.
3469 * @port_num: The outbound port number to associate with the address.
3470 *
3471 * The address handle is used to reference a local or global destination
3472 * in all UD QP post sends.
3473 */
3474struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
3475                                   const struct ib_grh *grh, u8 port_num);
3476
3477/**
3478 * rdma_modify_ah - Modifies the address vector associated with an address
3479 *   handle.
3480 * @ah: The address handle to modify.
3481 * @ah_attr: The new address vector attributes to associate with the
3482 *   address handle.
3483 */
3484int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
3485
3486/**
3487 * rdma_query_ah - Queries the address vector associated with an address
3488 *   handle.
3489 * @ah: The address handle to query.
3490 * @ah_attr: The address vector attributes associated with the address
3491 *   handle.
3492 */
3493int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
3494
3495enum rdma_destroy_ah_flags {
3496        /* In a sleepable context */
3497        RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
3498};
3499
3500/**
3501 * rdma_destroy_ah_user - Destroys an address handle.
3502 * @ah: The address handle to destroy.
3503 * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
3504 * @udata: Valid user data or NULL for kernel objects
3505 */
3506int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
3507
3508/**
3509 * rdma_destroy_ah - Destroys a kernel address handle.
3510 * @ah: The address handle to destroy.
3511 * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
3512 *
3513 * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
3514 */
3515static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
3516{
3517        return rdma_destroy_ah_user(ah, flags, NULL);
3518}
3519
3520/**
3521 * ib_create_srq - Creates a SRQ associated with the specified protection
3522 *   domain.
3523 * @pd: The protection domain associated with the SRQ.
3524 * @srq_init_attr: A list of initial attributes required to create the
3525 *   SRQ.  If SRQ creation succeeds, then the attributes are updated to
3526 *   the actual capabilities of the created SRQ.
3527 *
3528 * srq_attr->max_wr and srq_attr->max_sge are read to determine the
3529 * requested size of the SRQ, and set to the actual values allocated
3530 * on return.  If ib_create_srq() succeeds, then max_wr and max_sge
3531 * will always be at least as large as the requested values.
3532 */
3533struct ib_srq *ib_create_srq(struct ib_pd *pd,
3534                             struct ib_srq_init_attr *srq_init_attr);
3535
3536/**
3537 * ib_modify_srq - Modifies the attributes for the specified SRQ.
3538 * @srq: The SRQ to modify.
3539 * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
3540 *   the current values of selected SRQ attributes are returned.
3541 * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
3542 *   are being modified.
3543 *
3544 * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
3545 * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
3546 * the number of receives queued drops below the limit.
3547 */
3548int ib_modify_srq(struct ib_srq *srq,
3549                  struct ib_srq_attr *srq_attr,
3550                  enum ib_srq_attr_mask srq_attr_mask);
3551
3552/**
3553 * ib_query_srq - Returns the attribute list and current values for the
3554 *   specified SRQ.
3555 * @srq: The SRQ to query.
3556 * @srq_attr: The attributes of the specified SRQ.
3557 */
3558int ib_query_srq(struct ib_srq *srq,
3559                 struct ib_srq_attr *srq_attr);
3560
3561/**
3562 * ib_destroy_srq_user - Destroys the specified SRQ.
3563 * @srq: The SRQ to destroy.
3564 * @udata: Valid user data or NULL for kernel objects
3565 */
3566int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
3567
3568/**
3569 * ib_destroy_srq - Destroys the specified kernel SRQ.
3570 * @srq: The SRQ to destroy.
3571 *
3572 * NOTE: for user srq use ib_destroy_srq_user with valid udata!
3573 */
3574static inline int ib_destroy_srq(struct ib_srq *srq)
3575{
3576        return ib_destroy_srq_user(srq, NULL);
3577}
3578
3579/**
3580 * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
3581 * @srq: The SRQ to post the work request on.
3582 * @recv_wr: A list of work requests to post on the receive queue.
3583 * @bad_recv_wr: On an immediate failure, this parameter will reference
3584 *   the work request that failed to be posted on the SRQ.
3585 */
3586static inline int ib_post_srq_recv(struct ib_srq *srq,
3587                                   const struct ib_recv_wr *recv_wr,
3588                                   const struct ib_recv_wr **bad_recv_wr)
3589{
3590        const struct ib_recv_wr *dummy;
3591
3592        return srq->device->ops.post_srq_recv(srq, recv_wr,
3593                                              bad_recv_wr ? : &dummy);
3594}
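
/*
 * Illustrative sketch (not part of this header): posting one receive buffer
 * to an SRQ.  "dma_addr", "buf_len" and "ctx" stand for a DMA-mapped buffer
 * owned by the caller and are hypothetical.
 *
 *	struct ib_sge sge = {
 *		.addr	= dma_addr,
 *		.length	= buf_len,
 *		.lkey	= pd->local_dma_lkey,
 *	};
 *	struct ib_recv_wr wr = {
 *		.wr_id	 = (uintptr_t)ctx,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *	};
 *
 *	ret = ib_post_srq_recv(srq, &wr, NULL);
 */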
3595
3596/**
3597 * ib_create_qp_user - Creates a QP associated with the specified protection
3598 *   domain.
3599 * @pd: The protection domain associated with the QP.
3600 * @qp_init_attr: A list of initial attributes required to create the
3601 *   QP.  If QP creation succeeds, then the attributes are updated to
3602 *   the actual capabilities of the created QP.
3603 * @udata: Valid user data or NULL for kernel objects
3604 */
3605struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
3606                                struct ib_qp_init_attr *qp_init_attr,
3607                                struct ib_udata *udata);
3608
3609/**
3610 * ib_create_qp - Creates a kernel QP associated with the specified protection
3611 *   domain.
3612 * @pd: The protection domain associated with the QP.
3613 * @qp_init_attr: A list of initial attributes required to create the
3614 *   QP.  If QP creation succeeds, then the attributes are updated to
3615 *   the actual capabilities of the created QP.
3617 *
3618 * NOTE: for user qp use ib_create_qp_user with valid udata!
3619 */
3620static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
3621                                         struct ib_qp_init_attr *qp_init_attr)
3622{
3623        return ib_create_qp_user(pd, qp_init_attr, NULL);
3624}
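
/*
 * Illustrative sketch (not part of this header): creating a kernel RC QP
 * bound to an existing CQ; the capability values are arbitrary examples.
 *
 *	struct ib_qp_init_attr init_attr = {
 *		.send_cq     = cq,
 *		.recv_cq     = cq,
 *		.qp_type     = IB_QPT_RC,
 *		.sq_sig_type = IB_SIGNAL_REQ_WR,
 *		.cap = {
 *			.max_send_wr  = 64,
 *			.max_recv_wr  = 64,
 *			.max_send_sge = 1,
 *			.max_recv_sge = 1,
 *		},
 *	};
 *	struct ib_qp *qp = ib_create_qp(pd, &init_attr);
 *
 *	if (IS_ERR(qp))
 *		return PTR_ERR(qp);
 */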
3625
3626/**
3627 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
3628 * @qp: The QP to modify.
3629 * @attr: On input, specifies the QP attributes to modify.  On output,
3630 *   the current values of selected QP attributes are returned.
3631 * @attr_mask: A bit-mask used to specify which attributes of the QP
3632 *   are being modified.
3633 * @udata: pointer to user's input/output buffer information
3635 * It returns 0 on success and returns appropriate error code on error.
3636 */
3637int ib_modify_qp_with_udata(struct ib_qp *qp,
3638                            struct ib_qp_attr *attr,
3639                            int attr_mask,
3640                            struct ib_udata *udata);
3641
3642/**
3643 * ib_modify_qp - Modifies the attributes for the specified QP and then
3644 *   transitions the QP to the given state.
3645 * @qp: The QP to modify.
3646 * @qp_attr: On input, specifies the QP attributes to modify.  On output,
3647 *   the current values of selected QP attributes are returned.
3648 * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
3649 *   are being modified.
3650 */
3651int ib_modify_qp(struct ib_qp *qp,
3652                 struct ib_qp_attr *qp_attr,
3653                 int qp_attr_mask);
3654
3655/**
3656 * ib_query_qp - Returns the attribute list and current values for the
3657 *   specified QP.
3658 * @qp: The QP to query.
3659 * @qp_attr: The attributes of the specified QP.
3660 * @qp_attr_mask: A bit-mask used to select specific attributes to query.
3661 * @qp_init_attr: Additional attributes of the selected QP.
3662 *
3663 * The qp_attr_mask may be used to limit the query to gathering only the
3664 * selected attributes.
3665 */
3666int ib_query_qp(struct ib_qp *qp,
3667                struct ib_qp_attr *qp_attr,
3668                int qp_attr_mask,
3669                struct ib_qp_init_attr *qp_init_attr);
3670
3671/**
3672 * ib_destroy_qp_user - Destroys the specified QP.
3673 * @qp: The QP to destroy.
3674 * @udata: Valid udata or NULL for kernel objects
3675 */
3676int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
3677
3678/**
3679 * ib_destroy_qp - Destroys the specified kernel QP.
3680 * @qp: The QP to destroy.
3681 *
3682 * NOTE: for user qp use ib_destroy_qp_user with valid udata!
3683 */
3684static inline int ib_destroy_qp(struct ib_qp *qp)
3685{
3686        return ib_destroy_qp_user(qp, NULL);
3687}
3688
3689/**
3690 * ib_open_qp - Obtain a reference to an existing sharable QP.
3691 * @xrcd: XRC domain
3692 * @qp_open_attr: Attributes identifying the QP to open.
3693 *
3694 * Returns a reference to a sharable QP.
3695 */
3696struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
3697                         struct ib_qp_open_attr *qp_open_attr);
3698
3699/**
3700 * ib_close_qp - Release an external reference to a QP.
3701 * @qp: The QP handle to release
3702 *
3703 * The opened QP handle is released by the caller.  The underlying
3704 * shared QP is not destroyed until all internal references are released.
3705 */
3706int ib_close_qp(struct ib_qp *qp);
3707
3708/**
3709 * ib_post_send - Posts a list of work requests to the send queue of
3710 *   the specified QP.
3711 * @qp: The QP to post the work request on.
3712 * @send_wr: A list of work requests to post on the send queue.
3713 * @bad_send_wr: On an immediate failure, this parameter will reference
3714 *   the work request that failed to be posted on the QP.
3715 *
3716 * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
3717 * error is returned, the QP state shall not be affected,
3718 * ib_post_send() will return an immediate error after queueing any
3719 * earlier work requests in the list.
3720 */
3721static inline int ib_post_send(struct ib_qp *qp,
3722                               const struct ib_send_wr *send_wr,
3723                               const struct ib_send_wr **bad_send_wr)
3724{
3725        const struct ib_send_wr *dummy;
3726
3727        return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
3728}
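
/*
 * Illustrative sketch (not part of this header): posting a single signalled
 * SEND.  "dma_addr", "len" and "ctx" describe a DMA-mapped buffer owned by
 * the caller and are hypothetical.
 *
 *	struct ib_sge sge = {
 *		.addr	= dma_addr,
 *		.length	= len,
 *		.lkey	= pd->local_dma_lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id	    = (uintptr_t)ctx,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode	    = IB_WR_SEND,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *
 *	ret = ib_post_send(qp, &wr, NULL);
 */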
3729
3730/**
3731 * ib_post_recv - Posts a list of work requests to the receive queue of
3732 *   the specified QP.
3733 * @qp: The QP to post the work request on.
3734 * @recv_wr: A list of work requests to post on the receive queue.
3735 * @bad_recv_wr: On an immediate failure, this parameter will reference
3736 *   the work request that failed to be posted on the QP.
3737 */
3738static inline int ib_post_recv(struct ib_qp *qp,
3739                               const struct ib_recv_wr *recv_wr,
3740                               const struct ib_recv_wr **bad_recv_wr)
3741{
3742        const struct ib_recv_wr *dummy;
3743
3744        return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
3745}
3746
3747struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
3748                                 int nr_cqe, int comp_vector,
3749                                 enum ib_poll_context poll_ctx,
3750                                 const char *caller, struct ib_udata *udata);
3751
3752/**
3753 * ib_alloc_cq_user: Allocate kernel/user CQ
3754 * @dev: The IB device
3755 * @private: Private data attached to the CQE
3756 * @nr_cqe: Number of CQEs in the CQ
3757 * @comp_vector: Completion vector used for the IRQs
3758 * @poll_ctx: Context used for polling the CQ
3759 * @udata: Valid user data or NULL for kernel objects
3760 */
3761static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
3762                                             void *private, int nr_cqe,
3763                                             int comp_vector,
3764                                             enum ib_poll_context poll_ctx,
3765                                             struct ib_udata *udata)
3766{
3767        return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
3768                                  KBUILD_MODNAME, udata);
3769}
3770
3771/**
3772 * ib_alloc_cq: Allocate kernel CQ
3773 * @dev: The IB device
3774 * @private: Private data attached to the CQE
3775 * @nr_cqe: Number of CQEs in the CQ
3776 * @comp_vector: Completion vector used for the IRQs
3777 * @poll_ctx: Context used for polling the CQ
3778 *
3779 * NOTE: for user cq use ib_alloc_cq_user with valid udata!
3780 */
3781static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
3782                                        int nr_cqe, int comp_vector,
3783                                        enum ib_poll_context poll_ctx)
3784{
3785        return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
3786                                NULL);
3787}
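
/*
 * Illustrative sketch (not part of this header): allocating a kernel CQ that
 * is polled from softirq context and freeing it again.  The CQE count and
 * completion vector are arbitrary; "my_ctx" is hypothetical.
 *
 *	struct ib_cq *cq;
 *
 *	cq = ib_alloc_cq(device, my_ctx, 128, 0, IB_POLL_SOFTIRQ);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *
 *	// post work requests whose ib_cqe->done callbacks run in softirq
 *
 *	ib_free_cq(cq);
 */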
3788
3789/**
3790 * ib_free_cq_user - Free kernel/user CQ
3791 * @cq: The CQ to free
3792 * @udata: Valid user data or NULL for kernel objects
3793 */
3794void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
3795
3796/**
3797 * ib_free_cq - Free kernel CQ
3798 * @cq: The CQ to free
3799 *
3800 * NOTE: for user cq use ib_free_cq_user with valid udata!
3801 */
3802static inline void ib_free_cq(struct ib_cq *cq)
3803{
3804        ib_free_cq_user(cq, NULL);
3805}
3806
3807int ib_process_cq_direct(struct ib_cq *cq, int budget);
3808
3809/**
3810 * ib_create_cq - Creates a CQ on the specified device.
3811 * @device: The device on which to create the CQ.
3812 * @comp_handler: A user-specified callback that is invoked when a
3813 *   completion event occurs on the CQ.
3814 * @event_handler: A user-specified callback that is invoked when an
3815 *   asynchronous event not associated with a completion occurs on the CQ.
3816 * @cq_context: Context associated with the CQ returned to the user via
3817 *   the associated completion and event handlers.
3818 * @cq_attr: The attributes the CQ should be created upon.
3819 *
3820 * Users can examine the cq structure to determine the actual CQ size.
3821 */
3822struct ib_cq *__ib_create_cq(struct ib_device *device,
3823                             ib_comp_handler comp_handler,
3824                             void (*event_handler)(struct ib_event *, void *),
3825                             void *cq_context,
3826                             const struct ib_cq_init_attr *cq_attr,
3827                             const char *caller);
3828#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
3829        __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
3830
3831/**
3832 * ib_resize_cq - Modifies the capacity of the CQ.
3833 * @cq: The CQ to resize.
3834 * @cqe: The minimum size of the CQ.
3835 *
3836 * Users can examine the cq structure to determine the actual CQ size.
3837 */
3838int ib_resize_cq(struct ib_cq *cq, int cqe);
3839
3840/**
3841 * rdma_set_cq_moderation - Modifies moderation params of the CQ
3842 * @cq: The CQ to modify.
3843 * @cq_count: number of CQEs that will trigger an event
3844 * @cq_period: max period of time in usec before triggering an event
3845 *
3846 */
3847int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
3848
3849/**
3850 * ib_destroy_cq_user - Destroys the specified CQ.
3851 * @cq: The CQ to destroy.
3852 * @udata: Valid user data or NULL for kernel objects
3853 */
3854int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
3855
3856/**
3857 * ib_destroy_cq - Destroys the specified kernel CQ.
3858 * @cq: The CQ to destroy.
3859 *
3860 * NOTE: for user cq use ib_destroy_cq_user with valid udata!
3861 */
3862static inline int ib_destroy_cq(struct ib_cq *cq)
3863{
3864        return ib_destroy_cq_user(cq, NULL);
3865}
3866
3867/**
3868 * ib_poll_cq - poll a CQ for completion(s)
3869 * @cq: the CQ being polled
3870 * @num_entries: maximum number of completions to return
3871 * @wc: array of at least @num_entries &struct ib_wc where completions
3872 *   will be returned
3873 *
3874 * Poll a CQ for (possibly multiple) completions.  If the return value
3875 * is < 0, an error occurred.  If the return value is >= 0, it is the
3876 * number of completions returned.  If the return value is
3877 * non-negative and < num_entries, then the CQ was emptied.
3878 */
3879static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
3880                             struct ib_wc *wc)
3881{
3882        return cq->device->ops.poll_cq(cq, num_entries, wc);
3883}
3884
3885/**
3886 * ib_req_notify_cq - Request completion notification on a CQ.
3887 * @cq: The CQ to generate an event for.
3888 * @flags:
3889 *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
3890 *   to request an event on the next solicited event or next work
3891 *   completion of any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
3892 *   may also be |ed in to request a hint about missed events, as
3893 *   described below.
3894 *
3895 * Return Value:
3896 *    < 0 means an error occurred while requesting notification
3897 *   == 0 means notification was requested successfully, and if
3898 *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
3899 *        were missed and it is safe to wait for another event.  In
3900 *        this case it is guaranteed that any work completions added
3901 *        to the CQ since the last CQ poll will trigger a completion
3902 *        notification event.
3903 *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
3904 *        in.  It means that the consumer must poll the CQ again to
3905 *        make sure it is empty to avoid missing an event because of a
3906 *        race between requesting notification and an entry being
3907 *        added to the CQ.  This return value means it is possible
3908 *        (but not guaranteed) that a work completion has been added
3909 *        to the CQ since the last poll without triggering a
3910 *        completion notification event.
3911 */
3912static inline int ib_req_notify_cq(struct ib_cq *cq,
3913                                   enum ib_cq_notify_flags flags)
3914{
3915        return cq->device->ops.req_notify_cq(cq, flags);
3916}
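
/*
 * Illustrative sketch (not part of this header): the usual "drain, re-arm,
 * drain again" pattern built from ib_poll_cq() and ib_req_notify_cq().
 * "handle_completion" is a hypothetical per-entry handler.
 *
 *	struct ib_wc wc;
 *
 *	do {
 *		while (ib_poll_cq(cq, 1, &wc) > 0)
 *			handle_completion(&wc);
 *	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
 *				      IB_CQ_REPORT_MISSED_EVENTS) > 0);
 */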
3917
3918/**
3919 * ib_req_ncomp_notif - Request completion notification when there are
3920 *   at least the specified number of unreaped completions on the CQ.
3921 * @cq: The CQ to generate an event for.
3922 * @wc_cnt: The number of unreaped completions that should be on the
3923 *   CQ before an event is generated.
3924 */
3925static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
3926{
3927        return cq->device->ops.req_ncomp_notif ?
3928                cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
3929                -ENOSYS;
3930}
3931
3932/**
3933 * ib_dma_mapping_error - check a DMA addr for error
3934 * @dev: The device for which the dma_addr was created
3935 * @dma_addr: The DMA address to check
3936 */
3937static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
3938{
3939        return dma_mapping_error(dev->dma_device, dma_addr);
3940}
3941
3942/**
3943 * ib_dma_map_single - Map a kernel virtual address to DMA address
3944 * @dev: The device for which the dma_addr is to be created
3945 * @cpu_addr: The kernel virtual address
3946 * @size: The size of the region in bytes
3947 * @direction: The direction of the DMA
3948 */
3949static inline u64 ib_dma_map_single(struct ib_device *dev,
3950                                    void *cpu_addr, size_t size,
3951                                    enum dma_data_direction direction)
3952{
3953        return dma_map_single(dev->dma_device, cpu_addr, size, direction);
3954}
3955
3956/**
3957 * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single()
3958 * @dev: The device for which the DMA address was created
3959 * @addr: The DMA address
3960 * @size: The size of the region in bytes
3961 * @direction: The direction of the DMA
3962 */
3963static inline void ib_dma_unmap_single(struct ib_device *dev,
3964                                       u64 addr, size_t size,
3965                                       enum dma_data_direction direction)
3966{
3967        dma_unmap_single(dev->dma_device, addr, size, direction);
3968}
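
/*
 * Illustrative sketch (not part of this header): mapping a driver-owned
 * buffer for device access and unmapping it once the transfer is done.
 * "buf" and "len" are hypothetical.
 *
 *	u64 dma_addr;
 *
 *	dma_addr = ib_dma_map_single(device, buf, len, DMA_TO_DEVICE);
 *	if (ib_dma_mapping_error(device, dma_addr))
 *		return -ENOMEM;
 *
 *	// hand dma_addr to an ib_sge / work request, wait for completion
 *
 *	ib_dma_unmap_single(device, dma_addr, len, DMA_TO_DEVICE);
 */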
3969
3970/**
3971 * ib_dma_map_page - Map a physical page to DMA address
3972 * @dev: The device for which the dma_addr is to be created
3973 * @page: The page to be mapped
3974 * @offset: The offset within the page
3975 * @size: The size of the region in bytes
3976 * @direction: The direction of the DMA
3977 */
3978static inline u64 ib_dma_map_page(struct ib_device *dev,
3979                                  struct page *page,
3980                                  unsigned long offset,
3981                                  size_t size,
3982                                         enum dma_data_direction direction)
3983{
3984        return dma_map_page(dev->dma_device, page, offset, size, direction);
3985}
3986
3987/**
3988 * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page()
3989 * @dev: The device for which the DMA address was created
3990 * @addr: The DMA address
3991 * @size: The size of the region in bytes
3992 * @direction: The direction of the DMA
3993 */
3994static inline void ib_dma_unmap_page(struct ib_device *dev,
3995                                     u64 addr, size_t size,
3996                                     enum dma_data_direction direction)
3997{
3998        dma_unmap_page(dev->dma_device, addr, size, direction);
3999}
4000
4001/**
4002 * ib_dma_map_sg - Map a scatter/gather list to DMA addresses
4003 * @dev: The device for which the DMA addresses are to be created
4004 * @sg: The array of scatter/gather entries
4005 * @nents: The number of scatter/gather entries
4006 * @direction: The direction of the DMA
4007 */
4008static inline int ib_dma_map_sg(struct ib_device *dev,
4009                                struct scatterlist *sg, int nents,
4010                                enum dma_data_direction direction)
4011{
4012        return dma_map_sg(dev->dma_device, sg, nents, direction);
4013}
4014
4015/**
4016 * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses
4017 * @dev: The device for which the DMA addresses were created
4018 * @sg: The array of scatter/gather entries
4019 * @nents: The number of scatter/gather entries
4020 * @direction: The direction of the DMA
4021 */
4022static inline void ib_dma_unmap_sg(struct ib_device *dev,
4023                                   struct scatterlist *sg, int nents,
4024                                   enum dma_data_direction direction)
4025{
4026        dma_unmap_sg(dev->dma_device, sg, nents, direction);
4027}
4028
4029static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
4030                                      struct scatterlist *sg, int nents,
4031                                      enum dma_data_direction direction,
4032                                      unsigned long dma_attrs)
4033{
4034        return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
4035                                dma_attrs);
4036}
4037
4038static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
4039                                         struct scatterlist *sg, int nents,
4040                                         enum dma_data_direction direction,
4041                                         unsigned long dma_attrs)
4042{
4043        dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
4044}
4045
4046/**
4047 * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
4048 * @dev: The device to query
4049 *
4050 * The returned value represents a size in bytes.
4051 */
4052static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
4053{
4054        struct device_dma_parameters *p = dev->dma_device->dma_parms;
4055
4056        return p ? p->max_segment_size : UINT_MAX;
4057}
4058
4059/**
4060 * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
4061 * @dev: The device for which the DMA address was created
4062 * @addr: The DMA address
4063 * @size: The size of the region in bytes
4064 * @dir: The direction of the DMA
4065 */
4066static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
4067                                              u64 addr,
4068                                              size_t size,
4069                                              enum dma_data_direction dir)
4070{
4071        dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
4072}
4073
4074/**
4075 * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device
4076 * @dev: The device for which the DMA address was created
4077 * @addr: The DMA address
4078 * @size: The size of the region in bytes
4079 * @dir: The direction of the DMA
4080 */
4081static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
4082                                                 u64 addr,
4083                                                 size_t size,
4084                                                 enum dma_data_direction dir)
4085{
4086        dma_sync_single_for_device(dev->dma_device, addr, size, dir);
4087}
4088
4089/**
4090 * ib_dma_alloc_coherent - Allocate memory and map it for DMA
4091 * @dev: The device for which the DMA address is requested
4092 * @size: The size of the region to allocate in bytes
4093 * @dma_handle: A pointer for returning the DMA address of the region
4094 * @flag: memory allocator flags
4095 */
4096static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
4097                                           size_t size,
4098                                           dma_addr_t *dma_handle,
4099                                           gfp_t flag)
4100{
4101        return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
4102}
4103
4104/**
4105 * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
4106 * @dev: The device for which the DMA addresses were allocated
4107 * @size: The size of the region
4108 * @cpu_addr: the address returned by ib_dma_alloc_coherent()
4109 * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
4110 */
4111static inline void ib_dma_free_coherent(struct ib_device *dev,
4112                                        size_t size, void *cpu_addr,
4113                                        dma_addr_t dma_handle)
4114{
4115        dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
4116}
4117
4118/**
4119 * ib_dereg_mr_user - Deregisters a memory region and removes it from the
4120 *   HCA translation table.
4121 * @mr: The memory region to deregister.
4122 * @udata: Valid user data or NULL for kernel object
4123 *
4124 * This function can fail if the memory region has memory windows bound to it.
4125 */
4126int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
4127
4128/**
4129 * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
4130 *   HCA translation table.
4131 * @mr: The memory region to deregister.
4132 *
4133 * This function can fail if the memory region has memory windows bound to it.
4134 *
4135 * NOTE: for user mr use ib_dereg_mr_user with valid udata!
4136 */
4137static inline int ib_dereg_mr(struct ib_mr *mr)
4138{
4139        return ib_dereg_mr_user(mr, NULL);
4140}
4141
4142struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
4143                               u32 max_num_sg, struct ib_udata *udata);
4144
4145static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
4146                                        enum ib_mr_type mr_type, u32 max_num_sg)
4147{
4148        return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
4149}
4150
4151/**
4152 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
4153 *   R_Key and L_Key.
4154 * @mr: struct ib_mr pointer to be updated.
4155 * @newkey: new key to be used.
4156 */
4157static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
4158{
4159        mr->lkey = (mr->lkey & 0xffffff00) | newkey;
4160        mr->rkey = (mr->rkey & 0xffffff00) | newkey;
4161}
4162
4163/**
4164 * ib_inc_rkey - increments the key portion of the given rkey. Can be used
4165 * for calculating a new rkey for type 2 memory windows.
4166 * @rkey: the rkey to increment.
4167 */
4168static inline u32 ib_inc_rkey(u32 rkey)
4169{
4170        const u32 mask = 0x000000ff;
4171        return ((rkey + 1) & mask) | (rkey & ~mask);
4172}
4173
4174/**
4175 * ib_alloc_fmr - Allocates an unmapped fast memory region.
4176 * @pd: The protection domain associated with the unmapped region.
4177 * @mr_access_flags: Specifies the memory access rights.
4178 * @fmr_attr: Attributes of the unmapped region.
4179 *
4180 * A fast memory region must be mapped before it can be used as part of
4181 * a work request.
4182 */
4183struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
4184                            int mr_access_flags,
4185                            struct ib_fmr_attr *fmr_attr);
4186
4187/**
4188 * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
4189 * @fmr: The fast memory region to associate with the pages.
4190 * @page_list: An array of physical pages to map to the fast memory region.
4191 * @list_len: The number of pages in page_list.
4192 * @iova: The I/O virtual address to use with the mapped region.
4193 */
4194static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
4195                                  u64 *page_list, int list_len,
4196                                  u64 iova)
4197{
4198        return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova);
4199}
4200
4201/**
4202 * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
4203 * @fmr_list: A linked list of fast memory regions to unmap.
4204 */
4205int ib_unmap_fmr(struct list_head *fmr_list);
4206
4207/**
4208 * ib_dealloc_fmr - Deallocates a fast memory region.
4209 * @fmr: The fast memory region to deallocate.
4210 */
4211int ib_dealloc_fmr(struct ib_fmr *fmr);
4212
4213/**
4214 * ib_attach_mcast - Attaches the specified QP to a multicast group.
4215 * @qp: QP to attach to the multicast group.  The QP must be type
4216 *   IB_QPT_UD.
4217 * @gid: Multicast group GID.
4218 * @lid: Multicast group LID in host byte order.
4219 *
4220 * In order to send and receive multicast packets, subnet
4221 * administration must have created the multicast group and configured
4222 * the fabric appropriately.  The port associated with the specified
4223 * QP must also be a member of the multicast group.
4224 */
4225int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
4226
4227/**
4228 * ib_detach_mcast - Detaches the specified QP from a multicast group.
4229 * @qp: QP to detach from the multicast group.
4230 * @gid: Multicast group GID.
4231 * @lid: Multicast group LID in host byte order.
4232 */
4233int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
4234
4235/**
4236 * ib_alloc_xrcd - Allocates an XRC domain.
4237 * @device: The device on which to allocate the XRC domain.
4238 * @caller: Module name for kernel consumers
4239 */
4240struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
4241#define ib_alloc_xrcd(device) \
4242        __ib_alloc_xrcd((device), KBUILD_MODNAME)
4243
4244/**
4245 * ib_dealloc_xrcd - Deallocates an XRC domain.
4246 * @xrcd: The XRC domain to deallocate.
4247 * @udata: Valid user data or NULL for kernel object
4248 */
4249int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
4250
4251static inline int ib_check_mr_access(int flags)
4252{
4253        /*
4254         * Local write permission is required if remote write or
4255         * remote atomic permission is also requested.
4256         */
4257        if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
4258            !(flags & IB_ACCESS_LOCAL_WRITE))
4259                return -EINVAL;
4260
4261        return 0;
4262}
4263
4264static inline bool ib_access_writable(int access_flags)
4265{
4266        /*
4267         * We have writable memory backing the MR if any of the following
4268         * access flags are set.  "Local write" and "remote write" obviously
4269         * require write access.  "Remote atomic" can do things like fetch and
4270         * add, which will modify memory, and "MW bind" can change permissions
4271         * by binding a window.
4272         */
4273        return access_flags &
4274                (IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
4275                 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
4276}
4277
4278/**
4279 * ib_check_mr_status: lightweight check of MR status.
4280 *     This routine may provide status checks on a selected
4281 *     ib_mr.  First use is for signature status check.
4282 *
4283 * @mr: A memory region.
4284 * @check_mask: Bitmask of which checks to perform from
4285 *     ib_mr_status_check enumeration.
4286 * @mr_status: The container of relevant status checks.
4287 *     failed checks will be indicated in the status bitmask
4288 *     and the relevant info shall be in the error item.
4289 */
4290int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
4291                       struct ib_mr_status *mr_status);
4292
4293/**
4294 * ib_device_try_get: Hold a registration lock
4295 * @device: The device to lock
4296 *
4297 * A device under an active registration lock cannot become unregistered. It
4298 * is only possible to obtain a registration lock on a device that is fully
4299 * registered, otherwise this function returns false.
4300 *
4301 * The registration lock is only necessary for actions which require the
4302 * device to still be registered. Uses that only require the device pointer to
4303 * be valid should use get_device(&ibdev->dev) to hold the memory.
4304 *
4305 */
4306static inline bool ib_device_try_get(struct ib_device *dev)
4307{
4308        return refcount_inc_not_zero(&dev->refcount);
4309}
4310
4311void ib_device_put(struct ib_device *device);
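
/*
 * Example (illustrative sketch only): take the registration lock, bail out
 * if the device is already being unregistered, and drop the lock with
 * ib_device_put() when the registered-only work is done.
 */
static inline int example_with_registered_device(struct ib_device *ibdev)
{
	if (!ib_device_try_get(ibdev))
		return -ENODEV;

	/* ... ibdev is guaranteed to remain registered here ... */

	ib_device_put(ibdev);
	return 0;
}
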
4312struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
4313                                          enum rdma_driver_id driver_id);
4314struct ib_device *ib_device_get_by_name(const char *name,
4315                                        enum rdma_driver_id driver_id);
4316struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
4317                                            u16 pkey, const union ib_gid *gid,
4318                                            const struct sockaddr *addr);
4319int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
4320                         unsigned int port);
4321struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
4322
4323struct ib_wq *ib_create_wq(struct ib_pd *pd,
4324                           struct ib_wq_init_attr *init_attr);
4325int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
4326int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
4327                 u32 wq_attr_mask);
4328struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
4329                                                 struct ib_rwq_ind_table_init_attr
4330                                                 *wq_ind_table_init_attr);
4331int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
4332
4333int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
4334                 unsigned int *sg_offset, unsigned int page_size);
4335
4336static inline int
4337ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
4338                  unsigned int *sg_offset, unsigned int page_size)
4339{
4340        int n;
4341
4342        n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size);
4343        mr->iova = 0;
4344
4345        return n;
4346}
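
/*
 * Example (illustrative sketch only): mapping a scatterlist into a fast
 * registration MR before posting an IB_WR_REG_MR work request.  The MR is
 * assumed to have been allocated with ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
 * nents) and the scatterlist to be DMA mapped already.
 */
static inline int example_map_reg_mr(struct ib_mr *mr, struct scatterlist *sgl,
				     int nents)
{
	int n;

	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
	if (n < 0)
		return n;
	if (n < nents)
		return -EINVAL;	/* not all entries fit with this page size */

	/* mr->iova, mr->length and mr->rkey now describe the mapping */
	return 0;
}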
4347
4348int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
4349                unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64));
4350
4351void ib_drain_rq(struct ib_qp *qp);
4352void ib_drain_sq(struct ib_qp *qp);
4353void ib_drain_qp(struct ib_qp *qp);
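
/*
 * Example (illustrative sketch only): a typical teardown path drains the QP
 * so that all posted work requests are flushed and their completions
 * consumed before the QP is destroyed.  The generic drain support assumes
 * the associated CQs are polled via IB_POLL_SOFTIRQ or IB_POLL_WORKQUEUE.
 */
static inline void example_teardown_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* moves the QP to error and waits for CQEs */
	ib_destroy_qp(qp);
}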
4354
4355int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
4356
4357static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
4358{
4359        if (attr->type == RDMA_AH_ATTR_TYPE_ROCE)
4360                return attr->roce.dmac;
4361        return NULL;
4362}
4363
4364static inline void rdma_ah_set_dlid(struct rdma_ah_attr *attr, u32 dlid)
4365{
4366        if (attr->type == RDMA_AH_ATTR_TYPE_IB)
4367                attr->ib.dlid = (u16)dlid;
4368        else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
4369                attr->opa.dlid = dlid;
4370}
4371
4372static inline u32 rdma_ah_get_dlid(const struct rdma_ah_attr *attr)
4373{
4374        if (attr->type == RDMA_AH_ATTR_TYPE_IB)
4375                return attr->ib.dlid;
4376        else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
4377                return attr->opa.dlid;
4378        return 0;
4379}
4380
4381static inline void rdma_ah_set_sl(struct rdma_ah_attr *attr, u8 sl)
4382{
4383        attr->sl = sl;
4384}
4385
4386static inline u8 rdma_ah_get_sl(const struct rdma_ah_attr *attr)
4387{
4388        return attr->sl;
4389}
4390
4391static inline void rdma_ah_set_path_bits(struct rdma_ah_attr *attr,
4392                                         u8 src_path_bits)
4393{
4394        if (attr->type == RDMA_AH_ATTR_TYPE_IB)
4395                attr->ib.src_path_bits = src_path_bits;
4396        else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
4397                attr->opa.src_path_bits = src_path_bits;
4398}
4399
4400static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr)
4401{
4402        if (attr->type == RDMA_AH_ATTR_TYPE_IB)
4403                return attr->ib.src_path_bits;
4404        else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
4405                return attr->opa.src_path_bits;
4406        return 0;
4407}
4408
4409static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr,
4410                                        bool make_grd)
4411{
4412        if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
4413                attr->opa.make_grd = make_grd;
4414}
4415
4416static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr)
4417{
4418        if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
4419                return attr->opa.make_grd;
4420        return false;
4421}
4422
4423static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num)
4424{
4425        attr->port_num = port_num;
4426}
4427
4428static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
4429{
4430        return attr->port_num;
4431}
4432
4433static inline void rdma_ah_set_static_rate(struct rdma_ah_attr *attr,
4434                                           u8 static_rate)
4435{
4436        attr->static_rate = static_rate;
4437}
4438
4439static inline u8 rdma_ah_get_static_rate(const struct rdma_ah_attr *attr)
4440{
4441        return attr->static_rate;
4442}
4443
4444static inline void rdma_ah_set_ah_flags(struct rdma_ah_attr *attr,
4445                                        enum ib_ah_flags flag)
4446{
4447        attr->ah_flags = flag;
4448}
4449
4450static inline enum ib_ah_flags
4451                rdma_ah_get_ah_flags(const struct rdma_ah_attr *attr)
4452{
4453        return attr->ah_flags;
4454}
4455
4456static inline const struct ib_global_route
4457                *rdma_ah_read_grh(const struct rdma_ah_attr *attr)
4458{
4459        return &attr->grh;
4460}
4461
4462/* To retrieve and modify the GRH */
4463static inline struct ib_global_route
4464                *rdma_ah_retrieve_grh(struct rdma_ah_attr *attr)
4465{
4466        return &attr->grh;
4467}
4468
4469static inline void rdma_ah_set_dgid_raw(struct rdma_ah_attr *attr, void *dgid)
4470{
4471        struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
4472
4473        memcpy(grh->dgid.raw, dgid, sizeof(grh->dgid));
4474}
4475
4476static inline void rdma_ah_set_subnet_prefix(struct rdma_ah_attr *attr,
4477                                             __be64 prefix)
4478{
4479        struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
4480
4481        grh->dgid.global.subnet_prefix = prefix;
4482}
4483
4484static inline void rdma_ah_set_interface_id(struct rdma_ah_attr *attr,
4485                                            __be64 if_id)
4486{
4487        struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
4488
4489        grh->dgid.global.interface_id = if_id;
4490}
4491
4492static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
4493                                   union ib_gid *dgid, u32 flow_label,
4494                                   u8 sgid_index, u8 hop_limit,
4495                                   u8 traffic_class)
4496{
4497        struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
4498
4499        attr->ah_flags = IB_AH_GRH;
4500        if (dgid)
4501                grh->dgid = *dgid;
4502        grh->flow_label = flow_label;
4503        grh->sgid_index = sgid_index;
4504        grh->hop_limit = hop_limit;
4505        grh->traffic_class = traffic_class;
4506        grh->sgid_attr = NULL;
4507}
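
/*
 * Example (illustrative sketch only): filling a global (GRH-carrying)
 * address handle attribute for an IB port.  The destination GID, SGID table
 * index and destination LID are assumed to come from path resolution; the
 * attribute would then be handed to rdma_create_ah().
 */
static inline void example_fill_global_ah_attr(struct rdma_ah_attr *attr,
					       u8 port_num, union ib_gid *dgid,
					       u8 sgid_index, u32 dlid)
{
	memset(attr, 0, sizeof(*attr));
	attr->type = RDMA_AH_ATTR_TYPE_IB;
	rdma_ah_set_port_num(attr, port_num);
	rdma_ah_set_sl(attr, 0);
	rdma_ah_set_dlid(attr, dlid);
	/* sets IB_AH_GRH in ah_flags and fills the routing fields */
	rdma_ah_set_grh(attr, dgid, 0 /* flow_label */, sgid_index,
			64 /* hop_limit */, 0 /* traffic_class */);
}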
4508
4509void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
4510void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
4511                             u32 flow_label, u8 hop_limit, u8 traffic_class,
4512                             const struct ib_gid_attr *sgid_attr);
4513void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
4514                       const struct rdma_ah_attr *src);
4515void rdma_replace_ah_attr(struct rdma_ah_attr *old,
4516                          const struct rdma_ah_attr *new);
4517void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
4518
4519/**
4520 * rdma_ah_find_type - Return address handle type.
4521 *
4522 * @dev: Device to be checked
4523 * @port_num: Port number
4524 */
4525static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
4526                                                       u8 port_num)
4527{
4528        if (rdma_protocol_roce(dev, port_num))
4529                return RDMA_AH_ATTR_TYPE_ROCE;
4530        if (rdma_protocol_ib(dev, port_num)) {
4531                if (rdma_cap_opa_ah(dev, port_num))
4532                        return RDMA_AH_ATTR_TYPE_OPA;
4533                return RDMA_AH_ATTR_TYPE_IB;
4534        }
4535
4536        return RDMA_AH_ATTR_TYPE_UNDEFINED;
4537}
4538
4539/**
4540 * ib_lid_cpu16 - Return lid in 16bit CPU encoding.
4541 *     In the current implementation the only way to get
4542 *     the 32bit lid is from other sources for OPA.
4543 *     For IB, lids will always be 16bits so cast the
4544 *     value accordingly.
4545 *
4546 * @lid: A 32bit LID
4547 */
4548static inline u16 ib_lid_cpu16(u32 lid)
4549{
4550        WARN_ON_ONCE(lid & 0xFFFF0000);
4551        return (u16)lid;
4552}
4553
4554/**
4555 * ib_lid_be16 - Return lid in 16bit BE encoding.
4556 *
4557 * @lid: A 32bit LID
4558 */
4559static inline __be16 ib_lid_be16(u32 lid)
4560{
4561        WARN_ON_ONCE(lid & 0xFFFF0000);
4562        return cpu_to_be16((u16)lid);
4563}
4564
4565/**
4566 * ib_get_vector_affinity - Get the affinity mappings of a given completion
4567 *   vector
4568 * @device:         the rdma device
4569 * @comp_vector:    index of completion vector
4570 *
4571 * Returns the CPU map of the completion vector, or NULL if the vector
4572 * index is out of range or the device driver does not implement
4573 * get_vector_affinity.
4574 */
4575static inline const struct cpumask *
4576ib_get_vector_affinity(struct ib_device *device, int comp_vector)
4577{
4578        if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
4579            !device->ops.get_vector_affinity)
4580                return NULL;
4581
4582        return device->ops.get_vector_affinity(device, comp_vector);
4583
4584}
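
/*
 * Example (illustrative sketch only): picking a completion vector whose
 * interrupt affinity covers a preferred CPU, falling back to vector 0 when
 * the driver does not export affinity information.
 */
static inline int example_pick_comp_vector(struct ib_device *device, int cpu)
{
	const struct cpumask *mask;
	int vec;

	for (vec = 0; vec < device->num_comp_vectors; vec++) {
		mask = ib_get_vector_affinity(device, vec);
		if (mask && cpumask_test_cpu(cpu, mask))
			return vec;
	}

	return 0;
}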
4585
4586/**
4587 * rdma_roce_rescan_device - Rescan all of the network devices in the system
4588 * and add their GIDs, as needed, to the relevant RoCE devices.
4589 *
4590 * @ibdev:          the rdma device
4591 */
4592void rdma_roce_rescan_device(struct ib_device *ibdev);
4593
4594struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
4595
4596int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
4597
4598struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
4599                                     enum rdma_netdev_t type, const char *name,
4600                                     unsigned char name_assign_type,
4601                                     void (*setup)(struct net_device *));
4602
4603int rdma_init_netdev(struct ib_device *device, u8 port_num,
4604                     enum rdma_netdev_t type, const char *name,
4605                     unsigned char name_assign_type,
4606                     void (*setup)(struct net_device *),
4607                     struct net_device *netdev);
4608
4609/**
4610 * rdma_set_device_sysfs_group - Set the device attribute group to hold
4611 *                               driver specific sysfs entries for the
4612 *                               infiniband class.
4613 *
4614 * @dev:        device pointer for which the attributes are to be created
4615 * @group:      Pointer to the group which should be added when the device
4616 *              is registered with sysfs.
4617 * rdma_set_device_sysfs_group() allows existing drivers to expose one
4618 * attribute group per device for sysfs attributes.
4619 *
4620 * NOTE: New drivers should not make use of this API; instead, new device
4621 * parameters should be exposed via the netlink mechanism. This API exists
4622 * only for existing drivers.
4623 */
4624static inline void
4625rdma_set_device_sysfs_group(struct ib_device *dev,
4626                            const struct attribute_group *group)
4627{
4628        dev->groups[1] = group;
4629}
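
/*
 * Example (illustrative sketch only): an existing driver exposing a single
 * read-only attribute.  The hca_type_show() callback and the example_*
 * names are hypothetical; the group must be set before the device is
 * registered.
 */
static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "EXAMPLE-HCA\n");
}
static DEVICE_ATTR_RO(hca_type);

static struct attribute *example_attrs[] = {
	&dev_attr_hca_type.attr,
	NULL,
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

static inline void example_set_sysfs_group(struct ib_device *ibdev)
{
	/* must be called before ib_register_device() */
	rdma_set_device_sysfs_group(ibdev, &example_attr_group);
}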
4630
4631/**
4632 * rdma_device_to_ibdev - Get ib_device pointer from device pointer
4633 *
4634 * @device:     device pointer from which to retrieve the ib_device pointer
4635 *
4636 * rdma_device_to_ibdev() retrieves the ib_device pointer from the embedded
4637 * core device.
4638 */
4639static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
4640{
4641        struct ib_core_device *coredev =
4642                container_of(device, struct ib_core_device, dev);
4643
4644        return coredev->owner;
4645}
4646
4647/**
4648 * rdma_device_to_drv_device - Helper macro to reach back to driver's
4649 *                             ib_device holder structure from device pointer.
4650 *
4651 * NOTE: New drivers should not make use of this API; this API is only for
4652 * existing drivers that have exposed sysfs entries using
4653 * rdma_set_device_sysfs_group().
4654 */
4655#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member)           \
4656        container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
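
/*
 * Example (illustrative sketch only): how a sysfs show() callback in an
 * existing driver reaches its private device structure.  The struct
 * example_dev type, its "ibdev" member and its "fw_ver" field are all
 * hypothetical, so the snippet is compiled out.
 */
#if 0
static ssize_t fw_ver_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct example_dev *edev =
		rdma_device_to_drv_device(device, struct example_dev, ibdev);

	return sprintf(buf, "%s\n", edev->fw_ver);
}
static DEVICE_ATTR_RO(fw_ver);
#endif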
4657
4658bool rdma_dev_access_netns(const struct ib_device *device,
4659                           const struct net *net);
4660#endif /* IB_VERBS_H */
4661