linux/net/rds/ib.h
#ifndef _RDS_IB_H
#define _RDS_IB_H

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "rds.h"
#include "rdma_transport.h"

#define RDS_FMR_SIZE                    256
#define RDS_FMR_POOL_SIZE               8192

#define RDS_IB_MAX_SGE                  8
#define RDS_IB_RECV_SGE                 2

#define RDS_IB_DEFAULT_RECV_WR          1024
#define RDS_IB_DEFAULT_SEND_WR          256

#define RDS_IB_DEFAULT_RETRY_COUNT      2

#define RDS_IB_SUPPORTED_PROTOCOLS      0x00000003      /* minor versions supported */

#define RDS_IB_RECYCLE_BATCH_COUNT      32

extern struct rw_semaphore rds_ib_devices_lock;
extern struct list_head rds_ib_devices;
/*
 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
 * try to minimize the amount of memory tied up in both the device and
 * socket receive queues.
 */
struct rds_page_frag {
        struct list_head        f_item;
        struct list_head        f_cache_entry;
        struct scatterlist      f_sg;
};

struct rds_ib_incoming {
        struct list_head        ii_frags;
        struct list_head        ii_cache_entry;
        struct rds_incoming     ii_inc;
};

struct rds_ib_cache_head {
        struct list_head *first;
        unsigned long count;
};

struct rds_ib_refill_cache {
        struct rds_ib_cache_head __percpu *percpu;
        struct list_head         *xfer;
        struct list_head         *ready;
};
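
/*
 * Illustrative sketch, not part of the upstream header: a plausible
 * producer path for the refill cache above, inferred from the fields
 * it exposes.  Frees chain entries onto the local CPU's head; once
 * RDS_IB_RECYCLE_BATCH_COUNT entries accumulate, the whole chain
 * would be handed to the shared ->xfer list for the refill path to
 * splice onto ->ready.  The authoritative logic lives in ib_recv.c.
 */
static inline void rds_ib_cache_put_sketch(struct rds_ib_refill_cache *cache,
                                           struct list_head *new_item)
{
        struct rds_ib_cache_head *chp;

        chp = get_cpu_ptr(cache->percpu);       /* pin this CPU's head */
        if (!chp->first)
                INIT_LIST_HEAD(new_item);
        else
                list_add_tail(new_item, chp->first);
        chp->first = new_item;
        chp->count++;
        /* at RDS_IB_RECYCLE_BATCH_COUNT the chain would move to ->xfer */
        put_cpu_ptr(cache->percpu);
}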

struct rds_ib_connect_private {
        /* Add new fields at the end, and don't permute existing fields. */
        __be32                  dp_saddr;
        __be32                  dp_daddr;
        u8                      dp_protocol_major;
        u8                      dp_protocol_minor;
        __be16                  dp_protocol_minor_mask; /* bitmask */
        __be32                  dp_reserved1;
        __be64                  dp_ack_seq;
        __be32                  dp_credit;              /* non-zero enables flow ctl */
};
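
/*
 * Illustrative sketch (an assumption, not upstream code): protocol
 * negotiation intersects the peer's advertised minor-version bitmask
 * with our own RDS_IB_SUPPORTED_PROTOCOLS, then picks the highest
 * minor the two sides share.  The authoritative check is in ib_cm.c.
 */
static inline int rds_ib_highest_common_minor(const struct rds_ib_connect_private *dp)
{
        u16 common = be16_to_cpu(dp->dp_protocol_minor_mask) &
                     RDS_IB_SUPPORTED_PROTOCOLS;
        int minor = -1;         /* -1: no minor version in common */

        while (common) {        /* index of the highest set bit */
                common >>= 1;
                minor++;
        }
        return minor;
}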

struct rds_ib_send_work {
        void                    *s_op;
        struct ib_send_wr       s_wr;
        struct ib_sge           s_sge[RDS_IB_MAX_SGE];
        unsigned long           s_queued;
};

struct rds_ib_recv_work {
        struct rds_ib_incoming  *r_ibinc;
        struct rds_page_frag    *r_frag;
        struct ib_recv_wr       r_wr;
        struct ib_sge           r_sge[2];
};

struct rds_ib_work_ring {
        u32             w_nr;
        u32             w_alloc_ptr;
        u32             w_alloc_ctr;
        u32             w_free_ptr;
        atomic_t        w_free_ctr;
};

struct rds_ib_device;

struct rds_ib_connection {

        struct list_head        ib_node;
        struct rds_ib_device    *rds_ibdev;
        struct rds_connection   *conn;

        /* alphabet soup, IBTA style */
        struct rdma_cm_id       *i_cm_id;
        struct ib_pd            *i_pd;
        struct ib_mr            *i_mr;
        struct ib_cq            *i_send_cq;
        struct ib_cq            *i_recv_cq;

        /* tx */
        struct rds_ib_work_ring i_send_ring;
        struct rm_data_op       *i_data_op;
        struct rds_header       *i_send_hdrs;
        u64                     i_send_hdrs_dma;
        struct rds_ib_send_work *i_sends;
        atomic_t                i_signaled_sends;

        /* rx */
        struct tasklet_struct   i_recv_tasklet;
        struct mutex            i_recv_mutex;
        struct rds_ib_work_ring i_recv_ring;
        struct rds_ib_incoming  *i_ibinc;
        u32                     i_recv_data_rem;
        struct rds_header       *i_recv_hdrs;
        u64                     i_recv_hdrs_dma;
        struct rds_ib_recv_work *i_recvs;
        u64                     i_ack_recv;     /* last ACK received */
        struct rds_ib_refill_cache i_cache_incs;
        struct rds_ib_refill_cache i_cache_frags;

        /* sending acks */
        unsigned long           i_ack_flags;
#ifdef KERNEL_HAS_ATOMIC64
        atomic64_t              i_ack_next;     /* next ACK to send */
#else
        spinlock_t              i_ack_lock;     /* protect i_ack_next */
        u64                     i_ack_next;     /* next ACK to send */
#endif
        struct rds_header       *i_ack;
        struct ib_send_wr       i_ack_wr;
        struct ib_sge           i_ack_sge;
        u64                     i_ack_dma;
        unsigned long           i_ack_queued;
        /* Flow control related information
         *
         * Our algorithm uses a pair of variables that we need to access
         * atomically - one for the send credits, and one for the posted
         * recv credits we need to transfer to the remote.
         * Rather than protect them with a slow spinlock, we put both into
         * a single atomic_t and update it using cmpxchg.
         */
        atomic_t                i_credits;

        /* Protocol version specific information */
        unsigned int            i_flowctl:1;    /* enable/disable flow ctl */

        /* Batched completions */
        unsigned int            i_unsignaled_wrs;
};

/* This assumes that atomic_t is at least 32 bits */
#define IB_GET_SEND_CREDITS(v)  ((v) & 0xffff)
#define IB_GET_POST_CREDITS(v)  ((v) >> 16)
#define IB_SET_SEND_CREDITS(v)  ((v) & 0xffff)
#define IB_SET_POST_CREDITS(v)  ((v) << 16)
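
/*
 * Illustrative sketch (an assumption, not upstream code): updating
 * both halves of i_credits in a single atomic step with cmpxchg, as
 * the comment in struct rds_ib_connection describes.  The real grab
 * and advertise paths are in ib_send.c.
 */
static inline void rds_ib_add_send_credits_sketch(struct rds_ib_connection *ic,
                                                  unsigned int credits)
{
        unsigned int oldval, newval;

        do {
                oldval = atomic_read(&ic->i_credits);
                newval = IB_SET_SEND_CREDITS(IB_GET_SEND_CREDITS(oldval) + credits) |
                         IB_SET_POST_CREDITS(IB_GET_POST_CREDITS(oldval));
                /* retry if another CPU raced and changed i_credits */
        } while (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval);
}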

struct rds_ib_ipaddr {
        struct list_head        list;
        __be32                  ipaddr;
};

struct rds_ib_device {
        struct list_head        list;
        struct list_head        ipaddr_list;
        struct list_head        conn_list;
        struct ib_device        *dev;
        struct ib_pd            *pd;
        struct ib_mr            *mr;
        struct rds_ib_mr_pool   *mr_pool;
        unsigned int            fmr_max_remaps;
        unsigned int            max_fmrs;
        int                     max_sge;
        unsigned int            max_wrs;
        unsigned int            max_initiator_depth;
        unsigned int            max_responder_resources;
        spinlock_t              spinlock;       /* protect the above */
        atomic_t                refcount;
        struct work_struct      free_work;
};

#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)

/* bits for i_ack_flags */
#define IB_ACK_IN_FLIGHT        0
#define IB_ACK_REQUESTED        1

/* Magic WR_ID for ACKs */
#define RDS_IB_ACK_WR_ID        (~(u64) 0)
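
/*
 * Illustrative sketch (an assumption, not upstream code): the typical
 * way the i_ack_flags bits above gate ACK transmission -- post a new
 * ACK only when one has been requested and none is already in flight.
 * See rds_ib_attempt_ack() in ib_recv.c for the real policy.
 */
static inline int rds_ib_ack_needed_sketch(struct rds_ib_connection *ic)
{
        return test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags) &&
               !test_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
}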

struct rds_ib_statistics {
        uint64_t        s_ib_connect_raced;
        uint64_t        s_ib_listen_closed_stale;
        uint64_t        s_ib_tx_cq_call;
        uint64_t        s_ib_tx_cq_event;
        uint64_t        s_ib_tx_ring_full;
        uint64_t        s_ib_tx_throttle;
        uint64_t        s_ib_tx_sg_mapping_failure;
        uint64_t        s_ib_tx_stalled;
        uint64_t        s_ib_tx_credit_updates;
        uint64_t        s_ib_rx_cq_call;
        uint64_t        s_ib_rx_cq_event;
        uint64_t        s_ib_rx_ring_empty;
        uint64_t        s_ib_rx_refill_from_cq;
        uint64_t        s_ib_rx_refill_from_thread;
        uint64_t        s_ib_rx_alloc_limit;
        uint64_t        s_ib_rx_credit_updates;
        uint64_t        s_ib_ack_sent;
        uint64_t        s_ib_ack_send_failure;
        uint64_t        s_ib_ack_send_delayed;
        uint64_t        s_ib_ack_send_piggybacked;
        uint64_t        s_ib_ack_received;
        uint64_t        s_ib_rdma_mr_alloc;
        uint64_t        s_ib_rdma_mr_free;
        uint64_t        s_ib_rdma_mr_used;
        uint64_t        s_ib_rdma_mr_pool_flush;
        uint64_t        s_ib_rdma_mr_pool_wait;
        uint64_t        s_ib_rdma_mr_pool_depleted;
        uint64_t        s_ib_atomic_cswp;
        uint64_t        s_ib_atomic_fadd;
};

extern struct workqueue_struct *rds_ib_wq;

/*
 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
 * doesn't define it.
 */
static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
                struct scatterlist *sg, unsigned int sg_dma_len, int direction)
{
        unsigned int i;

        for (i = 0; i < sg_dma_len; ++i) {
                ib_dma_sync_single_for_cpu(dev,
                                ib_sg_dma_address(dev, &sg[i]),
                                ib_sg_dma_len(dev, &sg[i]),
                                direction);
        }
}
#define ib_dma_sync_sg_for_cpu  rds_ib_dma_sync_sg_for_cpu

static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
                struct scatterlist *sg, unsigned int sg_dma_len, int direction)
{
        unsigned int i;

        for (i = 0; i < sg_dma_len; ++i) {
                ib_dma_sync_single_for_device(dev,
                                ib_sg_dma_address(dev, &sg[i]),
                                ib_sg_dma_len(dev, &sg[i]),
                                direction);
        }
}
#define ib_dma_sync_sg_for_device       rds_ib_dma_sync_sg_for_device


/* ib.c */
extern struct rds_transport rds_ib_transport;
struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
extern struct ib_client rds_ib_client;

extern unsigned int fmr_message_size;
extern unsigned int rds_ib_retry_count;

extern spinlock_t ib_nodev_conns_lock;
extern struct list_head ib_nodev_conns;

/* ib_cm.c */
int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
void rds_ib_conn_free(void *arg);
int rds_ib_conn_connect(struct rds_connection *conn);
void rds_ib_conn_shutdown(struct rds_connection *conn);
void rds_ib_state_change(struct sock *sk);
int rds_ib_listen_init(void);
void rds_ib_listen_stop(void);
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                             struct rdma_cm_event *event);
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
                                struct rdma_cm_event *event);


#define rds_ib_conn_error(conn, fmt...) \
        __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)

/* ib_rdma.c */
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_destroy_nodev_conns(void);
struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
                    struct rds_sock *rs, u32 *key_ret);
void rds_ib_sync_mr(void *trans_private, int dir);
void rds_ib_free_mr(void *trans_private, int invalidate);
void rds_ib_flush_mrs(void);

/* ib_recv.c */
int rds_ib_recv_init(void);
void rds_ib_recv_exit(void);
int rds_ib_recv(struct rds_connection *conn);
int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
                             size_t size);
void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
void rds_ib_recv_tasklet_fn(unsigned long data);
void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
void rds_ib_attempt_ack(struct rds_ib_connection *ic);
void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);

/* ib_ring.c */
void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr);
u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos);
void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val);
void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val);
int rds_ib_ring_empty(struct rds_ib_work_ring *ring);
int rds_ib_ring_low(struct rds_ib_work_ring *ring);
u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring);
u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
extern wait_queue_head_t rds_ib_ring_empty_wait;
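
/*
 * Illustrative sketch (an assumption, not upstream code): the usual
 * shape of a work-ring transaction with the API above.  A sender
 * reserves descriptors with rds_ib_ring_alloc(); entries it fails to
 * post go back via rds_ib_ring_unalloc(), and completion handlers
 * eventually release finished entries with rds_ib_ring_free().
 */
static inline int rds_ib_ring_usage_sketch(struct rds_ib_connection *ic,
                                           u32 wanted)
{
        u32 pos;
        u32 got = rds_ib_ring_alloc(&ic->i_send_ring, wanted, &pos);

        if (got == 0)
                return -ENOMEM;         /* ring full; caller throttles */

        /*
         * Build and post 'got' work requests starting at
         * ic->i_sends[pos], wrapping at ic->i_send_ring.w_nr; any
         * unposted entries go back via rds_ib_ring_unalloc().
         */
        return got;
}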

/* ib_send.c */
char *rds_ib_wc_status_str(enum ib_wc_status status);
void rds_ib_xmit_complete(struct rds_connection *conn);
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                unsigned int hdr_off, unsigned int sg, unsigned int off);
void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
void rds_ib_send_init_ring(struct rds_ib_connection *ic);
void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
                             u32 *adv_credits, int need_posted, int max_posted);
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);

/* ib_stats.c */
DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
                                    unsigned int avail);

/* ib_sysctl.c */
int rds_ib_sysctl_init(void);
void rds_ib_sysctl_exit(void);
extern unsigned long rds_ib_sysctl_max_send_wr;
extern unsigned long rds_ib_sysctl_max_recv_wr;
extern unsigned long rds_ib_sysctl_max_unsig_wrs;
extern unsigned long rds_ib_sysctl_max_unsig_bytes;
extern unsigned long rds_ib_sysctl_max_recv_allocation;
extern unsigned int rds_ib_sysctl_flow_control;

#endif