linux/drivers/infiniband/ulp/rtrs/rtrs-pri.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/*
   3 * RDMA Transport Layer
   4 *
   5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
   6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
   7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
   8 */
   9
  10#ifndef RTRS_PRI_H
  11#define RTRS_PRI_H
  12
  13#include <linux/uuid.h>
  14#include <rdma/rdma_cm.h>
  15#include <rdma/ib_verbs.h>
  16#include <rdma/ib.h>
  17
  18#include "rtrs.h"
  19
  /*
   * RTRS protocol version numbers. RTRS_PROTO_VER_STRING is formed by
   * string-literal concatenation of the stringified major number, ".",
   * and the stringified minor number.
   */
  20#define RTRS_PROTO_VER_MAJOR 2
  21#define RTRS_PROTO_VER_MINOR 0
  22
  23#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
  24                               __stringify(RTRS_PROTO_VER_MINOR)
  25
  /*
   * Layout of the 32-bit immediate value assembled by rtrs_to_imm():
   * MAX_IMM_TYPE_BITS bits of type in the most significant bits and
   * MAX_IMM_PAYL_BITS bits of payload below them. rtrs_to_imm() asserts
   * at build time that the two widths add up to 32.
   */
  26enum rtrs_imm_const {
  27        MAX_IMM_TYPE_BITS = 4,
  28        MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1), /* 0xf */
  29        MAX_IMM_PAYL_BITS = 28,
  30        MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1), /* 0x0fffffff */
  31};
  32
  /*
   * Values for the type field of the immediate value (see rtrs_to_imm()).
   * rtrs_to_imm() checks at build time that RTRS_LAST_IMM does not exceed
   * 1 << MAX_IMM_TYPE_BITS.
   */
  33enum rtrs_imm_type {
  34        RTRS_IO_REQ_IMM       = 0, /* client to server */
  35        RTRS_IO_RSP_IMM       = 1, /* server to client */
  36        RTRS_IO_RSP_W_INV_IMM = 2, /* server to client; picked by rtrs_to_io_rsp_imm() when w_inval is true */
  37
  38        RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
  39        RTRS_HB_ACK_IMM = 9, /* heartbeat acknowledgement */
  40
  41        RTRS_LAST_IMM, /* one past the highest type value, i.e. 10 */
  42};
  43
  /* Transport-wide limits, defaults and protocol identification constants. */
  44enum {
  45        SERVICE_CON_QUEUE_DEPTH = 512,
  46
  47        MAX_PATHS_NUM = 128,
  48
  49        /*
  50         * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
  51         * and the minimum chunk size is 4096 (2^12).
  52         * So the maximum sess_queue_depth is 65536 (2^16) in theory.
  53         * But mempool_create, create_qp and ib_post_send fail with
  54         * "cannot allocate memory" error if sess_queue_depth is too big.
  55         * Therefore the practical max value of sess_queue_depth is
  56         * somewhere between 1 and 65534 and it depends on the system.
             *
             * NOTE(review): the text above quotes 4096 as the minimum chunk
             * size while MIN_CHUNK_SIZE below is 8192 - confirm which one
             * is current and adjust the arithmetic accordingly.
  57         */
  58        MAX_SESS_QUEUE_DEPTH = 65535,
  59        MIN_CHUNK_SIZE = 8192,
  60
  61        RTRS_HB_INTERVAL_MS = 5000, /* heartbeat interval, milliseconds */
  62        RTRS_HB_MISSED_MAX = 5, /* presumably the tolerated number of missed heartbeats - confirm */
  63
  64        RTRS_MAGIC = 0x1BBD, /* value for the "magic" field of the connection messages */
  65        RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR, /* major above bit 7, minor in the low byte */
  66};
  67
  /* Forward declaration; the structure itself is defined further down. */
  68struct rtrs_ib_dev;
  69
    /*
     * Callback table referenced from struct rtrs_rdma_dev_pd (member "ops").
     * When and in which order the callbacks are invoked is decided by the
     * pool implementation, which is not part of this header.
     */
  70struct rtrs_rdma_dev_pd_ops {
  71        struct rtrs_ib_dev *(*alloc)(void); /* returns a device object */
  72        void (*free)(struct rtrs_ib_dev *dev); /* takes the object to release */
  73        int (*init)(struct rtrs_ib_dev *dev); /* int result; presumably 0 on success - confirm */
  74        void (*deinit)(struct rtrs_ib_dev *dev);
  75};
  76
  /*
   * Pool descriptor handed to rtrs_rdma_dev_pd_init(),
   * rtrs_rdma_dev_pd_deinit() and rtrs_ib_dev_find_or_add().
   * NOTE(review): "mutex" presumably serialises access to "list", and
   * "list" presumably links struct rtrs_ib_dev objects through their
   * "entry" member - confirm against rtrs.c.
   */
  77struct rtrs_rdma_dev_pd {
  78        struct mutex            mutex;
  79        struct list_head        list;
  80        enum ib_pd_flags        pd_flags; /* same type as the pd_flags argument of rtrs_rdma_dev_pd_init() */
  81        const struct rtrs_rdma_dev_pd_ops *ops; /* callback table, read-only through this pointer */
  82};
  83
  /*
   * Per-device object returned by rtrs_ib_dev_find_or_add() and released
   * with rtrs_ib_dev_put().
   */
  84struct rtrs_ib_dev {
  85        struct ib_device         *ib_dev; /* underlying IB device */
  86        struct ib_pd             *ib_pd; /* protection domain pointer */
  87        struct kref              ref; /* reference counter */
  88        struct list_head         entry; /* list linkage; presumably into rtrs_rdma_dev_pd.list - confirm */
  89        struct rtrs_rdma_dev_pd *pool; /* pool this object is associated with */
  90};
  91
  /*
   * State of one RTRS connection: the session it belongs to plus its
   * queue pair, completion queue and RDMA CM id.
   */
  92struct rtrs_con {
  93        struct rtrs_sess        *sess; /* owning session */
  94        struct ib_qp            *qp;
  95        struct ib_cq            *cq;
  96        struct rdma_cm_id       *cm_id;
  97        unsigned int            cid; /* connection id; presumably the index into rtrs_sess.con - confirm */
  98        int                     nr_cqe; /* same name as the nr_cqe argument of rtrs_cq_qp_create() */
  99        atomic_t                wr_cnt; /* atomic counter; exact meaning not visible in this header */
 100        atomic_t                sq_wr_avail; /* atomic counter; presumably free send-queue slots - confirm */
 101};
 102
 /*
  * State of one RTRS session: addresses, name, connection array and
  * heartbeat bookkeeping. The hb_* members mirror the arguments of
  * rtrs_init_hb() (cqe, interval_ms, missed_max, err_handler, wq);
  * presumably that function fills them in - confirm against rtrs.c.
  */
 103struct rtrs_sess {
 104        struct list_head        entry;
 105        struct sockaddr_storage dst_addr;
 106        struct sockaddr_storage src_addr;
 107        char                    sessname[NAME_MAX];
 108        uuid_t                  uuid;
 109        struct rtrs_con **con; /* array of connection pointers */
 110        unsigned int            con_num; /* presumably the number of entries in con - confirm */
 111        unsigned int            irq_con_num;
 112        unsigned int            recon_cnt; /* reconnection counter (cf. rtrs_msg_conn_req.recon_cnt) */
 113        unsigned int            signal_interval;
 114        struct rtrs_ib_dev      *dev;
 115        int                     dev_ref;
 116        struct ib_cqe           *hb_cqe;
 117        void                    (*hb_err_handler)(struct rtrs_con *con);
 118        struct workqueue_struct *hb_wq;
 119        struct delayed_work     hb_dwork;
 120        unsigned int            hb_interval_ms; /* milliseconds */
 121        unsigned int            hb_missed_cnt;
 122        unsigned int            hb_missed_max;
 123        ktime_t                 hb_last_sent;
 124        ktime_t                 hb_cur_latency;
 125};
 126
 127/* rtrs information unit: a buffer together with its DMA address and completion entry */
 128struct rtrs_iu {
 129        struct ib_cqe           cqe; /* completion queue entry */
 130        dma_addr_t              dma_addr; /* DMA address; presumably of buf - confirm */
 131        void                    *buf; /* data buffer */
 132        size_t                  size; /* presumably the size of buf in bytes - confirm */
 133        enum dma_data_direction direction; /* DMA transfer direction */
 134};
 135
 136/**
 137 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 138 * @RTRS_MSG_INFO_REQ:          Client additional info request to the server
 139 * @RTRS_MSG_INFO_RSP:          Server additional info response to the client
 140 * @RTRS_MSG_WRITE:             Client writes data per RDMA to server
 141 * @RTRS_MSG_READ:              Client requests data transfer from server
 142 * @RTRS_MSG_RKEY_RSP:          Server refreshed rkey for rbuf
     *
     * These values go into the "type" field of the rtrs_msg_* structures.
     * The enumerators have no explicit initialisers, so they are numbered
     * from 0 in declaration order; reordering them changes their values.
 143 */
 144enum rtrs_msg_types {
 145        RTRS_MSG_INFO_REQ,
 146        RTRS_MSG_INFO_RSP,
 147        RTRS_MSG_WRITE,
 148        RTRS_MSG_READ,
 149        RTRS_MSG_RKEY_RSP,
 150};
 151
 152/**
 153 * enum rtrs_msg_flags - RTRS message flags.
 154 * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
 155 * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response.
     *
     * Each flag is a distinct bit, so the flags can be OR-ed together.
 156 */
 157enum rtrs_msg_flags {
 158        RTRS_MSG_NEED_INVAL_F = 1 << 0,
 159        RTRS_MSG_NEW_RKEY_F = 1 << 1,
 160};
 161
 162/**
 163 * struct rtrs_sg_desc - RDMA-Buffer entry description
 164 * @addr:       Address of RDMA destination buffer
 165 * @key:        Authorization rkey to write to the buffer
 166 * @len:        Size of the buffer
     *
     * All members are declared with little-endian types.
 167 */
 168struct rtrs_sg_desc {
 169        __le64                  addr;
 170        __le32                  key;
 171        __le32                  len;
 172};
 173
 174/**
 175 * struct rtrs_msg_conn_req - Client connection request to the server
     * @__cma_version: Not for RTRS use, see the comment at the member
     * @__ip_version:  Not for RTRS use, see the comment at the member
 176 * @magic:         RTRS magic
 177 * @version:       RTRS protocol version
 178 * @cid:           Current connection id
 179 * @cid_num:       Number of connections per session
 180 * @recon_cnt:     Reconnections counter
 181 * @sess_uuid:     UUID of a session (path)
 182 * @paths_uuid:    UUID of a group of sessions (paths)
     * @first_conn:    One-bit flag; presumably marks the first connection of
     *                 a session - confirm against the client code
     * @reserved_bits: Remaining seven bits of the flag byte
     * @reserved:      Trailing reserved bytes
 183 *
 184 * NOTE: max size 56 bytes, see man rdma_connect().
 185 */
 186struct rtrs_msg_conn_req {
 187        /* Is set to 0 by cma.c in case of AF_IB, do not touch that.
 188         * see https://www.spinics.net/lists/linux-rdma/msg22397.html
 189         */
 190        u8              __cma_version;
 191        /* On the sender side this must be set to 0, otherwise
 192         * cma_save_ip_info() extracts garbage and fails.
 193         */
 194        u8              __ip_version;
 195        __le16          magic;
 196        __le16          version;
 197        __le16          cid;
 198        __le16          cid_num;
 199        __le16          recon_cnt;
 200        uuid_t          sess_uuid;
 201        uuid_t          paths_uuid;
 202        u8              first_conn : 1;
 203        u8              reserved_bits : 7;
 204        u8              reserved[11];
 205};
 206
 207/**
 208 * struct rtrs_msg_conn_rsp - Server connection response to the client
 209 * @magic:         RTRS magic
 210 * @version:       RTRS protocol version
 211 * @errno:         If rdma_accept() then 0, if rdma_reject() indicates error
 212 * @queue_depth:   max inflight messages (queue-depth) in this session
 213 * @max_io_size:   max io size server supports
 214 * @max_hdr_size:  max msg header size server supports
     * @flags:         32-bit flag word; the individual flag values are not
     *                 defined in this header
     * @reserved:      Trailing reserved bytes; the member sizes
     *                 (4 * 2 + 3 * 4 + 36) add up to the 56 bytes noted below
 215 *
 216 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 217 */
 218struct rtrs_msg_conn_rsp {
 219        __le16          magic;
 220        __le16          version;
 221        __le16          errno;
 222        __le16          queue_depth;
 223        __le32          max_io_size;
 224        __le32          max_hdr_size;
 225        __le32          flags;
 226        u8              reserved[36];
 227};
 228
 229/**
 230 * struct rtrs_msg_info_req - Client additional info request to the server
 231 * @type:               @RTRS_MSG_INFO_REQ
 232 * @sessname:           Session name chosen by client
     * @reserved:           Trailing reserved bytes
 233 */
 234struct rtrs_msg_info_req {
 235        __le16          type;
 236        u8              sessname[NAME_MAX];
 237        u8              reserved[15];
 238};
 239
 240/**
 241 * struct rtrs_msg_info_rsp - Server additional info response to the client
 242 * @type:               @RTRS_MSG_INFO_RSP
 243 * @sg_cnt:             Number of @desc entries
     * @reserved:           Reserved bytes between the fixed header and @desc
 244 * @desc:               RDMA buffers where the client can write to server
     *
     * @desc is a flexible array member, so the message length depends on
     * @sg_cnt.
 245 */
 246struct rtrs_msg_info_rsp {
 247        __le16          type;
 248        __le16          sg_cnt;
 249        u8              reserved[4];
 250        struct rtrs_sg_desc desc[];
 251};
 252
 253/**
 254 * struct rtrs_msg_rkey_rsp - Server refreshed rkey for an RDMA buffer
 255 * @type:               @RTRS_MSG_RKEY_RSP
 256 * @buf_id:             RDMA buf_id of the new rkey
 257 * @rkey:               new remote key for RDMA buffers id from server
 258 */
 259struct rtrs_msg_rkey_rsp {
 260        __le16          type;
 261        __le16          buf_id;
 262        __le32          rkey;
 263};
 264
 265/**
 266 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 267 * @type:               always @RTRS_MSG_READ
 268 * @usr_len:            length of user payload
     * @flags:              16-bit flag word; presumably a combination of
     *                      enum rtrs_msg_flags values - confirm
 269 * @sg_cnt:             number of @desc entries
 270 * @desc:               RDMA buffers where the server can write the result to
 271 */
 272struct rtrs_msg_rdma_read {
 273        __le16                  type;
 274        __le16                  usr_len;
 275        __le16                  flags;
 276        __le16                  sg_cnt;
 277        struct rtrs_sg_desc    desc[];
 278};
 279
 280/**
 281 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 282 * @type:               always @RTRS_MSG_WRITE
 283 * @usr_len:            length of user payload
 284 */
 285struct rtrs_msg_rdma_write {
 286        __le16                  type;
 287        __le16                  usr_len;
 288};
 289
 290/**
 291 * struct rtrs_msg_rdma_hdr - header for read or write request
 292 * @type:               @RTRS_MSG_WRITE | @RTRS_MSG_READ
     *
     * struct rtrs_msg_rdma_read and struct rtrs_msg_rdma_write both start
     * with the same __le16 type member.
 293 */
 294struct rtrs_msg_rdma_hdr {
 295        __le16                  type;
 296};
 297
 298/* rtrs.c */
 299
    /*
     * Helpers operating on struct rtrs_iu. Judging by their names they
     * allocate and free information units and post them on a connection
     * as receive, send and RDMA-write-with-immediate requests; all posting
     * helpers return int.
     * NOTE(review): the enum dma_data_direction parameter of
     * rtrs_iu_alloc() is unnamed in this prototype.
     */
 300struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
 301                              struct ib_device *dev, enum dma_data_direction,
 302                              void (*done)(struct ib_cq *cq, struct ib_wc *wc));
 303void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
 304int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
 305int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
 306                      struct ib_send_wr *head);
 307int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
 308                                struct ib_sge *sge, unsigned int num_sge,
 309                                u32 rkey, u64 rdma_addr, u32 imm_data,
 310                                enum ib_send_flags flags,
 311                                struct ib_send_wr *head,
 312                                struct ib_send_wr *tail);
 313
 /* Takes a connection and a completion entry only, no buffer; returns int. */
 314int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
 315
 /*
  * Creation and teardown of the completion queue / queue pair of a
  * connection (struct rtrs_con has matching cq and qp members).
  * rtrs_cq_qp_create() returns int; presumably 0 on success - confirm.
  */
 316int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
 317                      u32 max_send_sge, int cq_vector, int nr_cqe,
 318                      u32 max_send_wr, u32 max_recv_wr,
 319                      enum ib_poll_context poll_ctx);
 320void rtrs_cq_qp_destroy(struct rtrs_con *con);
 321
 /*
  * Heartbeat (HB) interface of a session. The arguments of rtrs_init_hb()
  * correspond by name and type to the hb_cqe, hb_interval_ms,
  * hb_missed_max, hb_err_handler and hb_wq members of struct rtrs_sess.
  */
 322void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
 323                  unsigned int interval_ms, unsigned int missed_max,
 324                  void (*err_handler)(struct rtrs_con *con),
 325                  struct workqueue_struct *wq);
 326void rtrs_start_hb(struct rtrs_sess *sess);
 327void rtrs_stop_hb(struct rtrs_sess *sess);
 328void rtrs_send_hb_ack(struct rtrs_sess *sess);
 329
 /*
  * Device pool interface: set up / tear down a struct rtrs_rdma_dev_pd and
  * look up or add the struct rtrs_ib_dev for an IB device in it.
  * rtrs_ib_dev_put() returns int; the meaning of that value is not visible
  * in this header.
  */
 330void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
 331                           struct rtrs_rdma_dev_pd *pool);
 332void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);
 333
 334struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
 335                                            struct rtrs_rdma_dev_pd *pool);
 336int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
 337
 /*
  * Pack a type and a payload into one 32-bit immediate value: the type is
  * masked with MAX_IMM_TYPE_MASK and shifted above the payload, the payload
  * is masked with MAX_IMM_PAYL_MASK. Bits outside the masks are dropped
  * without any error.
  */
 338static inline u32 rtrs_to_imm(u32 type, u32 payload)
 339{
 340        BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32); /* both fields together must fill 32 bits */
 341        BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS)); /* every type value must fit the type field */
 342        return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
 343                (payload & MAX_IMM_PAYL_MASK);
 344}
 345
 /*
  * Split an immediate value: *payload receives the low MAX_IMM_PAYL_BITS
  * bits, *type receives the bits above them. Both output pointers are
  * written unconditionally.
  */
 346static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
 347{
 348        *payload = imm & MAX_IMM_PAYL_MASK;
 349        *type = imm >> MAX_IMM_PAYL_BITS;
 350}
 351
 /*
  * Build an immediate value of type RTRS_IO_REQ_IMM with addr as payload;
  * rtrs_to_imm() keeps only the low MAX_IMM_PAYL_BITS bits of addr.
  */
 352static inline u32 rtrs_to_io_req_imm(u32 addr)
 353{
 354        return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
 355}
 356
 /*
  * Build an I/O response immediate value. The payload holds the absolute
  * value of errno in its upper 9 bits and msg_id in its lower 19 bits;
  * anything that does not fit those widths is masked off. The type is
  * RTRS_IO_RSP_W_INV_IMM when w_inval is true, RTRS_IO_RSP_IMM otherwise.
  */
 357static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
 358{
 359        enum rtrs_imm_type type;
 360        u32 payload;
 361
 362        /* 9 bits for errno, 19 bits for msg_id */
 363        payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
 364        type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;
 365
 366        return rtrs_to_imm(type, payload);
 367}
 368
 /*
  * Decode a payload laid out as in rtrs_to_io_rsp_imm(): *msg_id receives
  * the low 19 bits, *errno receives the negated 9-bit error field, i.e. a
  * value between -511 and 0. The argument is the payload part only, not a
  * complete immediate value.
  */
 369static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
 370{
 371        /* 9 bits for errno, 19 bits for msg_id */
 372        *msg_id = payload & 0x7ffff;
 373        *errno = -(int)((payload >> 19) & 0x1ff);
 374}
 375
 /*
  * STAT_STORE_FUNC(type, set_value, reset) - generate a static function
  * set_value##_store(kobj, attr, buf, count).
  *
  * The generated function obtains the enclosing "type" object from kobj via
  * container_of() on its kobj_stats member. If buf matches "1" (compared
  * with sysfs_streq()) it calls reset(stats, true), if it matches "0" it
  * calls reset(stats, false). Any other input leaves ret at -EINVAL. A
  * non-zero ret is returned as is, otherwise count is returned.
  */
 376#define STAT_STORE_FUNC(type, set_value, reset)                         \
 377static ssize_t set_value##_store(struct kobject *kobj,                  \
 378                             struct kobj_attribute *attr,               \
 379                             const char *buf, size_t count)             \
 380{                                                                       \
 381        int ret = -EINVAL;                                              \
 382        type *stats = container_of(kobj, type, kobj_stats);             \
 383                                                                        \
 384        if (sysfs_streq(buf, "1"))                                      \
 385                ret = reset(stats, true);                       \
 386        else if (sysfs_streq(buf, "0"))                                 \
 387                ret = reset(stats, false);                      \
 388        if (ret)                                                        \
 389                return ret;                                             \
 390                                                                        \
 391        return count;                                                   \
 392}
 393
 /*
  * STAT_SHOW_FUNC(type, get_value, print) - generate a static function
  * get_value##_show(kobj, attr, page).
  *
  * The generated function obtains the enclosing "type" object from kobj via
  * container_of() on its kobj_stats member and returns the result of
  * print(stats, page, PAGE_SIZE).
  */
 394#define STAT_SHOW_FUNC(type, get_value, print)                          \
 395static ssize_t get_value##_show(struct kobject *kobj,                   \
 396                           struct kobj_attribute *attr,                 \
 397                           char *page)                                  \
 398{                                                                       \
 399        type *stats = container_of(kobj, type, kobj_stats);             \
 400                                                                        \
 401        return print(stats, page, PAGE_SIZE);                   \
 402}
 403
 /*
  * STAT_ATTR(type, stat, print, reset) - emit stat##_store() and
  * stat##_show() through the two macros above it in this header, followed
  * by a static struct kobj_attribute named stat##_attr initialised with
  * __ATTR_RW(stat). The expansion does not end in a semicolon, so the
  * user of the macro has to supply it.
  */
 404#define STAT_ATTR(type, stat, print, reset)                             \
 405STAT_STORE_FUNC(type, stat, reset)                                      \
 406STAT_SHOW_FUNC(type, stat, print)                                       \
 407static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
 408
 409#endif /* RTRS_PRI_H */
 410