linux/drivers/infiniband/ulp/rtrs/rtrs.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * RDMA Transport Layer
   4 *
   5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
   6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
   7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
   8 */
   9#undef pr_fmt
  10#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
  11
  12#include <linux/module.h>
  13#include <linux/inet.h>
  14
  15#include "rtrs-pri.h"
  16#include "rtrs-log.h"
  17
  18MODULE_DESCRIPTION("RDMA Transport Core");
  19MODULE_LICENSE("GPL");
  20
  21struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
  22                              struct ib_device *dma_dev,
  23                              enum dma_data_direction dir,
  24                              void (*done)(struct ib_cq *cq, struct ib_wc *wc))
  25{
  26        struct rtrs_iu *ius, *iu;
  27        int i;
  28
  29        ius = kcalloc(iu_num, sizeof(*ius), gfp_mask);
  30        if (!ius)
  31                return NULL;
  32        for (i = 0; i < iu_num; i++) {
  33                iu = &ius[i];
  34                iu->direction = dir;
  35                iu->buf = kzalloc(size, gfp_mask);
  36                if (!iu->buf)
  37                        goto err;
  38
  39                iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
  40                if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
  41                        goto err;
  42
  43                iu->cqe.done  = done;
  44                iu->size      = size;
  45        }
  46        return ius;
  47err:
  48        rtrs_iu_free(ius, dma_dev, i);
  49        return NULL;
  50}
  51EXPORT_SYMBOL_GPL(rtrs_iu_alloc);
  52
  53void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_num)
  54{
  55        struct rtrs_iu *iu;
  56        int i;
  57
  58        if (!ius)
  59                return;
  60
  61        for (i = 0; i < queue_num; i++) {
  62                iu = &ius[i];
  63                ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction);
  64                kfree(iu->buf);
  65        }
  66        kfree(ius);
  67}
  68EXPORT_SYMBOL_GPL(rtrs_iu_free);
  69
  70int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu)
  71{
  72        struct rtrs_sess *sess = con->sess;
  73        struct ib_recv_wr wr;
  74        struct ib_sge list;
  75
  76        list.addr   = iu->dma_addr;
  77        list.length = iu->size;
  78        list.lkey   = sess->dev->ib_pd->local_dma_lkey;
  79
  80        if (list.length == 0) {
  81                rtrs_wrn(con->sess,
  82                          "Posting receive work request failed, sg list is empty\n");
  83                return -EINVAL;
  84        }
  85        wr = (struct ib_recv_wr) {
  86                .wr_cqe  = &iu->cqe,
  87                .sg_list = &list,
  88                .num_sge = 1,
  89        };
  90
  91        return ib_post_recv(con->qp, &wr, NULL);
  92}
  93EXPORT_SYMBOL_GPL(rtrs_iu_post_recv);
  94
  95int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe)
  96{
  97        struct ib_recv_wr wr;
  98
  99        wr = (struct ib_recv_wr) {
 100                .wr_cqe  = cqe,
 101        };
 102
 103        return ib_post_recv(con->qp, &wr, NULL);
 104}
 105EXPORT_SYMBOL_GPL(rtrs_post_recv_empty);
 106
 107static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head,
 108                          struct ib_send_wr *wr, struct ib_send_wr *tail)
 109{
 110        if (head) {
 111                struct ib_send_wr *next = head;
 112
 113                while (next->next)
 114                        next = next->next;
 115                next->next = wr;
 116        } else {
 117                head = wr;
 118        }
 119
 120        if (tail)
 121                wr->next = tail;
 122
 123        return ib_post_send(qp, head, NULL);
 124}
 125
 126int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
 127                       struct ib_send_wr *head)
 128{
 129        struct rtrs_sess *sess = con->sess;
 130        struct ib_send_wr wr;
 131        struct ib_sge list;
 132
 133        if (WARN_ON(size == 0))
 134                return -EINVAL;
 135
 136        list.addr   = iu->dma_addr;
 137        list.length = size;
 138        list.lkey   = sess->dev->ib_pd->local_dma_lkey;
 139
 140        wr = (struct ib_send_wr) {
 141                .wr_cqe     = &iu->cqe,
 142                .sg_list    = &list,
 143                .num_sge    = 1,
 144                .opcode     = IB_WR_SEND,
 145                .send_flags = IB_SEND_SIGNALED,
 146        };
 147
 148        return rtrs_post_send(con->qp, head, &wr, NULL);
 149}
 150EXPORT_SYMBOL_GPL(rtrs_iu_post_send);
 151
 152int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
 153                                struct ib_sge *sge, unsigned int num_sge,
 154                                u32 rkey, u64 rdma_addr, u32 imm_data,
 155                                enum ib_send_flags flags,
 156                                struct ib_send_wr *head,
 157                                struct ib_send_wr *tail)
 158{
 159        struct ib_rdma_wr wr;
 160        int i;
 161
 162        wr = (struct ib_rdma_wr) {
 163                .wr.wr_cqe        = &iu->cqe,
 164                .wr.sg_list       = sge,
 165                .wr.num_sge       = num_sge,
 166                .rkey             = rkey,
 167                .remote_addr      = rdma_addr,
 168                .wr.opcode        = IB_WR_RDMA_WRITE_WITH_IMM,
 169                .wr.ex.imm_data = cpu_to_be32(imm_data),
 170                .wr.send_flags  = flags,
 171        };
 172
 173        /*
 174         * If one of the sges has 0 size, the operation will fail with a
 175         * length error
 176         */
 177        for (i = 0; i < num_sge; i++)
 178                if (WARN_ON(sge[i].length == 0))
 179                        return -EINVAL;
 180
 181        return rtrs_post_send(con->qp, head, &wr.wr, tail);
 182}
 183EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm);
 184
 185int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
 186                                    u32 imm_data, enum ib_send_flags flags,
 187                                    struct ib_send_wr *head)
 188{
 189        struct ib_rdma_wr wr;
 190
 191        wr = (struct ib_rdma_wr) {
 192                .wr.wr_cqe      = cqe,
 193                .wr.send_flags  = flags,
 194                .wr.opcode      = IB_WR_RDMA_WRITE_WITH_IMM,
 195                .wr.ex.imm_data = cpu_to_be32(imm_data),
 196        };
 197
 198        return rtrs_post_send(con->qp, head, &wr.wr, NULL);
 199}
 200EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty);
 201
 202static void qp_event_handler(struct ib_event *ev, void *ctx)
 203{
 204        struct rtrs_con *con = ctx;
 205
 206        switch (ev->event) {
 207        case IB_EVENT_COMM_EST:
 208                rtrs_info(con->sess, "QP event %s (%d) received\n",
 209                           ib_event_msg(ev->event), ev->event);
 210                rdma_notify(con->cm_id, IB_EVENT_COMM_EST);
 211                break;
 212        default:
 213                rtrs_info(con->sess, "Unhandled QP event %s (%d) received\n",
 214                           ib_event_msg(ev->event), ev->event);
 215                break;
 216        }
 217}
 218
 219static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe,
 220                     enum ib_poll_context poll_ctx)
 221{
 222        struct rdma_cm_id *cm_id = con->cm_id;
 223        struct ib_cq *cq;
 224
 225        cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx);
 226        if (IS_ERR(cq)) {
 227                rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n",
 228                          PTR_ERR(cq));
 229                return PTR_ERR(cq);
 230        }
 231        con->cq = cq;
 232        con->nr_cqe = nr_cqe;
 233
 234        return 0;
 235}
 236
 237static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
 238                     u32 max_send_wr, u32 max_recv_wr, u32 max_sge)
 239{
 240        struct ib_qp_init_attr init_attr = {NULL};
 241        struct rdma_cm_id *cm_id = con->cm_id;
 242        int ret;
 243
 244        init_attr.cap.max_send_wr = max_send_wr;
 245        init_attr.cap.max_recv_wr = max_recv_wr;
 246        init_attr.cap.max_recv_sge = 1;
 247        init_attr.event_handler = qp_event_handler;
 248        init_attr.qp_context = con;
 249        init_attr.cap.max_send_sge = max_sge;
 250
 251        init_attr.qp_type = IB_QPT_RC;
 252        init_attr.send_cq = con->cq;
 253        init_attr.recv_cq = con->cq;
 254        init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 255
 256        ret = rdma_create_qp(cm_id, pd, &init_attr);
 257        if (ret) {
 258                rtrs_err(con->sess, "Creating QP failed, err: %d\n", ret);
 259                return ret;
 260        }
 261        con->qp = cm_id->qp;
 262
 263        return ret;
 264}
 265
 266int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con,
 267                       u32 max_send_sge, int cq_vector, int nr_cqe,
 268                       u32 max_send_wr, u32 max_recv_wr,
 269                       enum ib_poll_context poll_ctx)
 270{
 271        int err;
 272
 273        err = create_cq(con, cq_vector, nr_cqe, poll_ctx);
 274        if (err)
 275                return err;
 276
 277        err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr,
 278                        max_send_sge);
 279        if (err) {
 280                ib_cq_pool_put(con->cq, con->nr_cqe);
 281                con->cq = NULL;
 282                return err;
 283        }
 284        con->sess = sess;
 285
 286        return 0;
 287}
 288EXPORT_SYMBOL_GPL(rtrs_cq_qp_create);
 289
 290void rtrs_cq_qp_destroy(struct rtrs_con *con)
 291{
 292        if (con->qp) {
 293                rdma_destroy_qp(con->cm_id);
 294                con->qp = NULL;
 295        }
 296        if (con->cq) {
 297                ib_cq_pool_put(con->cq, con->nr_cqe);
 298                con->cq = NULL;
 299        }
 300}
 301EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy);
 302
 303static void schedule_hb(struct rtrs_sess *sess)
 304{
 305        queue_delayed_work(sess->hb_wq, &sess->hb_dwork,
 306                           msecs_to_jiffies(sess->hb_interval_ms));
 307}
 308
 309void rtrs_send_hb_ack(struct rtrs_sess *sess)
 310{
 311        struct rtrs_con *usr_con = sess->con[0];
 312        u32 imm;
 313        int err;
 314
 315        imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
 316        err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
 317                                             0, NULL);
 318        if (err) {
 319                sess->hb_err_handler(usr_con);
 320                return;
 321        }
 322}
 323EXPORT_SYMBOL_GPL(rtrs_send_hb_ack);
 324
 325static void hb_work(struct work_struct *work)
 326{
 327        struct rtrs_con *usr_con;
 328        struct rtrs_sess *sess;
 329        u32 imm;
 330        int err;
 331
 332        sess = container_of(to_delayed_work(work), typeof(*sess), hb_dwork);
 333        usr_con = sess->con[0];
 334
 335        if (sess->hb_missed_cnt > sess->hb_missed_max) {
 336                sess->hb_err_handler(usr_con);
 337                return;
 338        }
 339        if (sess->hb_missed_cnt++) {
 340                /* Reschedule work without sending hb */
 341                schedule_hb(sess);
 342                return;
 343        }
 344
 345        sess->hb_last_sent = ktime_get();
 346
 347        imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
 348        err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
 349                                             0, NULL);
 350        if (err) {
 351                sess->hb_err_handler(usr_con);
 352                return;
 353        }
 354
 355        schedule_hb(sess);
 356}
 357
 358void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
 359                  unsigned int interval_ms, unsigned int missed_max,
 360                  void (*err_handler)(struct rtrs_con *con),
 361                  struct workqueue_struct *wq)
 362{
 363        sess->hb_cqe = cqe;
 364        sess->hb_interval_ms = interval_ms;
 365        sess->hb_err_handler = err_handler;
 366        sess->hb_wq = wq;
 367        sess->hb_missed_max = missed_max;
 368        sess->hb_missed_cnt = 0;
 369        INIT_DELAYED_WORK(&sess->hb_dwork, hb_work);
 370}
 371EXPORT_SYMBOL_GPL(rtrs_init_hb);
 372
 373void rtrs_start_hb(struct rtrs_sess *sess)
 374{
 375        schedule_hb(sess);
 376}
 377EXPORT_SYMBOL_GPL(rtrs_start_hb);
 378
 379void rtrs_stop_hb(struct rtrs_sess *sess)
 380{
 381        cancel_delayed_work_sync(&sess->hb_dwork);
 382        sess->hb_missed_cnt = 0;
 383}
 384EXPORT_SYMBOL_GPL(rtrs_stop_hb);
 385
 386static int rtrs_str_gid_to_sockaddr(const char *addr, size_t len,
 387                                     short port, struct sockaddr_storage *dst)
 388{
 389        struct sockaddr_ib *dst_ib = (struct sockaddr_ib *)dst;
 390        int ret;
 391
 392        /*
 393         * We can use some of the IPv6 functions since GID is a valid
 394         * IPv6 address format
 395         */
 396        ret = in6_pton(addr, len, dst_ib->sib_addr.sib_raw, '\0', NULL);
 397        if (ret == 0)
 398                return -EINVAL;
 399
 400        dst_ib->sib_family = AF_IB;
 401        /*
 402         * Use the same TCP server port number as the IB service ID
 403         * on the IB port space range
 404         */
 405        dst_ib->sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port);
 406        dst_ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
 407        dst_ib->sib_pkey = cpu_to_be16(0xffff);
 408
 409        return 0;
 410}
 411
 412/**
 413 * rtrs_str_to_sockaddr() - Convert rtrs address string to sockaddr
 414 * @addr:       String representation of an addr (IPv4, IPv6 or IB GID):
 415 *              - "ip:192.168.1.1"
 416 *              - "ip:fe80::200:5aee:feaa:20a2"
 417 *              - "gid:fe80::200:5aee:feaa:20a2"
 418 * @len:        String address length
 419 * @port:       Destination port
 420 * @dst:        Destination sockaddr structure
 421 *
 422 * Returns 0 if conversion successful. Non-zero on error.
 423 */
 424static int rtrs_str_to_sockaddr(const char *addr, size_t len,
 425                                u16 port, struct sockaddr_storage *dst)
 426{
 427        if (strncmp(addr, "gid:", 4) == 0) {
 428                return rtrs_str_gid_to_sockaddr(addr + 4, len - 4, port, dst);
 429        } else if (strncmp(addr, "ip:", 3) == 0) {
 430                char port_str[8];
 431                char *cpy;
 432                int err;
 433
 434                snprintf(port_str, sizeof(port_str), "%u", port);
 435                cpy = kstrndup(addr + 3, len - 3, GFP_KERNEL);
 436                err = cpy ? inet_pton_with_scope(&init_net, AF_UNSPEC,
 437                                                 cpy, port_str, dst) : -ENOMEM;
 438                kfree(cpy);
 439
 440                return err;
 441        }
 442        return -EPROTONOSUPPORT;
 443}
 444
 445/**
 446 * sockaddr_to_str() - convert sockaddr to a string.
 447 * @addr:       the sockadddr structure to be converted.
 448 * @buf:        string containing socket addr.
 449 * @len:        string length.
 450 *
 451 * The return value is the number of characters written into buf not
 452 * including the trailing '\0'. If len is == 0 the function returns 0..
 453 */
 454int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len)
 455{
 456
 457        switch (addr->sa_family) {
 458        case AF_IB:
 459                return scnprintf(buf, len, "gid:%pI6",
 460                        &((struct sockaddr_ib *)addr)->sib_addr.sib_raw);
 461        case AF_INET:
 462                return scnprintf(buf, len, "ip:%pI4",
 463                        &((struct sockaddr_in *)addr)->sin_addr);
 464        case AF_INET6:
 465                return scnprintf(buf, len, "ip:%pI6c",
 466                          &((struct sockaddr_in6 *)addr)->sin6_addr);
 467        }
 468        return scnprintf(buf, len, "<invalid address family>");
 469}
 470EXPORT_SYMBOL(sockaddr_to_str);
 471
 472/**
 473 * rtrs_addr_to_str() - convert rtrs_addr to a string "src@dst"
 474 * @addr:       the rtrs_addr structure to be converted
 475 * @buf:        string containing source and destination addr of a path
 476 *              separated by '@' I.e. "ip:1.1.1.1@ip:1.1.1.2"
 477 *              "ip:1.1.1.1@ip:1.1.1.2".
 478 * @len:        string length
 479 *
 480 * The return value is the number of characters written into buf not
 481 * including the trailing '\0'.
 482 */
 483int rtrs_addr_to_str(const struct rtrs_addr *addr, char *buf, size_t len)
 484{
 485        int cnt;
 486
 487        cnt = sockaddr_to_str((struct sockaddr *)addr->src,
 488                              buf, len);
 489        cnt += scnprintf(buf + cnt, len - cnt, "@");
 490        sockaddr_to_str((struct sockaddr *)addr->dst,
 491                        buf + cnt, len - cnt);
 492        return cnt;
 493}
 494EXPORT_SYMBOL(rtrs_addr_to_str);
 495
 496/**
 497 * rtrs_addr_to_sockaddr() - convert path string "src,dst" or "src@dst"
 498 * to sockaddreses
 499 * @str:        string containing source and destination addr of a path
 500 *              separated by ',' or '@' I.e. "ip:1.1.1.1,ip:1.1.1.2" or
 501 *              "ip:1.1.1.1@ip:1.1.1.2". If str contains only one address it's
 502 *              considered to be destination.
 503 * @len:        string length
 504 * @port:       Destination port number.
 505 * @addr:       will be set to the source/destination address or to NULL
 506 *              if str doesn't contain any source address.
 507 *
 508 * Returns zero if conversion successful. Non-zero otherwise.
 509 */
 510int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port,
 511                          struct rtrs_addr *addr)
 512{
 513        const char *d;
 514
 515        d = strchr(str, ',');
 516        if (!d)
 517                d = strchr(str, '@');
 518        if (d) {
 519                if (rtrs_str_to_sockaddr(str, d - str, 0, addr->src))
 520                        return -EINVAL;
 521                d += 1;
 522                len -= d - str;
 523                str  = d;
 524
 525        } else {
 526                addr->src = NULL;
 527        }
 528        return rtrs_str_to_sockaddr(str, len, port, addr->dst);
 529}
 530EXPORT_SYMBOL(rtrs_addr_to_sockaddr);
 531
 532void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
 533                            struct rtrs_rdma_dev_pd *pool)
 534{
 535        WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free));
 536        INIT_LIST_HEAD(&pool->list);
 537        mutex_init(&pool->mutex);
 538        pool->pd_flags = pd_flags;
 539}
 540EXPORT_SYMBOL(rtrs_rdma_dev_pd_init);
 541
 542void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool)
 543{
 544        mutex_destroy(&pool->mutex);
 545        WARN_ON(!list_empty(&pool->list));
 546}
 547EXPORT_SYMBOL(rtrs_rdma_dev_pd_deinit);
 548
 549static void dev_free(struct kref *ref)
 550{
 551        struct rtrs_rdma_dev_pd *pool;
 552        struct rtrs_ib_dev *dev;
 553
 554        dev = container_of(ref, typeof(*dev), ref);
 555        pool = dev->pool;
 556
 557        mutex_lock(&pool->mutex);
 558        list_del(&dev->entry);
 559        mutex_unlock(&pool->mutex);
 560
 561        if (pool->ops && pool->ops->deinit)
 562                pool->ops->deinit(dev);
 563
 564        ib_dealloc_pd(dev->ib_pd);
 565
 566        if (pool->ops && pool->ops->free)
 567                pool->ops->free(dev);
 568        else
 569                kfree(dev);
 570}
 571
 572int rtrs_ib_dev_put(struct rtrs_ib_dev *dev)
 573{
 574        return kref_put(&dev->ref, dev_free);
 575}
 576EXPORT_SYMBOL(rtrs_ib_dev_put);
 577
 578static int rtrs_ib_dev_get(struct rtrs_ib_dev *dev)
 579{
 580        return kref_get_unless_zero(&dev->ref);
 581}
 582
 583struct rtrs_ib_dev *
 584rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
 585                         struct rtrs_rdma_dev_pd *pool)
 586{
 587        struct rtrs_ib_dev *dev;
 588
 589        mutex_lock(&pool->mutex);
 590        list_for_each_entry(dev, &pool->list, entry) {
 591                if (dev->ib_dev->node_guid == ib_dev->node_guid &&
 592                    rtrs_ib_dev_get(dev))
 593                        goto out_unlock;
 594        }
 595        mutex_unlock(&pool->mutex);
 596        if (pool->ops && pool->ops->alloc)
 597                dev = pool->ops->alloc();
 598        else
 599                dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 600        if (IS_ERR_OR_NULL(dev))
 601                goto out_err;
 602
 603        kref_init(&dev->ref);
 604        dev->pool = pool;
 605        dev->ib_dev = ib_dev;
 606        dev->ib_pd = ib_alloc_pd(ib_dev, pool->pd_flags);
 607        if (IS_ERR(dev->ib_pd))
 608                goto out_free_dev;
 609
 610        if (pool->ops && pool->ops->init && pool->ops->init(dev))
 611                goto out_free_pd;
 612
 613        mutex_lock(&pool->mutex);
 614        list_add(&dev->entry, &pool->list);
 615out_unlock:
 616        mutex_unlock(&pool->mutex);
 617        return dev;
 618
 619out_free_pd:
 620        ib_dealloc_pd(dev->ib_pd);
 621out_free_dev:
 622        if (pool->ops && pool->ops->free)
 623                pool->ops->free(dev);
 624        else
 625                kfree(dev);
 626out_err:
 627        return NULL;
 628}
 629EXPORT_SYMBOL(rtrs_ib_dev_find_or_add);
 630