linux/net/rds/ib_frmr.c
/*
 * Copyright (c) 2016 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ib_mr.h"

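/* Allocate a new FRMR-backed MR, preferring reuse from the pool that
 * matches the registration size (8K pool for small mappings, 1M pool
 * for the rest).  On error, the pool slot reserved by
 * rds_ib_try_reuse_ibmr() is released again.
 */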
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
                                           int npages)
{
        struct rds_ib_mr_pool *pool;
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int err = 0;

        if (npages <= RDS_MR_8K_MSG_SIZE)
                pool = rds_ibdev->mr_8k_pool;
        else
                pool = rds_ibdev->mr_1m_pool;

        ibmr = rds_ib_try_reuse_ibmr(pool);
        if (ibmr)
                return ibmr;

        ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
                            rdsibdev_to_node(rds_ibdev));
        if (!ibmr) {
                err = -ENOMEM;
                goto out_no_cigar;
        }

        frmr = &ibmr->u.frmr;
        frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
                         pool->fmr_attr.max_pages);
        if (IS_ERR(frmr->mr)) {
                pr_warn("RDS/IB: %s failed to allocate MR\n", __func__);
                err = PTR_ERR(frmr->mr);
                goto out_no_cigar;
        }

        ibmr->pool = pool;
        if (pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

        if (atomic_read(&pool->item_count) > pool->max_items_soft)
                pool->max_items_soft = pool->max_items;

        frmr->fr_state = FRMR_IS_FREE;
        return ibmr;

out_no_cigar:
        kfree(ibmr);
        atomic_dec(&pool->item_count);
        return ERR_PTR(err);
}

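/* Hand a no-longer-used MR back to its pool: MRs flagged for dropping
 * go on the drop_list to be destroyed, the rest on the free_list for
 * reuse.  A pool flush is scheduled once too much memory is pinned or
 * too many entries sit dirty.
 */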
static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;

        if (drop)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
        atomic_add(ibmr->sg_len, &pool->free_pinned);
        atomic_inc(&pool->dirty_count);

        /* If we've pinned too many pages, request a flush */
        if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
            atomic_read(&pool->dirty_count) >= pool->max_items / 5)
                queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
}

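/* Map the MR's scatterlist and post a signaled IB_WR_REG_MR on the
 * connection's QP.  An i_fastreg_wrs credit is reserved up front and
 * handed back either here on failure or by the completion handler.
 */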
static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct ib_reg_wr reg_wr;
        int ret, off = 0;

        while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                cpu_relax();
        }

        ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len,
                                &off, PAGE_SIZE);
        if (unlikely(ret != ibmr->sg_len)) {
                /* Give back the WR credit reserved above before bailing */
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                return ret < 0 ? ret : -EINVAL;
        }

        /* Perform a WR for the fast_reg_mr. Each individual page
         * in the sg list is added to the fast reg page list and placed
         * inside the fast_reg_mr WR.  The key used is a rolling 8bit
         * counter, which should guarantee uniqueness.
         */
        ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
        frmr->fr_state = FRMR_IS_INUSE;

        memset(&reg_wr, 0, sizeof(reg_wr));
        reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.num_sge = 0;
        reg_wr.mr = frmr->mr;
        reg_wr.key = frmr->mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE |
                        IB_ACCESS_REMOTE_READ |
                        IB_ACCESS_REMOTE_WRITE;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
        if (unlikely(ret)) {
                /* Failure here can be because of -ENOMEM as well */
                frmr->fr_state = FRMR_IS_STALE;
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                if (printk_ratelimit())
                        pr_warn("RDS/IB: %s returned error(%d)\n",
                                __func__, ret);
        }
        return ret;
}

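/* DMA-map @sg and register it through @ibmr.  Only the first entry may
 * begin and only the last entry may end at an unaligned address; any
 * other hole in the region is rejected.
 */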
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
                           struct rds_ib_mr_pool *pool,
                           struct rds_ib_mr *ibmr,
                           struct scatterlist *sg, unsigned int sg_len)
{
        struct ib_device *dev = rds_ibdev->dev;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        int i;
        u32 len;
        int ret = 0;

        /* Tear down the old mapping on this ibmr before filling it
         * with the new sg list.
         */
        rds_ib_teardown_mr(ibmr);

        ibmr->sg = sg;
        ibmr->sg_len = sg_len;
        ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
                                         DMA_BIDIRECTIONAL);
        if (unlikely(!ibmr->sg_dma_len)) {
                pr_warn("RDS/IB: %s failed!\n", __func__);
                return -EBUSY;
        }

        frmr->sg_byte_len = 0;
        frmr->dma_npages = 0;
        len = 0;

        /* Only the first entry may start and only the last entry may
         * end off a page boundary; a hole anywhere else makes the
         * region unmappable as a single MR.
         */
        ret = -EINVAL;
        for (i = 0; i < ibmr->sg_dma_len; ++i) {
                unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
                u64 dma_addr = sg_dma_address(&ibmr->sg[i]);

                frmr->sg_byte_len += dma_len;
                if (dma_addr & ~PAGE_MASK) {
                        if (i > 0)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                if ((dma_addr + dma_len) & ~PAGE_MASK) {
                        if (i < ibmr->sg_dma_len - 1)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                len += dma_len;
        }
        frmr->dma_npages += len >> PAGE_SHIFT;

        if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
                ret = -EMSGSIZE;
                goto out_unmap;
        }

        ret = rds_ib_post_reg_frmr(ibmr);
        if (ret)
                goto out_unmap;

        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

        return ret;

out_unmap:
        ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
                        DMA_BIDIRECTIONAL);
        ibmr->sg_dma_len = 0;
        return ret;
}

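/* Invalidate the MR's rkey with a signaled IB_WR_LOCAL_INV.  An
 * i_fastunreg_wrs credit is reserved first; the completion handler
 * marks the MR free again and returns the credit.
 */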
static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
{
        struct ib_send_wr *s_wr;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
        int ret = -EINVAL;

        if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
                goto out;

        if (frmr->fr_state != FRMR_IS_INUSE)
                goto out;

        while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastunreg_wrs);
                cpu_relax();
        }

        frmr->fr_inv = true;
        s_wr = &frmr->fr_wr;

        memset(s_wr, 0, sizeof(*s_wr));
        s_wr->wr_id = (unsigned long)(void *)ibmr;
        s_wr->opcode = IB_WR_LOCAL_INV;
        s_wr->ex.invalidate_rkey = frmr->mr->rkey;
        s_wr->send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
        if (unlikely(ret)) {
                frmr->fr_state = FRMR_IS_STALE;
                frmr->fr_inv = false;
                atomic_inc(&ibmr->ic->i_fastunreg_wrs);
                pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
                goto out;
        }
out:
        return ret;
}

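/* Completion handler shared by the REG_MR and LOCAL_INV work requests.
 * A failed WR leaves the MR stale and triggers a reconnect; in all
 * cases the WR credit taken at post time is returned.
 */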
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
        struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (wc->status != IB_WC_SUCCESS) {
                frmr->fr_state = FRMR_IS_STALE;
                if (rds_conn_up(ic->conn))
                        rds_ib_conn_error(ic->conn,
                                          "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
                                          &ic->conn->c_laddr,
                                          &ic->conn->c_faddr,
                                          wc->status,
                                          ib_wc_status_msg(wc->status),
                                          wc->vendor_err);
        }

        if (frmr->fr_inv) {
                /* A failed invalidate must not bring the MR back to
                 * FRMR_IS_FREE; it stays stale and gets dropped.
                 */
                if (frmr->fr_state != FRMR_IS_STALE)
                        frmr->fr_state = FRMR_IS_FREE;
                frmr->fr_inv = false;
                /* Return the credit taken in rds_ib_post_inv() */
                atomic_inc(&ic->i_fastunreg_wrs);
        } else {
                /* Return the credit taken in rds_ib_post_reg_frmr() */
                atomic_inc(&ic->i_fastreg_wrs);
        }
}

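/* Invalidate and unpin the MRs on @list, destroying entries until
 * @goal MRs have been freed (stale MRs are always destroyed).  MRs
 * whose invalidation has not completed yet are skipped and stay on
 * the list.
 */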
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
                       unsigned long *unpinned, unsigned int goal)
{
        struct rds_ib_mr *ibmr, *next;
        struct rds_ib_frmr *frmr;
        int ret = 0;
        unsigned int freed = *nfreed;

        /* Post a LOCAL_INV for every MR that still carries a mapping */
        list_for_each_entry(ibmr, list, unmap_list) {
                if (ibmr->sg_dma_len)
                        ret |= rds_ib_post_inv(ibmr);
        }
        if (ret)
                pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

        /* Now we can destroy the DMA mapping and unpin any pages */
        list_for_each_entry_safe(ibmr, next, list, unmap_list) {
                *unpinned += ibmr->sg_len;
                frmr = &ibmr->u.frmr;
                __rds_ib_teardown_mr(ibmr);
                if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
                        /* Don't de-allocate if the MR is not free yet */
                        if (frmr->fr_state == FRMR_IS_INUSE)
                                continue;

                        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                                rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
                        else
                                rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
                        list_del(&ibmr->unmap_list);
                        if (frmr->mr)
                                ib_dereg_mr(frmr->mr);
                        kfree(ibmr);
                        freed++;
                }
        }
        *nfreed = freed;
}

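/* Register @sg as a new RDMA MR on connection @ic and return the rkey
 * through @key.  Allocation loops until an MR in FRMR_IS_FREE state is
 * found; MRs still awaiting invalidation are pushed to the drop list.
 */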
struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
                                  struct rds_ib_connection *ic,
                                  struct scatterlist *sg,
                                  unsigned long nents, u32 *key)
{
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int ret;

        if (!ic) {
                /* TODO: Add FRWR support for RDS_GET_MR using proxy qp */
                return ERR_PTR(-EOPNOTSUPP);
        }

        do {
                if (ibmr)
                        rds_ib_free_frmr(ibmr, true);
                ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
                if (IS_ERR(ibmr))
                        return ibmr;
                frmr = &ibmr->u.frmr;
        } while (frmr->fr_state != FRMR_IS_FREE);

        ibmr->ic = ic;
        ibmr->device = rds_ibdev;
        ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
        if (ret == 0) {
                *key = frmr->mr->rkey;
        } else {
                rds_ib_free_frmr(ibmr, false);
                ibmr = ERR_PTR(ret);
        }

        return ibmr;
}

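/* Queue an MR back to its pool without waiting for a flush: stale MRs
 * go on the drop_list, usable ones on the free_list.
 */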
void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (frmr->fr_state == FRMR_IS_STALE)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
}