linux/net/rds/ib_frmr.c
/*
 * Copyright (c) 2016 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ib_mr.h"

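/* Allocate an ibmr backed by a fast-registration MR.  A free MR is
 * reused from the matching 8K or 1M pool when one is available;
 * otherwise a new rds_ib_mr is allocated on the device's NUMA node and
 * an MR covering up to fmr_attr.max_pages pages is requested from the
 * HCA.  The MR starts out in the FRMR_IS_FREE state.
 */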
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
                                           int npages)
{
        struct rds_ib_mr_pool *pool;
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int err = 0;

        if (npages <= RDS_MR_8K_MSG_SIZE)
                pool = rds_ibdev->mr_8k_pool;
        else
                pool = rds_ibdev->mr_1m_pool;

        ibmr = rds_ib_try_reuse_ibmr(pool);
        if (ibmr)
                return ibmr;

        ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
                            rdsibdev_to_node(rds_ibdev));
        if (!ibmr) {
                err = -ENOMEM;
                goto out_no_cigar;
        }

        frmr = &ibmr->u.frmr;
        frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
                         pool->fmr_attr.max_pages);
        if (IS_ERR(frmr->mr)) {
                pr_warn("RDS/IB: %s failed to allocate MR\n", __func__);
                err = PTR_ERR(frmr->mr);
                goto out_no_cigar;
        }

        ibmr->pool = pool;
        if (pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

        if (atomic_read(&pool->item_count) > pool->max_items_soft)
                pool->max_items_soft = pool->max_items;

        frmr->fr_state = FRMR_IS_FREE;
        return ibmr;

out_no_cigar:
        kfree(ibmr);
        atomic_dec(&pool->item_count);
        return ERR_PTR(err);
}

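/* Return an ibmr to its pool.  MRs that must not be reused go on the
 * drop list, all others on the free list.  A pool flush is scheduled
 * once too many pages are pinned or too many MRs are dirty.
 */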
static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;

        if (drop)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
        atomic_add(ibmr->sg_len, &pool->free_pinned);
        atomic_inc(&pool->dirty_count);

        /* If we've pinned too many pages, request a flush */
        if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
            atomic_read(&pool->dirty_count) >= pool->max_items / 5)
                queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
}

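/* Map the scatterlist into the MR and post an IB_WR_REG_MR work request
 * to register it with the HCA.  The i_fastreg_wrs counter bounds the
 * number of registration WRs outstanding on the connection.
 */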
static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct ib_send_wr *failed_wr;
        struct ib_reg_wr reg_wr;
        int ret, off = 0;

        while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                cpu_relax();
        }

        ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len,
                                &off, PAGE_SIZE);
        if (unlikely(ret != ibmr->sg_len))
                return ret < 0 ? ret : -EINVAL;

        /* Perform a WR for the fast_reg_mr. Each individual page
         * in the sg list is added to the fast reg page list and placed
         * inside the fast_reg_mr WR.  The key used is a rolling 8bit
         * counter, which should guarantee uniqueness.
         */
        ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
        frmr->fr_state = FRMR_IS_INUSE;

        memset(&reg_wr, 0, sizeof(reg_wr));
        reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.num_sge = 0;
        reg_wr.mr = frmr->mr;
        reg_wr.key = frmr->mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE |
                        IB_ACCESS_REMOTE_READ |
                        IB_ACCESS_REMOTE_WRITE;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;

        failed_wr = &reg_wr.wr;
        ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
        WARN_ON(failed_wr != &reg_wr.wr);
        if (unlikely(ret)) {
                /* Failure here can be because of -ENOMEM as well */
                frmr->fr_state = FRMR_IS_STALE;
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                if (printk_ratelimit())
                        pr_warn("RDS/IB: %s returned error(%d)\n",
                                __func__, ret);
        }
        return ret;
}

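/* DMA-map the caller's scatterlist into the ibmr and register it with
 * the HCA.  The mapping must stay page aligned: only the first entry
 * may start, and only the last entry may end, off a page boundary.
 * Misaligned lists and lists larger than the pool's page limit are
 * rejected and unmapped again.
 */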
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
                           struct rds_ib_mr_pool *pool,
                           struct rds_ib_mr *ibmr,
                           struct scatterlist *sg, unsigned int sg_len)
{
        struct ib_device *dev = rds_ibdev->dev;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        int i;
        u32 len;
        int ret = 0;

        /* We want to teardown old ibmr values here and fill it up with
         * new sg values
         */
        rds_ib_teardown_mr(ibmr);

        ibmr->sg = sg;
        ibmr->sg_len = sg_len;
        ibmr->sg_dma_len = 0;
        frmr->sg_byte_len = 0;
        WARN_ON(ibmr->sg_dma_len);
        ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
                                         DMA_BIDIRECTIONAL);
        if (unlikely(!ibmr->sg_dma_len)) {
                pr_warn("RDS/IB: %s failed!\n", __func__);
                return -EBUSY;
        }

        frmr->sg_byte_len = 0;
        frmr->dma_npages = 0;
        len = 0;

        ret = -EINVAL;
        for (i = 0; i < ibmr->sg_dma_len; ++i) {
                unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
                u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);

                frmr->sg_byte_len += dma_len;
                if (dma_addr & ~PAGE_MASK) {
                        if (i > 0)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                if ((dma_addr + dma_len) & ~PAGE_MASK) {
                        if (i < ibmr->sg_dma_len - 1)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                len += dma_len;
        }
        frmr->dma_npages += len >> PAGE_SHIFT;

        if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
                ret = -EMSGSIZE;
                goto out_unmap;
        }

        ret = rds_ib_post_reg_frmr(ibmr);
        if (ret)
                goto out_unmap;

        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

        return ret;

out_unmap:
        ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
                        DMA_BIDIRECTIONAL);
        ibmr->sg_dma_len = 0;
        return ret;
}

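/* Post an IB_WR_LOCAL_INV work request to invalidate the MR's current
 * rkey.  Only MRs that are actually registered (FRMR_IS_INUSE) are
 * invalidated; the i_fastunreg_wrs counter bounds the number of
 * invalidation WRs outstanding on the connection.
 */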
static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
{
        struct ib_send_wr *s_wr, *failed_wr;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
        int ret = -EINVAL;

        if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
                goto out;

        if (frmr->fr_state != FRMR_IS_INUSE)
                goto out;

        while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastunreg_wrs);
                cpu_relax();
        }

        frmr->fr_inv = true;
        s_wr = &frmr->fr_wr;

        memset(s_wr, 0, sizeof(*s_wr));
        s_wr->wr_id = (unsigned long)(void *)ibmr;
        s_wr->opcode = IB_WR_LOCAL_INV;
        s_wr->ex.invalidate_rkey = frmr->mr->rkey;
        s_wr->send_flags = IB_SEND_SIGNALED;

        failed_wr = s_wr;
        ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
        WARN_ON(failed_wr != s_wr);
        if (unlikely(ret)) {
                frmr->fr_state = FRMR_IS_STALE;
                frmr->fr_inv = false;
                atomic_inc(&ibmr->ic->i_fastunreg_wrs);
                pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
                goto out;
        }
out:
        return ret;
}

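/* Completion handler shared by registration and invalidation WRs.  A
 * failed completion marks the MR stale and shuts the connection down
 * so that it reconnects; a successful invalidation returns the MR to
 * the FRMR_IS_FREE state.
 */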
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
        struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (wc->status != IB_WC_SUCCESS) {
                frmr->fr_state = FRMR_IS_STALE;
                if (rds_conn_up(ic->conn))
                        rds_ib_conn_error(ic->conn,
                                          "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
                                          &ic->conn->c_laddr,
                                          &ic->conn->c_faddr,
                                          wc->status,
                                          ib_wc_status_msg(wc->status),
                                          wc->vendor_err);
        }

        if (frmr->fr_inv) {
                frmr->fr_state = FRMR_IS_FREE;
                frmr->fr_inv = false;
                atomic_inc(&ic->i_fastreg_wrs);
        } else {
                atomic_inc(&ic->i_fastunreg_wrs);
        }
}

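/* Flush a list of MRs on behalf of the pool flush path: invalidate the
 * ones that are still mapped, tear down their DMA mappings, and release
 * MRs until the flush goal is reached (stale MRs are always released,
 * MRs still marked in use are kept).  The caller's freed and unpinned
 * counters are updated.
 */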
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
                       unsigned long *unpinned, unsigned int goal)
{
        struct rds_ib_mr *ibmr, *next;
        struct rds_ib_frmr *frmr;
        int ret = 0;
        unsigned int freed = *nfreed;

        /* Invalidate (LOCAL_INV) every MR on the list that is still mapped */
        list_for_each_entry(ibmr, list, unmap_list) {
                if (ibmr->sg_dma_len)
                        ret |= rds_ib_post_inv(ibmr);
        }
        if (ret)
                pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

        /* Now we can destroy the DMA mapping and unpin any pages */
        list_for_each_entry_safe(ibmr, next, list, unmap_list) {
                *unpinned += ibmr->sg_len;
                frmr = &ibmr->u.frmr;
                __rds_ib_teardown_mr(ibmr);
                if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
                        /* Don't de-allocate if the MR is not free yet */
                        if (frmr->fr_state == FRMR_IS_INUSE)
                                continue;

                        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                                rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
                        else
                                rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
                        list_del(&ibmr->unmap_list);
                        if (frmr->mr)
                                ib_dereg_mr(frmr->mr);
                        kfree(ibmr);
                        freed++;
                }
        }
        *nfreed = freed;
}

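/* Entry point for registering a scatterlist through the FRMR path.
 * Keeps allocating (and dropping unusable MRs) until it holds an MR in
 * the FRMR_IS_FREE state, maps and registers the pages, and hands the
 * resulting rkey back through *key.
 */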
struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
                                  struct rds_ib_connection *ic,
                                  struct scatterlist *sg,
                                  unsigned long nents, u32 *key)
{
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int ret;

        do {
                if (ibmr)
                        rds_ib_free_frmr(ibmr, true);
                ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
                if (IS_ERR(ibmr))
                        return ibmr;
                frmr = &ibmr->u.frmr;
        } while (frmr->fr_state != FRMR_IS_FREE);

        ibmr->ic = ic;
        ibmr->device = rds_ibdev;
        ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
        if (ret == 0) {
                *key = frmr->mr->rkey;
        } else {
                rds_ib_free_frmr(ibmr, false);
                ibmr = ERR_PTR(ret);
        }

        return ibmr;
}

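/* Queue an ibmr for lazy release: stale MRs go on the pool's drop list,
 * all others on its free list, to be dealt with the next time the pool
 * is flushed.
 */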
void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (frmr->fr_state == FRMR_IS_STALE)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
}