linux/net/rds/ib_frmr.c
/*
 * Copyright (c) 2016 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ib_mr.h"

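/* Allocate an FRMR-backed MR for up to @npages pages, first trying to
 * reuse an idle entry from the matching per-device pool (8K for small
 * mappings, 1M otherwise) before allocating a fresh ib_mr.
 */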
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
                                           int npages)
{
        struct rds_ib_mr_pool *pool;
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int err = 0;

        if (npages <= RDS_MR_8K_MSG_SIZE)
                pool = rds_ibdev->mr_8k_pool;
        else
                pool = rds_ibdev->mr_1m_pool;

        ibmr = rds_ib_try_reuse_ibmr(pool);
        if (ibmr)
                return ibmr;

        ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
                            rdsibdev_to_node(rds_ibdev));
        if (!ibmr) {
                err = -ENOMEM;
                goto out_no_cigar;
        }

        frmr = &ibmr->u.frmr;
        frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
                         pool->fmr_attr.max_pages);
        if (IS_ERR(frmr->mr)) {
                pr_warn("RDS/IB: %s failed to allocate MR\n", __func__);
                /* Propagate the real error; without this we would return
                 * ERR_PTR(0), i.e. NULL, and callers checking IS_ERR()
                 * would miss the failure.
                 */
                err = PTR_ERR(frmr->mr);
                goto out_no_cigar;
        }

        ibmr->pool = pool;
        if (pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

        if (atomic_read(&pool->item_count) > pool->max_items_soft)
                pool->max_items_soft = pool->max_items;

        frmr->fr_state = FRMR_IS_FREE;
        return ibmr;

out_no_cigar:
        kfree(ibmr);
        atomic_dec(&pool->item_count);
        return ERR_PTR(err);
}

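/* Return an MR to its pool: onto the drop list if the caller wants it
 * invalidated and destroyed, otherwise onto the free list for reuse.
 * Kicks the pool flush worker once too many pages are pinned or too
 * many MRs are dirty.
 */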
static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;

        if (drop)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
        atomic_add(ibmr->sg_len, &pool->free_pinned);
        atomic_inc(&pool->dirty_count);

        /* If we've pinned too many pages, request a flush */
        if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
            atomic_read(&pool->dirty_count) >= pool->max_items / 5)
                queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
}

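/* Post an IB_WR_REG_MR work request to register the MR's current page
 * list with the HCA.  Spins until a fast-reg WR credit is available on
 * the connection, and releases the credit again on any failure.
 */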
static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct ib_send_wr *failed_wr;
        struct ib_reg_wr reg_wr;
        int ret;

        while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                cpu_relax();
        }

        ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE);
        if (unlikely(ret != ibmr->sg_len)) {
                /* Release the WR credit reserved above; returning without
                 * this would leak the credit and could eventually wedge
                 * the spin loop for good.
                 */
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                return ret < 0 ? ret : -EINVAL;
        }

        /* Perform a WR for the fast_reg_mr. Each individual page
         * in the sg list is added to the fast reg page list and placed
         * inside the fast_reg_mr WR.  The key used is a rolling 8bit
         * counter, which should guarantee uniqueness.
         */
        ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
        frmr->fr_state = FRMR_IS_INUSE;

        memset(&reg_wr, 0, sizeof(reg_wr));
        reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.num_sge = 0;
        reg_wr.mr = frmr->mr;
        reg_wr.key = frmr->mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE |
                        IB_ACCESS_REMOTE_READ |
                        IB_ACCESS_REMOTE_WRITE;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;

        failed_wr = &reg_wr.wr;
        ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
        WARN_ON(failed_wr != &reg_wr.wr);
        if (unlikely(ret)) {
                /* Failure here can be because of -ENOMEM as well */
                frmr->fr_state = FRMR_IS_STALE;
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                if (printk_ratelimit())
                        pr_warn("RDS/IB: %s returned error(%d)\n",
                                __func__, ret);
        }
        return ret;
}

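/* DMA-map @sg into @ibmr and register the resulting page list with the
 * HCA.  Fails with -EINVAL if the mapping cannot be expressed as a
 * single page list, or -EMSGSIZE if it needs more pages than the pool
 * allows.
 */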
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
                           struct rds_ib_mr_pool *pool,
                           struct rds_ib_mr *ibmr,
                           struct scatterlist *sg, unsigned int sg_len)
{
        struct ib_device *dev = rds_ibdev->dev;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        int i;
        u32 len;
        int ret = 0;

        /* We want to teardown old ibmr values here and fill it up with
         * new sg values
         */
        rds_ib_teardown_mr(ibmr);

        ibmr->sg = sg;
        ibmr->sg_len = sg_len;
        ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
                                         DMA_BIDIRECTIONAL);
        if (unlikely(!ibmr->sg_dma_len)) {
                pr_warn("RDS/IB: %s failed!\n", __func__);
                return -EBUSY;
        }

        frmr->sg_byte_len = 0;
        frmr->dma_npages = 0;
        len = 0;

        /* Only the first entry may start, and only the last entry may end,
         * off a page boundary; anything unaligned in the middle cannot be
         * described by a single FRMR page list.
         */
        ret = -EINVAL;
        for (i = 0; i < ibmr->sg_dma_len; ++i) {
                unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
                u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);

                frmr->sg_byte_len += dma_len;
                if (dma_addr & ~PAGE_MASK) {
                        if (i > 0)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                if ((dma_addr + dma_len) & ~PAGE_MASK) {
                        if (i < ibmr->sg_dma_len - 1)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                len += dma_len;
        }
        frmr->dma_npages += len >> PAGE_SHIFT;

        if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
                ret = -EMSGSIZE;
                goto out_unmap;
        }

        ret = rds_ib_post_reg_frmr(ibmr);
        if (ret)
                goto out_unmap;

        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

        return ret;

out_unmap:
        ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
                        DMA_BIDIRECTIONAL);
        ibmr->sg_dma_len = 0;
        return ret;
}

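/* Post an IB_WR_LOCAL_INV work request to invalidate the MR's rkey.
 * Only valid for an MR that is currently registered (FRMR_IS_INUSE);
 * the completion handler moves it back to FRMR_IS_FREE.
 */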
static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
{
        struct ib_send_wr *s_wr, *failed_wr;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
        int ret = -EINVAL;

        if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
                goto out;

        if (frmr->fr_state != FRMR_IS_INUSE)
                goto out;

        while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                cpu_relax();
        }

        frmr->fr_inv = true;
        s_wr = &frmr->fr_wr;

        memset(s_wr, 0, sizeof(*s_wr));
        s_wr->wr_id = (unsigned long)(void *)ibmr;
        s_wr->opcode = IB_WR_LOCAL_INV;
        s_wr->ex.invalidate_rkey = frmr->mr->rkey;
        s_wr->send_flags = IB_SEND_SIGNALED;

        failed_wr = s_wr;
        ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
        WARN_ON(failed_wr != s_wr);
        if (unlikely(ret)) {
                frmr->fr_state = FRMR_IS_STALE;
                frmr->fr_inv = false;
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
                goto out;
        }
out:
        return ret;
}

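/* Completion handler shared by the fast-reg and local-invalidate WRs
 * posted above.  A failed completion marks the MR stale and forces a
 * reconnect; either way the WR credit is returned.
 */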
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
        struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (wc->status != IB_WC_SUCCESS) {
                frmr->fr_state = FRMR_IS_STALE;
                if (rds_conn_up(ic->conn))
                        rds_ib_conn_error(ic->conn,
                                          "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
                                          &ic->conn->c_laddr,
                                          &ic->conn->c_faddr,
                                          wc->status,
                                          ib_wc_status_msg(wc->status),
                                          wc->vendor_err);
        }

        if (frmr->fr_inv) {
                frmr->fr_state = FRMR_IS_FREE;
                frmr->fr_inv = false;
        }

        atomic_inc(&ic->i_fastreg_wrs);
}

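/* Pool-flush helper: invalidate every MR on @list, then unmap and unpin
 * their pages, destroying MRs outright until @goal is met.  Stale MRs
 * are always destroyed; in-use ones are skipped.
 */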
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
                       unsigned long *unpinned, unsigned int goal)
{
        struct rds_ib_mr *ibmr, *next;
        struct rds_ib_frmr *frmr;
        int ret = 0;
        unsigned int freed = *nfreed;

        /* Post a LOCAL_INV for every MR that still has a DMA mapping.
         * This is the FRMR counterpart of handing a batch of FMRs to
         * ib_unmap_fmr.
         */
        list_for_each_entry(ibmr, list, unmap_list) {
                if (ibmr->sg_dma_len)
                        ret |= rds_ib_post_inv(ibmr);
        }
        if (ret)
                pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

        /* Now we can destroy the DMA mapping and unpin any pages */
        list_for_each_entry_safe(ibmr, next, list, unmap_list) {
                *unpinned += ibmr->sg_len;
                frmr = &ibmr->u.frmr;
                __rds_ib_teardown_mr(ibmr);
                if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
                        /* Don't de-allocate if the MR is not free yet */
                        if (frmr->fr_state == FRMR_IS_INUSE)
                                continue;

                        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                                rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
                        else
                                rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
                        list_del(&ibmr->unmap_list);
                        if (frmr->mr)
                                ib_dereg_mr(frmr->mr);
                        kfree(ibmr);
                        freed++;
                }
        }
        *nfreed = freed;
}

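/* Main entry point for fastreg registration: obtain a free MR (dropping
 * any stale or in-use ones the allocator hands back), map @sg into it
 * and hand the resulting rkey back via @key.
 */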
struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
                                  struct rds_ib_connection *ic,
                                  struct scatterlist *sg,
                                  unsigned long nents, u32 *key)
{
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int ret;

        do {
                if (ibmr)
                        rds_ib_free_frmr(ibmr, true);
                ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
                if (IS_ERR(ibmr))
                        return ibmr;
                frmr = &ibmr->u.frmr;
        } while (frmr->fr_state != FRMR_IS_FREE);

        ibmr->ic = ic;
        ibmr->device = rds_ibdev;
        ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
        if (ret == 0) {
                *key = frmr->mr->rkey;
        } else {
                rds_ib_free_frmr(ibmr, false);
                ibmr = ERR_PTR(ret);
        }

        return ibmr;
}

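/* Queue an MR for lazy cleanup by the pool flush worker: stale MRs go
 * on the drop list, usable ones back on the free list.
 */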
void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (frmr->fr_state == FRMR_IS_STALE)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
}