linux/net/sunrpc/xprtrdma/svc_rdma_marshal.c
/*
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

/*
 * Decodes a read chunk list. The expected format is as follows:
 *    discrim  : xdr_one
 *    position : __be32 offset into XDR stream
 *    handle   : __be32 RKEY
 *    length   : __be32 length of segment
 *    offset   : __be64 remote VA of segment
 *    . . .
 *  end-of-list: xdr_zero
 */
static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
{
	struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;

	while (ch->rc_discrim != xdr_zero) {
		if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
		    (unsigned long)vaend) {
			dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
			return NULL;
		}
		ch++;
	}
	return &ch->rc_position;
}
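
/*
 * For illustration only: a read list carrying a single chunk at XDR
 * position 0 would arrive as the XDR words
 *
 *	xdr_one, 0 (position), rkey, length, offset (2 words), xdr_zero
 *
 * assuming the struct rpcrdma_read_chunk layout of discriminator,
 * position, then an rpcrdma_segment of handle, length, and 64-bit
 * offset. decode_read_list() returns the address of the word just past
 * the terminating xdr_zero.
 */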

/*
 * Decodes a write chunk list. The expected format is as follows:
 *    discrim  : xdr_one
 *    nchunks  : <count>
 *       handle   : __be32 RKEY           ---+
 *       length   : __be32 <len of segment>  |
 *       offset   : __be64 remote VA         + <count>
 *       . . .                               |
 *                                        ---+
 */
static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
{
	unsigned long start, end;
	int nchunks;

	struct rpcrdma_write_array *ary =
		(struct rpcrdma_write_array *)va;

	/* Check for no write list */
	if (ary->wc_discrim == xdr_zero)
		return &ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
	    (unsigned long)vaend) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}
	nchunks = be32_to_cpu(ary->wc_nchunks);

	start = (unsigned long)&ary->wc_array[0];
	end = (unsigned long)vaend;
	if (nchunks < 0 ||
	    nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
	    (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, nchunks, vaend);
		return NULL;
	}
	/*
	 * rs_length is the 2nd 4B field in wc_target; taking its address
	 * skips the write-list terminator word.
	 */
	return &ary->wc_array[nchunks].wc_target.rs_length;
}
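
/*
 * For illustration only: a write list holding one write chunk with two
 * segments would be laid out as
 *
 *	xdr_one, 2 (nchunks),
 *	rkey0, length0, offset0 (2 words),
 *	rkey1, length1, offset1 (2 words),
 *	xdr_zero (write-list terminator), <reply-array discriminator>, ...
 *
 * Returning &wc_array[nchunks].wc_target.rs_length therefore points the
 * caller at the reply-array discriminator, one word past the write-list
 * terminator.
 */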

static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
{
	unsigned long start, end;
	int nchunks;
	struct rpcrdma_write_array *ary =
		(struct rpcrdma_write_array *)va;

	/* Check for no reply-array */
	if (ary->wc_discrim == xdr_zero)
		return &ary->wc_nchunks;

	if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
	    (unsigned long)vaend) {
		dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
		return NULL;
	}
	nchunks = be32_to_cpu(ary->wc_nchunks);

	start = (unsigned long)&ary->wc_array[0];
	end = (unsigned long)vaend;
	if (nchunks < 0 ||
	    nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
	    (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
		dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
			ary, nchunks, vaend);
		return NULL;
	}
	return (__be32 *)&ary->wc_array[nchunks];
}

/**
 * svc_rdma_xdr_decode_req - Parse incoming RPC-over-RDMA header
 * @rq_arg: Receive buffer
 *
 * On entry, rq_arg->head[0].iov_base points to the first byte of the
 * RPC-over-RDMA header.
 *
 * On successful exit, head[0] points to the first byte past the
 * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
 * The length of the RPC-over-RDMA header is returned.
 */
int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
	struct rpcrdma_msg *rmsgp;
	__be32 *va, *vaend;
	unsigned int len;
	u32 hdr_len;

	/* Verify that there are enough bytes for the header plus something */
	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
		dprintk("svcrdma: header too short = %d\n",
			rq_arg->len);
		return -EINVAL;
	}

	rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
	if (rmsgp->rm_vers != rpcrdma_version) {
		dprintk("%s: bad version %u\n", __func__,
			be32_to_cpu(rmsgp->rm_vers));
		return -EPROTONOSUPPORT;
	}

	switch (be32_to_cpu(rmsgp->rm_type)) {
	case RDMA_MSG:
	case RDMA_NOMSG:
		break;

	case RDMA_DONE:
		/* Just drop it */
		dprintk("svcrdma: dropping RDMA_DONE message\n");
		return 0;

	case RDMA_ERROR:
		/* Possible if this is a backchannel reply.
		 * XXX: We should cancel this XID, though.
		 */
		dprintk("svcrdma: dropping RDMA_ERROR message\n");
		return 0;

	case RDMA_MSGP:
		/* Pull in the extra for the padded case, bump our pointer */
		rmsgp->rm_body.rm_padded.rm_align =
			be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
		rmsgp->rm_body.rm_padded.rm_thresh =
			be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);

		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
		rq_arg->head[0].iov_base = va;
		len = (u32)((unsigned long)va - (unsigned long)rmsgp);
		rq_arg->head[0].iov_len -= len;
		if (len > rq_arg->len)
			return -EINVAL;
		return len;
	default:
		dprintk("svcrdma: bad rdma procedure (%u)\n",
			be32_to_cpu(rmsgp->rm_type));
		return -EINVAL;
	}

	/* The header ends with three chunk lists, decoded in order: a
	 * read list, a write list, and a reply chunk. Each may be empty
	 * (indicated by xdr_zero).
	 */
	va = &rmsgp->rm_body.rm_chunks[0];
	vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
	va = decode_read_list(va, vaend);
	if (!va) {
		dprintk("svcrdma: failed to decode read list\n");
		return -EINVAL;
	}
	va = decode_write_list(va, vaend);
	if (!va) {
		dprintk("svcrdma: failed to decode write list\n");
		return -EINVAL;
	}
	va = decode_reply_array(va, vaend);
	if (!va) {
		dprintk("svcrdma: failed to decode reply chunk\n");
		return -EINVAL;
	}

	rq_arg->head[0].iov_base = va;
	hdr_len = (unsigned long)va - (unsigned long)rmsgp;
	rq_arg->head[0].iov_len -= hdr_len;
	return hdr_len;
}
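
/*
 * For illustration only: the smallest well-formed RDMA_MSG header is
 * seven XDR words (xid, version, credits, RDMA_MSG, then one xdr_zero
 * word each for an empty read list, an empty write list, and no reply
 * chunk). For such a message svc_rdma_xdr_decode_req() advances
 * head[0] past those 28 bytes and returns 28.
 */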

int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
			      struct rpcrdma_msg *rmsgp,
			      enum rpcrdma_errcode err, __be32 *va)
{
	__be32 *startp = va;

	*va++ = rmsgp->rm_xid;
	*va++ = rmsgp->rm_vers;
	*va++ = cpu_to_be32(xprt->sc_max_requests);
	*va++ = rdma_error;
	*va++ = cpu_to_be32(err);
	if (err == ERR_VERS) {
		*va++ = rpcrdma_version;
		*va++ = rpcrdma_version;
	}

	return (int)((unsigned long)va - (unsigned long)startp);
}
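
/*
 * For illustration only: an ERR_VERS reply built by
 * svc_rdma_xdr_encode_error() consists of seven XDR words
 *
 *	xid, version, credits, rdma_error, ERR_VERS, low version, high version
 *
 * (this implementation advertises rpcrdma_version as both the lowest
 * and highest version it supports). The function returns the number of
 * bytes written: 28 here, or 20 for other error codes.
 */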

int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_write_array *wr_ary;

	/* There is no read-list in a reply */

	/* skip write list */
	wr_ary = (struct rpcrdma_write_array *)
		&rmsgp->rm_body.rm_chunks[1];
	if (wr_ary->wc_discrim)
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
			wc_target.rs_length;
	else
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_nchunks;

	/* skip reply array */
	if (wr_ary->wc_discrim)
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
	else
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_nchunks;

	return (unsigned long) wr_ary - (unsigned long) rmsgp;
}

void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
	struct rpcrdma_write_array *ary;

	/* no read-list */
	rmsgp->rm_body.rm_chunks[0] = xdr_zero;

	/* write-array discrim */
	ary = (struct rpcrdma_write_array *)
		&rmsgp->rm_body.rm_chunks[1];
	ary->wc_discrim = xdr_one;
	ary->wc_nchunks = cpu_to_be32(chunks);

	/* write-list terminator */
	ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;

	/* reply-array discriminator */
	ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}

void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
				     int chunks)
{
	ary->wc_discrim = xdr_one;
	ary->wc_nchunks = cpu_to_be32(chunks);
}

void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
				     int chunk_no,
				     __be32 rs_handle,
				     __be64 rs_offset,
				     u32 write_len)
{
	struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;

	seg->rs_handle = rs_handle;
	seg->rs_offset = rs_offset;
	seg->rs_length = cpu_to_be32(write_len);
}
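
/*
 * Example usage (a hypothetical caller; the real reply path lives in
 * the svcrdma send code): to advertise two write chunks in a reply
 * header one might do
 *
 *	svc_rdma_xdr_encode_write_list(rdma_resp, 2);
 *	ary = (struct rpcrdma_write_array *)&rdma_resp->rm_body.rm_chunks[1];
 *	svc_rdma_xdr_encode_array_chunk(ary, 0, rs_handle0, rs_offset0, len0);
 *	svc_rdma_xdr_encode_array_chunk(ary, 1, rs_handle1, rs_offset1, len1);
 *
 * where each handle/offset/length triple describes a client-provided
 * sink buffer being written to.
 */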

void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
				      struct rpcrdma_msg *rdma_argp,
				      struct rpcrdma_msg *rdma_resp,
				      enum rpcrdma_proc rdma_type)
{
	rdma_resp->rm_xid = rdma_argp->rm_xid;
	rdma_resp->rm_vers = rdma_argp->rm_vers;
	rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
	rdma_resp->rm_type = cpu_to_be32(rdma_type);

	/* Encode empty chunk lists: no read list, no write list, no reply chunk */
	rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
	rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
	rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}