linux/net/sunrpc/xprtrdma/svc_rdma_marshal.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the BSD-type
   8 * license below:
   9 *
  10 * Redistribution and use in source and binary forms, with or without
  11 * modification, are permitted provided that the following conditions
  12 * are met:
  13 *
  14 *      Redistributions of source code must retain the above copyright
  15 *      notice, this list of conditions and the following disclaimer.
  16 *
  17 *      Redistributions in binary form must reproduce the above
  18 *      copyright notice, this list of conditions and the following
  19 *      disclaimer in the documentation and/or other materials provided
  20 *      with the distribution.
  21 *
  22 *      Neither the name of the Network Appliance, Inc. nor the names of
  23 *      its contributors may be used to endorse or promote products
  24 *      derived from this software without specific prior written
  25 *      permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38 *
  39 * Author: Tom Tucker <tom@opengridcomputing.com>
  40 */
  41
  42#include <linux/sunrpc/xdr.h>
  43#include <linux/sunrpc/debug.h>
  44#include <asm/unaligned.h>
  45#include <linux/sunrpc/rpc_rdma.h>
  46#include <linux/sunrpc/svc_rdma.h>
  47
  48#define RPCDBG_FACILITY RPCDBG_SVCXPRT
  49
  50/*
  51 * Decodes a read chunk list. The expected format is as follows:
  52 *    descrim  : xdr_one
  53 *    position : u32 offset into XDR stream
  54 *    handle   : u32 RKEY
  55 *    . . .
  56 *  end-of-list: xdr_zero
  57 */
  58static u32 *decode_read_list(u32 *va, u32 *vaend)
  59{
  60        struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
  61
  62        while (ch->rc_discrim != xdr_zero) {
  63                u64 ch_offset;
  64
  65                if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
  66                    (unsigned long)vaend) {
  67                        dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
  68                        return NULL;
  69                }
  70
  71                ch->rc_discrim = ntohl(ch->rc_discrim);
  72                ch->rc_position = ntohl(ch->rc_position);
  73                ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle);
  74                ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length);
  75                va = (u32 *)&ch->rc_target.rs_offset;
  76                xdr_decode_hyper(va, &ch_offset);
  77                put_unaligned(ch_offset, (u64 *)va);
  78                ch++;
  79        }
  80        return (u32 *)&ch->rc_position;
  81}
  82
  83/*
  84 * Determine number of chunks and total bytes in chunk list. The chunk
  85 * list has already been verified to fit within the RPCRDMA header.
  86 */
  87void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
  88                               int *ch_count, int *byte_count)
  89{
  90        /* compute the number of bytes represented by read chunks */
  91        *byte_count = 0;
  92        *ch_count = 0;
  93        for (; ch->rc_discrim != 0; ch++) {
  94                *byte_count = *byte_count + ch->rc_target.rs_length;
  95                *ch_count = *ch_count + 1;
  96        }
  97}
  98
  99/*
 100 * Decodes a write chunk list. The expected format is as follows:
 101 *    descrim  : xdr_one
 102 *    nchunks  : <count>
 103 *       handle   : u32 RKEY              ---+
 104 *       length   : u32 <len of segment>     |
 105 *       offset   : remove va                + <count>
 106 *       . . .                               |
 107 *                                        ---+
 108 */
 109static u32 *decode_write_list(u32 *va, u32 *vaend)
 110{
 111        int ch_no;
 112        struct rpcrdma_write_array *ary =
 113                (struct rpcrdma_write_array *)va;
 114
 115        /* Check for not write-array */
 116        if (ary->wc_discrim == xdr_zero)
 117                return (u32 *)&ary->wc_nchunks;
 118
 119        if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
 120            (unsigned long)vaend) {
 121                dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
 122                return NULL;
 123        }
 124        ary->wc_discrim = ntohl(ary->wc_discrim);
 125        ary->wc_nchunks = ntohl(ary->wc_nchunks);
 126        if (((unsigned long)&ary->wc_array[0] +
 127             (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
 128            (unsigned long)vaend) {
 129                dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
 130                        ary, ary->wc_nchunks, vaend);
 131                return NULL;
 132        }
 133        for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
 134                u64 ch_offset;
 135
 136                ary->wc_array[ch_no].wc_target.rs_handle =
 137                        ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
 138                ary->wc_array[ch_no].wc_target.rs_length =
 139                        ntohl(ary->wc_array[ch_no].wc_target.rs_length);
 140                va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
 141                xdr_decode_hyper(va, &ch_offset);
 142                put_unaligned(ch_offset, (u64 *)va);
 143        }
 144
 145        /*
 146         * rs_length is the 2nd 4B field in wc_target and taking its
 147         * address skips the list terminator
 148         */
 149        return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length;
 150}
 151
 152static u32 *decode_reply_array(u32 *va, u32 *vaend)
 153{
 154        int ch_no;
 155        struct rpcrdma_write_array *ary =
 156                (struct rpcrdma_write_array *)va;
 157
 158        /* Check for no reply-array */
 159        if (ary->wc_discrim == xdr_zero)
 160                return (u32 *)&ary->wc_nchunks;
 161
 162        if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
 163            (unsigned long)vaend) {
 164                dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
 165                return NULL;
 166        }
 167        ary->wc_discrim = ntohl(ary->wc_discrim);
 168        ary->wc_nchunks = ntohl(ary->wc_nchunks);
 169        if (((unsigned long)&ary->wc_array[0] +
 170             (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
 171            (unsigned long)vaend) {
 172                dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
 173                        ary, ary->wc_nchunks, vaend);
 174                return NULL;
 175        }
 176        for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
 177                u64 ch_offset;
 178
 179                ary->wc_array[ch_no].wc_target.rs_handle =
 180                        ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
 181                ary->wc_array[ch_no].wc_target.rs_length =
 182                        ntohl(ary->wc_array[ch_no].wc_target.rs_length);
 183                va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
 184                xdr_decode_hyper(va, &ch_offset);
 185                put_unaligned(ch_offset, (u64 *)va);
 186        }
 187
 188        return (u32 *)&ary->wc_array[ch_no];
 189}
 190
 191int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
 192                            struct svc_rqst *rqstp)
 193{
 194        struct rpcrdma_msg *rmsgp = NULL;
 195        u32 *va;
 196        u32 *vaend;
 197        u32 hdr_len;
 198
 199        rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
 200
 201        /* Verify that there's enough bytes for header + something */
 202        if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
 203                dprintk("svcrdma: header too short = %d\n",
 204                        rqstp->rq_arg.len);
 205                return -EINVAL;
 206        }
 207
 208        /* Decode the header */
 209        rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
 210        rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
 211        rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
 212        rmsgp->rm_type = ntohl(rmsgp->rm_type);
 213
 214        if (rmsgp->rm_vers != RPCRDMA_VERSION)
 215                return -ENOSYS;
 216
 217        /* Pull in the extra for the padded case and bump our pointer */
 218        if (rmsgp->rm_type == RDMA_MSGP) {
 219                int hdrlen;
 220                rmsgp->rm_body.rm_padded.rm_align =
 221                        ntohl(rmsgp->rm_body.rm_padded.rm_align);
 222                rmsgp->rm_body.rm_padded.rm_thresh =
 223                        ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
 224
 225                va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
 226                rqstp->rq_arg.head[0].iov_base = va;
 227                hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
 228                rqstp->rq_arg.head[0].iov_len -= hdrlen;
 229                if (hdrlen > rqstp->rq_arg.len)
 230                        return -EINVAL;
 231                return hdrlen;
 232        }
 233
 234        /* The chunk list may contain either a read chunk list or a write
 235         * chunk list and a reply chunk list.
 236         */
 237        va = &rmsgp->rm_body.rm_chunks[0];
 238        vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
 239        va = decode_read_list(va, vaend);
 240        if (!va)
 241                return -EINVAL;
 242        va = decode_write_list(va, vaend);
 243        if (!va)
 244                return -EINVAL;
 245        va = decode_reply_array(va, vaend);
 246        if (!va)
 247                return -EINVAL;
 248
 249        rqstp->rq_arg.head[0].iov_base = va;
 250        hdr_len = (unsigned long)va - (unsigned long)rmsgp;
 251        rqstp->rq_arg.head[0].iov_len -= hdr_len;
 252
 253        *rdma_req = rmsgp;
 254        return hdr_len;
 255}
 256
 257int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
 258{
 259        struct rpcrdma_msg *rmsgp = NULL;
 260        struct rpcrdma_read_chunk *ch;
 261        struct rpcrdma_write_array *ary;
 262        u32 *va;
 263        u32 hdrlen;
 264
 265        dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
 266                rqstp);
 267        rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
 268
 269        /* Pull in the extra for the padded case and bump our pointer */
 270        if (rmsgp->rm_type == RDMA_MSGP) {
 271                va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
 272                rqstp->rq_arg.head[0].iov_base = va;
 273                hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
 274                rqstp->rq_arg.head[0].iov_len -= hdrlen;
 275                return hdrlen;
 276        }
 277
 278        /*
 279         * Skip all chunks to find RPC msg. These were previously processed
 280         */
 281        va = &rmsgp->rm_body.rm_chunks[0];
 282
 283        /* Skip read-list */
 284        for (ch = (struct rpcrdma_read_chunk *)va;
 285             ch->rc_discrim != xdr_zero; ch++);
 286        va = (u32 *)&ch->rc_position;
 287
 288        /* Skip write-list */
 289        ary = (struct rpcrdma_write_array *)va;
 290        if (ary->wc_discrim == xdr_zero)
 291                va = (u32 *)&ary->wc_nchunks;
 292        else
 293                /*
 294                 * rs_length is the 2nd 4B field in wc_target and taking its
 295                 * address skips the list terminator
 296                 */
 297                va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
 298
 299        /* Skip reply-array */
 300        ary = (struct rpcrdma_write_array *)va;
 301        if (ary->wc_discrim == xdr_zero)
 302                va = (u32 *)&ary->wc_nchunks;
 303        else
 304                va = (u32 *)&ary->wc_array[ary->wc_nchunks];
 305
 306        rqstp->rq_arg.head[0].iov_base = va;
 307        hdrlen = (unsigned long)va - (unsigned long)rmsgp;
 308        rqstp->rq_arg.head[0].iov_len -= hdrlen;
 309
 310        return hdrlen;
 311}
 312
 313int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
 314                              struct rpcrdma_msg *rmsgp,
 315                              enum rpcrdma_errcode err, u32 *va)
 316{
 317        u32 *startp = va;
 318
 319        *va++ = htonl(rmsgp->rm_xid);
 320        *va++ = htonl(rmsgp->rm_vers);
 321        *va++ = htonl(xprt->sc_max_requests);
 322        *va++ = htonl(RDMA_ERROR);
 323        *va++ = htonl(err);
 324        if (err == ERR_VERS) {
 325                *va++ = htonl(RPCRDMA_VERSION);
 326                *va++ = htonl(RPCRDMA_VERSION);
 327        }
 328
 329        return (int)((unsigned long)va - (unsigned long)startp);
 330}
 331
 332int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
 333{
 334        struct rpcrdma_write_array *wr_ary;
 335
 336        /* There is no read-list in a reply */
 337
 338        /* skip write list */
 339        wr_ary = (struct rpcrdma_write_array *)
 340                &rmsgp->rm_body.rm_chunks[1];
 341        if (wr_ary->wc_discrim)
 342                wr_ary = (struct rpcrdma_write_array *)
 343                        &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
 344                        wc_target.rs_length;
 345        else
 346                wr_ary = (struct rpcrdma_write_array *)
 347                        &wr_ary->wc_nchunks;
 348
 349        /* skip reply array */
 350        if (wr_ary->wc_discrim)
 351                wr_ary = (struct rpcrdma_write_array *)
 352                        &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
 353        else
 354                wr_ary = (struct rpcrdma_write_array *)
 355                        &wr_ary->wc_nchunks;
 356
 357        return (unsigned long) wr_ary - (unsigned long) rmsgp;
 358}
 359
 360void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
 361{
 362        struct rpcrdma_write_array *ary;
 363
 364        /* no read-list */
 365        rmsgp->rm_body.rm_chunks[0] = xdr_zero;
 366
 367        /* write-array discrim */
 368        ary = (struct rpcrdma_write_array *)
 369                &rmsgp->rm_body.rm_chunks[1];
 370        ary->wc_discrim = xdr_one;
 371        ary->wc_nchunks = htonl(chunks);
 372
 373        /* write-list terminator */
 374        ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
 375
 376        /* reply-array discriminator */
 377        ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
 378}
 379
 380void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
 381                                 int chunks)
 382{
 383        ary->wc_discrim = xdr_one;
 384        ary->wc_nchunks = htonl(chunks);
 385}
 386
 387void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
 388                                     int chunk_no,
 389                                     u32 rs_handle, u64 rs_offset,
 390                                     u32 write_len)
 391{
 392        struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
 393        seg->rs_handle = htonl(rs_handle);
 394        seg->rs_length = htonl(write_len);
 395        xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset);
 396}
 397
 398void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
 399                                  struct rpcrdma_msg *rdma_argp,
 400                                  struct rpcrdma_msg *rdma_resp,
 401                                  enum rpcrdma_proc rdma_type)
 402{
 403        rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
 404        rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
 405        rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
 406        rdma_resp->rm_type = htonl(rdma_type);
 407
 408        /* Encode <nul> chunks lists */
 409        rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
 410        rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
 411        rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
 412}
 413