linux/fs/nfs/filelayout/filelayoutdev.c
<<
>>
Prefs
   1/*
   2 *  Device operations for the pnfs nfs4 file layout driver.
   3 *
   4 *  Copyright (c) 2002
   5 *  The Regents of the University of Michigan
   6 *  All Rights Reserved
   7 *
   8 *  Dean Hildebrand <dhildebz@umich.edu>
   9 *  Garth Goodson   <Garth.Goodson@netapp.com>
  10 *
  11 *  Permission is granted to use, copy, create derivative works, and
  12 *  redistribute this software and such derivative works for any purpose,
  13 *  so long as the name of the University of Michigan is not used in
  14 *  any advertising or publicity pertaining to the use or distribution
  15 *  of this software without specific, written prior authorization. If
  16 *  the above copyright notice or any other identification of the
  17 *  University of Michigan is included in any copy of any portion of
  18 *  this software, then the disclaimer below must also be included.
  19 *
  20 *  This software is provided as is, without representation or warranty
  21 *  of any kind either express or implied, including without limitation
  22 *  the implied warranties of merchantability, fitness for a particular
  23 *  purpose, or noninfringement.  The Regents of the University of
  24 *  Michigan shall not be liable for any damages, including special,
  25 *  indirect, incidental, or consequential damages, with respect to any
  26 *  claim arising out of or in connection with the use of the software,
  27 *  even if it has been or is hereafter advised of the possibility of
  28 *  such damages.
  29 */
  30
  31#include <linux/nfs_fs.h>
  32#include <linux/vmalloc.h>
  33#include <linux/module.h>
  34#include <linux/sunrpc/addr.h>
  35
  36#include "../internal.h"
  37#include "../nfs4session.h"
  38#include "filelayout.h"
  39
  40#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
  41
  42static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
  43static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
  44
  45/*
  46 * Data server cache
  47 *
  48 * Data servers can be mapped to different device ids.
  49 * nfs4_pnfs_ds reference counting
  50 *   - set to 1 on allocation
  51 *   - incremented when a device id maps a data server already in the cache.
  52 *   - decremented when deviceid is removed from the cache.
  53 */
  54static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
  55static LIST_HEAD(nfs4_data_server_cache);
  56
  57/* Debug routines */
  58void
  59print_ds(struct nfs4_pnfs_ds *ds)
  60{
  61        if (ds == NULL) {
  62                printk("%s NULL device\n", __func__);
  63                return;
  64        }
  65        printk("        ds %s\n"
  66                "        ref count %d\n"
  67                "        client %p\n"
  68                "        cl_exchange_flags %x\n",
  69                ds->ds_remotestr,
  70                atomic_read(&ds->ds_count), ds->ds_clp,
  71                ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
  72}
  73
  74static bool
  75same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
  76{
  77        struct sockaddr_in *a, *b;
  78        struct sockaddr_in6 *a6, *b6;
  79
  80        if (addr1->sa_family != addr2->sa_family)
  81                return false;
  82
  83        switch (addr1->sa_family) {
  84        case AF_INET:
  85                a = (struct sockaddr_in *)addr1;
  86                b = (struct sockaddr_in *)addr2;
  87
  88                if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
  89                    a->sin_port == b->sin_port)
  90                        return true;
  91                break;
  92
  93        case AF_INET6:
  94                a6 = (struct sockaddr_in6 *)addr1;
  95                b6 = (struct sockaddr_in6 *)addr2;
  96
  97                /* LINKLOCAL addresses must have matching scope_id */
  98                if (ipv6_addr_src_scope(&a6->sin6_addr) ==
  99                    IPV6_ADDR_SCOPE_LINKLOCAL &&
 100                    a6->sin6_scope_id != b6->sin6_scope_id)
 101                        return false;
 102
 103                if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
 104                    a6->sin6_port == b6->sin6_port)
 105                        return true;
 106                break;
 107
 108        default:
 109                dprintk("%s: unhandled address family: %u\n",
 110                        __func__, addr1->sa_family);
 111                return false;
 112        }
 113
 114        return false;
 115}
 116
 117static bool
 118_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
 119                               const struct list_head *dsaddrs2)
 120{
 121        struct nfs4_pnfs_ds_addr *da1, *da2;
 122
 123        /* step through both lists, comparing as we go */
 124        for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
 125             da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
 126             da1 != NULL && da2 != NULL;
 127             da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
 128             da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
 129                if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
 130                                   (struct sockaddr *)&da2->da_addr))
 131                        return false;
 132        }
 133        if (da1 == NULL && da2 == NULL)
 134                return true;
 135
 136        return false;
 137}
 138
 139/*
 140 * Lookup DS by addresses.  nfs4_ds_cache_lock is held
 141 */
 142static struct nfs4_pnfs_ds *
 143_data_server_lookup_locked(const struct list_head *dsaddrs)
 144{
 145        struct nfs4_pnfs_ds *ds;
 146
 147        list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
 148                if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
 149                        return ds;
 150        return NULL;
 151}
 152
 153/*
 154 * Create an rpc connection to the nfs4_pnfs_ds data server
 155 * Currently only supports IPv4 and IPv6 addresses
 156 */
 157static int
 158nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
 159{
 160        struct nfs_client *clp = ERR_PTR(-EIO);
 161        struct nfs4_pnfs_ds_addr *da;
 162        int status = 0;
 163
 164        dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
 165                mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 166
 167        list_for_each_entry(da, &ds->ds_addrs, da_node) {
 168                dprintk("%s: DS %s: trying address %s\n",
 169                        __func__, ds->ds_remotestr, da->da_remotestr);
 170
 171                clp = nfs4_set_ds_client(mds_srv->nfs_client,
 172                                        (struct sockaddr *)&da->da_addr,
 173                                        da->da_addrlen, IPPROTO_TCP,
 174                                        dataserver_timeo, dataserver_retrans);
 175                if (!IS_ERR(clp))
 176                        break;
 177        }
 178
 179        if (IS_ERR(clp)) {
 180                status = PTR_ERR(clp);
 181                goto out;
 182        }
 183
 184        status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
 185        if (status)
 186                goto out_put;
 187
 188        smp_wmb();
 189        ds->ds_clp = clp;
 190        dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 191out:
 192        return status;
 193out_put:
 194        nfs_put_client(clp);
 195        goto out;
 196}
 197
 198static void
 199destroy_ds(struct nfs4_pnfs_ds *ds)
 200{
 201        struct nfs4_pnfs_ds_addr *da;
 202
 203        dprintk("--> %s\n", __func__);
 204        ifdebug(FACILITY)
 205                print_ds(ds);
 206
 207        nfs_put_client(ds->ds_clp);
 208
 209        while (!list_empty(&ds->ds_addrs)) {
 210                da = list_first_entry(&ds->ds_addrs,
 211                                      struct nfs4_pnfs_ds_addr,
 212                                      da_node);
 213                list_del_init(&da->da_node);
 214                kfree(da->da_remotestr);
 215                kfree(da);
 216        }
 217
 218        kfree(ds->ds_remotestr);
 219        kfree(ds);
 220}
 221
 222void
 223nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 224{
 225        struct nfs4_pnfs_ds *ds;
 226        int i;
 227
 228        nfs4_print_deviceid(&dsaddr->id_node.deviceid);
 229
 230        for (i = 0; i < dsaddr->ds_num; i++) {
 231                ds = dsaddr->ds_list[i];
 232                if (ds != NULL) {
 233                        if (atomic_dec_and_lock(&ds->ds_count,
 234                                                &nfs4_ds_cache_lock)) {
 235                                list_del_init(&ds->ds_node);
 236                                spin_unlock(&nfs4_ds_cache_lock);
 237                                destroy_ds(ds);
 238                        }
 239                }
 240        }
 241        kfree(dsaddr->stripe_indices);
 242        kfree(dsaddr);
 243}
 244
 245/*
 246 * Create a string with a human readable address and port to avoid
 247 * complicated setup around many dprinks.
 248 */
 249static char *
 250nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
 251{
 252        struct nfs4_pnfs_ds_addr *da;
 253        char *remotestr;
 254        size_t len;
 255        char *p;
 256
 257        len = 3;        /* '{', '}' and eol */
 258        list_for_each_entry(da, dsaddrs, da_node) {
 259                len += strlen(da->da_remotestr) + 1;    /* string plus comma */
 260        }
 261
 262        remotestr = kzalloc(len, gfp_flags);
 263        if (!remotestr)
 264                return NULL;
 265
 266        p = remotestr;
 267        *(p++) = '{';
 268        len--;
 269        list_for_each_entry(da, dsaddrs, da_node) {
 270                size_t ll = strlen(da->da_remotestr);
 271
 272                if (ll > len)
 273                        goto out_err;
 274
 275                memcpy(p, da->da_remotestr, ll);
 276                p += ll;
 277                len -= ll;
 278
 279                if (len < 1)
 280                        goto out_err;
 281                (*p++) = ',';
 282                len--;
 283        }
 284        if (len < 2)
 285                goto out_err;
 286        *(p++) = '}';
 287        *p = '\0';
 288        return remotestr;
 289out_err:
 290        kfree(remotestr);
 291        return NULL;
 292}
 293
 294static struct nfs4_pnfs_ds *
 295nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 296{
 297        struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
 298        char *remotestr;
 299
 300        if (list_empty(dsaddrs)) {
 301                dprintk("%s: no addresses defined\n", __func__);
 302                goto out;
 303        }
 304
 305        ds = kzalloc(sizeof(*ds), gfp_flags);
 306        if (!ds)
 307                goto out;
 308
 309        /* this is only used for debugging, so it's ok if its NULL */
 310        remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
 311
 312        spin_lock(&nfs4_ds_cache_lock);
 313        tmp_ds = _data_server_lookup_locked(dsaddrs);
 314        if (tmp_ds == NULL) {
 315                INIT_LIST_HEAD(&ds->ds_addrs);
 316                list_splice_init(dsaddrs, &ds->ds_addrs);
 317                ds->ds_remotestr = remotestr;
 318                atomic_set(&ds->ds_count, 1);
 319                INIT_LIST_HEAD(&ds->ds_node);
 320                ds->ds_clp = NULL;
 321                list_add(&ds->ds_node, &nfs4_data_server_cache);
 322                dprintk("%s add new data server %s\n", __func__,
 323                        ds->ds_remotestr);
 324        } else {
 325                kfree(remotestr);
 326                kfree(ds);
 327                atomic_inc(&tmp_ds->ds_count);
 328                dprintk("%s data server %s found, inc'ed ds_count to %d\n",
 329                        __func__, tmp_ds->ds_remotestr,
 330                        atomic_read(&tmp_ds->ds_count));
 331                ds = tmp_ds;
 332        }
 333        spin_unlock(&nfs4_ds_cache_lock);
 334out:
 335        return ds;
 336}
 337
 338/*
 339 * Currently only supports ipv4, ipv6 and one multi-path address.
 340 */
 341static struct nfs4_pnfs_ds_addr *
 342decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
 343{
 344        struct nfs4_pnfs_ds_addr *da = NULL;
 345        char *buf, *portstr;
 346        __be16 port;
 347        int nlen, rlen;
 348        int tmp[2];
 349        __be32 *p;
 350        char *netid, *match_netid;
 351        size_t len, match_netid_len;
 352        char *startsep = "";
 353        char *endsep = "";
 354
 355
 356        /* r_netid */
 357        p = xdr_inline_decode(streamp, 4);
 358        if (unlikely(!p))
 359                goto out_err;
 360        nlen = be32_to_cpup(p++);
 361
 362        p = xdr_inline_decode(streamp, nlen);
 363        if (unlikely(!p))
 364                goto out_err;
 365
 366        netid = kmalloc(nlen+1, gfp_flags);
 367        if (unlikely(!netid))
 368                goto out_err;
 369
 370        netid[nlen] = '\0';
 371        memcpy(netid, p, nlen);
 372
 373        /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 374        p = xdr_inline_decode(streamp, 4);
 375        if (unlikely(!p))
 376                goto out_free_netid;
 377        rlen = be32_to_cpup(p);
 378
 379        p = xdr_inline_decode(streamp, rlen);
 380        if (unlikely(!p))
 381                goto out_free_netid;
 382
 383        /* port is ".ABC.DEF", 8 chars max */
 384        if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
 385                dprintk("%s: Invalid address, length %d\n", __func__,
 386                        rlen);
 387                goto out_free_netid;
 388        }
 389        buf = kmalloc(rlen + 1, gfp_flags);
 390        if (!buf) {
 391                dprintk("%s: Not enough memory\n", __func__);
 392                goto out_free_netid;
 393        }
 394        buf[rlen] = '\0';
 395        memcpy(buf, p, rlen);
 396
 397        /* replace port '.' with '-' */
 398        portstr = strrchr(buf, '.');
 399        if (!portstr) {
 400                dprintk("%s: Failed finding expected dot in port\n",
 401                        __func__);
 402                goto out_free_buf;
 403        }
 404        *portstr = '-';
 405
 406        /* find '.' between address and port */
 407        portstr = strrchr(buf, '.');
 408        if (!portstr) {
 409                dprintk("%s: Failed finding expected dot between address and "
 410                        "port\n", __func__);
 411                goto out_free_buf;
 412        }
 413        *portstr = '\0';
 414
 415        da = kzalloc(sizeof(*da), gfp_flags);
 416        if (unlikely(!da))
 417                goto out_free_buf;
 418
 419        INIT_LIST_HEAD(&da->da_node);
 420
 421        if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 422                      sizeof(da->da_addr))) {
 423                dprintk("%s: error parsing address %s\n", __func__, buf);
 424                goto out_free_da;
 425        }
 426
 427        portstr++;
 428        sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
 429        port = htons((tmp[0] << 8) | (tmp[1]));
 430
 431        switch (da->da_addr.ss_family) {
 432        case AF_INET:
 433                ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
 434                da->da_addrlen = sizeof(struct sockaddr_in);
 435                match_netid = "tcp";
 436                match_netid_len = 3;
 437                break;
 438
 439        case AF_INET6:
 440                ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
 441                da->da_addrlen = sizeof(struct sockaddr_in6);
 442                match_netid = "tcp6";
 443                match_netid_len = 4;
 444                startsep = "[";
 445                endsep = "]";
 446                break;
 447
 448        default:
 449                dprintk("%s: unsupported address family: %u\n",
 450                        __func__, da->da_addr.ss_family);
 451                goto out_free_da;
 452        }
 453
 454        if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
 455                dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
 456                        __func__, netid, match_netid);
 457                goto out_free_da;
 458        }
 459
 460        /* save human readable address */
 461        len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
 462        da->da_remotestr = kzalloc(len, gfp_flags);
 463
 464        /* NULL is ok, only used for dprintk */
 465        if (da->da_remotestr)
 466                snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
 467                         buf, endsep, ntohs(port));
 468
 469        dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 470        kfree(buf);
 471        kfree(netid);
 472        return da;
 473
 474out_free_da:
 475        kfree(da);
 476out_free_buf:
 477        dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
 478        kfree(buf);
 479out_free_netid:
 480        kfree(netid);
 481out_err:
 482        return NULL;
 483}
 484
 485/* Decode opaque device data and return the result */
 486struct nfs4_file_layout_dsaddr *
 487nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 488                gfp_t gfp_flags)
 489{
 490        int i;
 491        u32 cnt, num;
 492        u8 *indexp;
 493        __be32 *p;
 494        u8 *stripe_indices;
 495        u8 max_stripe_index;
 496        struct nfs4_file_layout_dsaddr *dsaddr = NULL;
 497        struct xdr_stream stream;
 498        struct xdr_buf buf;
 499        struct page *scratch;
 500        struct list_head dsaddrs;
 501        struct nfs4_pnfs_ds_addr *da;
 502
 503        /* set up xdr stream */
 504        scratch = alloc_page(gfp_flags);
 505        if (!scratch)
 506                goto out_err;
 507
 508        xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
 509        xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 510
 511        /* Get the stripe count (number of stripe index) */
 512        p = xdr_inline_decode(&stream, 4);
 513        if (unlikely(!p))
 514                goto out_err_free_scratch;
 515
 516        cnt = be32_to_cpup(p);
 517        dprintk("%s stripe count  %d\n", __func__, cnt);
 518        if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
 519                printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
 520                       "supported maximum %d\n", __func__,
 521                        cnt, NFS4_PNFS_MAX_STRIPE_CNT);
 522                goto out_err_free_scratch;
 523        }
 524
 525        /* read stripe indices */
 526        stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
 527        if (!stripe_indices)
 528                goto out_err_free_scratch;
 529
 530        p = xdr_inline_decode(&stream, cnt << 2);
 531        if (unlikely(!p))
 532                goto out_err_free_stripe_indices;
 533
 534        indexp = &stripe_indices[0];
 535        max_stripe_index = 0;
 536        for (i = 0; i < cnt; i++) {
 537                *indexp = be32_to_cpup(p++);
 538                max_stripe_index = max(max_stripe_index, *indexp);
 539                indexp++;
 540        }
 541
 542        /* Check the multipath list count */
 543        p = xdr_inline_decode(&stream, 4);
 544        if (unlikely(!p))
 545                goto out_err_free_stripe_indices;
 546
 547        num = be32_to_cpup(p);
 548        dprintk("%s ds_num %u\n", __func__, num);
 549        if (num > NFS4_PNFS_MAX_MULTI_CNT) {
 550                printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
 551                        "supported maximum %d\n", __func__,
 552                        num, NFS4_PNFS_MAX_MULTI_CNT);
 553                goto out_err_free_stripe_indices;
 554        }
 555
 556        /* validate stripe indices are all < num */
 557        if (max_stripe_index >= num) {
 558                printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
 559                        __func__, max_stripe_index, num);
 560                goto out_err_free_stripe_indices;
 561        }
 562
 563        dsaddr = kzalloc(sizeof(*dsaddr) +
 564                        (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
 565                        gfp_flags);
 566        if (!dsaddr)
 567                goto out_err_free_stripe_indices;
 568
 569        dsaddr->stripe_count = cnt;
 570        dsaddr->stripe_indices = stripe_indices;
 571        stripe_indices = NULL;
 572        dsaddr->ds_num = num;
 573        nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);
 574
 575        INIT_LIST_HEAD(&dsaddrs);
 576
 577        for (i = 0; i < dsaddr->ds_num; i++) {
 578                int j;
 579                u32 mp_count;
 580
 581                p = xdr_inline_decode(&stream, 4);
 582                if (unlikely(!p))
 583                        goto out_err_free_deviceid;
 584
 585                mp_count = be32_to_cpup(p); /* multipath count */
 586                for (j = 0; j < mp_count; j++) {
 587                        da = decode_ds_addr(server->nfs_client->cl_net,
 588                                            &stream, gfp_flags);
 589                        if (da)
 590                                list_add_tail(&da->da_node, &dsaddrs);
 591                }
 592                if (list_empty(&dsaddrs)) {
 593                        dprintk("%s: no suitable DS addresses found\n",
 594                                __func__);
 595                        goto out_err_free_deviceid;
 596                }
 597
 598                dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
 599                if (!dsaddr->ds_list[i])
 600                        goto out_err_drain_dsaddrs;
 601
 602                /* If DS was already in cache, free ds addrs */
 603                while (!list_empty(&dsaddrs)) {
 604                        da = list_first_entry(&dsaddrs,
 605                                              struct nfs4_pnfs_ds_addr,
 606                                              da_node);
 607                        list_del_init(&da->da_node);
 608                        kfree(da->da_remotestr);
 609                        kfree(da);
 610                }
 611        }
 612
 613        __free_page(scratch);
 614        return dsaddr;
 615
 616out_err_drain_dsaddrs:
 617        while (!list_empty(&dsaddrs)) {
 618                da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
 619                                      da_node);
 620                list_del_init(&da->da_node);
 621                kfree(da->da_remotestr);
 622                kfree(da);
 623        }
 624out_err_free_deviceid:
 625        nfs4_fl_free_deviceid(dsaddr);
 626        /* stripe_indicies was part of dsaddr */
 627        goto out_err_free_scratch;
 628out_err_free_stripe_indices:
 629        kfree(stripe_indices);
 630out_err_free_scratch:
 631        __free_page(scratch);
 632out_err:
 633        dprintk("%s ERROR: returning NULL\n", __func__);
 634        return NULL;
 635}
 636
 637void
 638nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 639{
 640        nfs4_put_deviceid_node(&dsaddr->id_node);
 641}
 642
 643/*
 644 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
 645 * Then: ((res + fsi) % dsaddr->stripe_count)
 646 */
 647u32
 648nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
 649{
 650        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
 651        u64 tmp;
 652
 653        tmp = offset - flseg->pattern_offset;
 654        do_div(tmp, flseg->stripe_unit);
 655        tmp += flseg->first_stripe_index;
 656        return do_div(tmp, flseg->dsaddr->stripe_count);
 657}
 658
 659u32
 660nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
 661{
 662        return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
 663}
 664
 665struct nfs_fh *
 666nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 667{
 668        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
 669        u32 i;
 670
 671        if (flseg->stripe_type == STRIPE_SPARSE) {
 672                if (flseg->num_fh == 1)
 673                        i = 0;
 674                else if (flseg->num_fh == 0)
 675                        /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
 676                        return NULL;
 677                else
 678                        i = nfs4_fl_calc_ds_index(lseg, j);
 679        } else
 680                i = j;
 681        return flseg->fh_array[i];
 682}
 683
 684static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
 685{
 686        might_sleep();
 687        wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
 688                           nfs_wait_bit_killable, TASK_KILLABLE);
 689}
 690
 691static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
 692{
 693        smp_mb__before_atomic();
 694        clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
 695        smp_mb__after_atomic();
 696        wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
 697}
 698
 699
 700struct nfs4_pnfs_ds *
 701nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 702{
 703        struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
 704        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
 705        struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
 706        struct nfs4_pnfs_ds *ret = ds;
 707
 708        if (ds == NULL) {
 709                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
 710                        __func__, ds_idx);
 711                filelayout_mark_devid_invalid(devid);
 712                goto out;
 713        }
 714        smp_rmb();
 715        if (ds->ds_clp)
 716                goto out_test_devid;
 717
 718        if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
 719                struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
 720                int err;
 721
 722                err = nfs4_ds_connect(s, ds);
 723                if (err)
 724                        nfs4_mark_deviceid_unavailable(devid);
 725                nfs4_clear_ds_conn_bit(ds);
 726        } else {
 727                /* Either ds is connected, or ds is NULL */
 728                nfs4_wait_ds_connect(ds);
 729        }
 730out_test_devid:
 731        if (filelayout_test_devid_unavailable(devid))
 732                ret = NULL;
 733out:
 734        return ret;
 735}
 736
 737module_param(dataserver_retrans, uint, 0644);
 738MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
 739                        "retries a request before it attempts further "
 740                        " recovery  action.");
 741module_param(dataserver_timeo, uint, 0644);
 742MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
 743                        "NFSv4.1  client  waits for a response from a "
 744                        " data server before it retries an NFS request.");
 745