linux/fs/nfs/nfs4filelayoutdev.c
<<
>>
Prefs
   1/*
   2 *  Device operations for the pnfs nfs4 file layout driver.
   3 *
   4 *  Copyright (c) 2002
   5 *  The Regents of the University of Michigan
   6 *  All Rights Reserved
   7 *
   8 *  Dean Hildebrand <dhildebz@umich.edu>
   9 *  Garth Goodson   <Garth.Goodson@netapp.com>
  10 *
  11 *  Permission is granted to use, copy, create derivative works, and
  12 *  redistribute this software and such derivative works for any purpose,
  13 *  so long as the name of the University of Michigan is not used in
  14 *  any advertising or publicity pertaining to the use or distribution
  15 *  of this software without specific, written prior authorization. If
  16 *  the above copyright notice or any other identification of the
  17 *  University of Michigan is included in any copy of any portion of
  18 *  this software, then the disclaimer below must also be included.
  19 *
  20 *  This software is provided as is, without representation or warranty
  21 *  of any kind either express or implied, including without limitation
  22 *  the implied warranties of merchantability, fitness for a particular
  23 *  purpose, or noninfringement.  The Regents of the University of
  24 *  Michigan shall not be liable for any damages, including special,
  25 *  indirect, incidental, or consequential damages, with respect to any
  26 *  claim arising out of or in connection with the use of the software,
  27 *  even if it has been or is hereafter advised of the possibility of
  28 *  such damages.
  29 */
  30
  31#include <linux/nfs_fs.h>
  32#include <linux/vmalloc.h>
  33#include <linux/module.h>
  34
  35#include "internal.h"
  36#include "nfs4filelayout.h"
  37
  38#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
  39
/*
 * Timeout and retransmit count handed to nfs4_set_ds_client() when
 * connecting to a data server.  Both are exported as writable module
 * parameters of the same name at the end of this file.
 */
static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
  42
  43/*
  44 * Data server cache
  45 *
  46 * Data servers can be mapped to different device ids.
  47 * nfs4_pnfs_ds reference counting
  48 *   - set to 1 on allocation
  49 *   - incremented when a device id maps a data server already in the cache.
  50 *   - decremented when deviceid is removed from the cache.
  51 */
/* Held around every walk or modification of nfs4_data_server_cache below */
static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
/* Cached data servers, linked through nfs4_pnfs_ds.ds_node */
static LIST_HEAD(nfs4_data_server_cache);
  54
  55/* Debug routines */
  56void
  57print_ds(struct nfs4_pnfs_ds *ds)
  58{
  59        if (ds == NULL) {
  60                printk("%s NULL device\n", __func__);
  61                return;
  62        }
  63        printk("        ds %s\n"
  64                "        ref count %d\n"
  65                "        client %p\n"
  66                "        cl_exchange_flags %x\n",
  67                ds->ds_remotestr,
  68                atomic_read(&ds->ds_count), ds->ds_clp,
  69                ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
  70}
  71
  72static bool
  73same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
  74{
  75        struct sockaddr_in *a, *b;
  76        struct sockaddr_in6 *a6, *b6;
  77
  78        if (addr1->sa_family != addr2->sa_family)
  79                return false;
  80
  81        switch (addr1->sa_family) {
  82        case AF_INET:
  83                a = (struct sockaddr_in *)addr1;
  84                b = (struct sockaddr_in *)addr2;
  85
  86                if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
  87                    a->sin_port == b->sin_port)
  88                        return true;
  89                break;
  90
  91        case AF_INET6:
  92                a6 = (struct sockaddr_in6 *)addr1;
  93                b6 = (struct sockaddr_in6 *)addr2;
  94
  95                /* LINKLOCAL addresses must have matching scope_id */
  96                if (ipv6_addr_scope(&a6->sin6_addr) ==
  97                    IPV6_ADDR_SCOPE_LINKLOCAL &&
  98                    a6->sin6_scope_id != b6->sin6_scope_id)
  99                        return false;
 100
 101                if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
 102                    a6->sin6_port == b6->sin6_port)
 103                        return true;
 104                break;
 105
 106        default:
 107                dprintk("%s: unhandled address family: %u\n",
 108                        __func__, addr1->sa_family);
 109                return false;
 110        }
 111
 112        return false;
 113}
 114
 115static bool
 116_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
 117                               const struct list_head *dsaddrs2)
 118{
 119        struct nfs4_pnfs_ds_addr *da1, *da2;
 120
 121        /* step through both lists, comparing as we go */
 122        for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
 123             da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
 124             da1 != NULL && da2 != NULL;
 125             da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
 126             da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
 127                if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
 128                                   (struct sockaddr *)&da2->da_addr))
 129                        return false;
 130        }
 131        if (da1 == NULL && da2 == NULL)
 132                return true;
 133
 134        return false;
 135}
 136
 137/*
 138 * Lookup DS by addresses.  nfs4_ds_cache_lock is held
 139 */
 140static struct nfs4_pnfs_ds *
 141_data_server_lookup_locked(const struct list_head *dsaddrs)
 142{
 143        struct nfs4_pnfs_ds *ds;
 144
 145        list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
 146                if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
 147                        return ds;
 148        return NULL;
 149}
 150
 151/*
 152 * Lookup DS by nfs_client pointer. Zero data server client pointer
 153 */
 154void nfs4_ds_disconnect(struct nfs_client *clp)
 155{
 156        struct nfs4_pnfs_ds *ds;
 157        struct nfs_client *found = NULL;
 158
 159        dprintk("%s clp %p\n", __func__, clp);
 160        spin_lock(&nfs4_ds_cache_lock);
 161        list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
 162                if (ds->ds_clp && ds->ds_clp == clp) {
 163                        found = ds->ds_clp;
 164                        ds->ds_clp = NULL;
 165                }
 166        spin_unlock(&nfs4_ds_cache_lock);
 167        if (found) {
 168                set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
 169                nfs_put_client(clp);
 170        }
 171}
 172
 173/*
 174 * Create an rpc connection to the nfs4_pnfs_ds data server
 175 * Currently only supports IPv4 and IPv6 addresses
 176 */
 177static int
 178nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
 179{
 180        struct nfs_client *clp = ERR_PTR(-EIO);
 181        struct nfs4_pnfs_ds_addr *da;
 182        int status = 0;
 183
 184        dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
 185                mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 186
 187        BUG_ON(list_empty(&ds->ds_addrs));
 188
 189        list_for_each_entry(da, &ds->ds_addrs, da_node) {
 190                dprintk("%s: DS %s: trying address %s\n",
 191                        __func__, ds->ds_remotestr, da->da_remotestr);
 192
 193                clp = nfs4_set_ds_client(mds_srv->nfs_client,
 194                                        (struct sockaddr *)&da->da_addr,
 195                                        da->da_addrlen, IPPROTO_TCP,
 196                                        dataserver_timeo, dataserver_retrans);
 197                if (!IS_ERR(clp))
 198                        break;
 199        }
 200
 201        if (IS_ERR(clp)) {
 202                status = PTR_ERR(clp);
 203                goto out;
 204        }
 205
 206        status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
 207        if (status)
 208                goto out_put;
 209
 210        ds->ds_clp = clp;
 211        dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 212out:
 213        return status;
 214out_put:
 215        nfs_put_client(clp);
 216        goto out;
 217}
 218
 219static void
 220destroy_ds(struct nfs4_pnfs_ds *ds)
 221{
 222        struct nfs4_pnfs_ds_addr *da;
 223
 224        dprintk("--> %s\n", __func__);
 225        ifdebug(FACILITY)
 226                print_ds(ds);
 227
 228        if (ds->ds_clp)
 229                nfs_put_client(ds->ds_clp);
 230
 231        while (!list_empty(&ds->ds_addrs)) {
 232                da = list_first_entry(&ds->ds_addrs,
 233                                      struct nfs4_pnfs_ds_addr,
 234                                      da_node);
 235                list_del_init(&da->da_node);
 236                kfree(da->da_remotestr);
 237                kfree(da);
 238        }
 239
 240        kfree(ds->ds_remotestr);
 241        kfree(ds);
 242}
 243
 244void
 245nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 246{
 247        struct nfs4_pnfs_ds *ds;
 248        int i;
 249
 250        nfs4_print_deviceid(&dsaddr->id_node.deviceid);
 251
 252        for (i = 0; i < dsaddr->ds_num; i++) {
 253                ds = dsaddr->ds_list[i];
 254                if (ds != NULL) {
 255                        if (atomic_dec_and_lock(&ds->ds_count,
 256                                                &nfs4_ds_cache_lock)) {
 257                                list_del_init(&ds->ds_node);
 258                                spin_unlock(&nfs4_ds_cache_lock);
 259                                destroy_ds(ds);
 260                        }
 261                }
 262        }
 263        kfree(dsaddr->stripe_indices);
 264        kfree(dsaddr);
 265}
 266
 267/*
 268 * Create a string with a human readable address and port to avoid
 269 * complicated setup around many dprinks.
 270 */
 271static char *
 272nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
 273{
 274        struct nfs4_pnfs_ds_addr *da;
 275        char *remotestr;
 276        size_t len;
 277        char *p;
 278
 279        len = 3;        /* '{', '}' and eol */
 280        list_for_each_entry(da, dsaddrs, da_node) {
 281                len += strlen(da->da_remotestr) + 1;    /* string plus comma */
 282        }
 283
 284        remotestr = kzalloc(len, gfp_flags);
 285        if (!remotestr)
 286                return NULL;
 287
 288        p = remotestr;
 289        *(p++) = '{';
 290        len--;
 291        list_for_each_entry(da, dsaddrs, da_node) {
 292                size_t ll = strlen(da->da_remotestr);
 293
 294                if (ll > len)
 295                        goto out_err;
 296
 297                memcpy(p, da->da_remotestr, ll);
 298                p += ll;
 299                len -= ll;
 300
 301                if (len < 1)
 302                        goto out_err;
 303                (*p++) = ',';
 304                len--;
 305        }
 306        if (len < 2)
 307                goto out_err;
 308        *(p++) = '}';
 309        *p = '\0';
 310        return remotestr;
 311out_err:
 312        kfree(remotestr);
 313        return NULL;
 314}
 315
 316static struct nfs4_pnfs_ds *
 317nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 318{
 319        struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
 320        char *remotestr;
 321
 322        if (list_empty(dsaddrs)) {
 323                dprintk("%s: no addresses defined\n", __func__);
 324                goto out;
 325        }
 326
 327        ds = kzalloc(sizeof(*ds), gfp_flags);
 328        if (!ds)
 329                goto out;
 330
 331        /* this is only used for debugging, so it's ok if its NULL */
 332        remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
 333
 334        spin_lock(&nfs4_ds_cache_lock);
 335        tmp_ds = _data_server_lookup_locked(dsaddrs);
 336        if (tmp_ds == NULL) {
 337                INIT_LIST_HEAD(&ds->ds_addrs);
 338                list_splice_init(dsaddrs, &ds->ds_addrs);
 339                ds->ds_remotestr = remotestr;
 340                atomic_set(&ds->ds_count, 1);
 341                INIT_LIST_HEAD(&ds->ds_node);
 342                ds->ds_clp = NULL;
 343                list_add(&ds->ds_node, &nfs4_data_server_cache);
 344                dprintk("%s add new data server %s\n", __func__,
 345                        ds->ds_remotestr);
 346        } else {
 347                kfree(remotestr);
 348                kfree(ds);
 349                atomic_inc(&tmp_ds->ds_count);
 350                dprintk("%s data server %s found, inc'ed ds_count to %d\n",
 351                        __func__, tmp_ds->ds_remotestr,
 352                        atomic_read(&tmp_ds->ds_count));
 353                ds = tmp_ds;
 354        }
 355        spin_unlock(&nfs4_ds_cache_lock);
 356out:
 357        return ds;
 358}
 359
 360/*
 361 * Currently only supports ipv4, ipv6 and one multi-path address.
 362 */
 363static struct nfs4_pnfs_ds_addr *
 364decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
 365{
 366        struct nfs4_pnfs_ds_addr *da = NULL;
 367        char *buf, *portstr;
 368        __be16 port;
 369        int nlen, rlen;
 370        int tmp[2];
 371        __be32 *p;
 372        char *netid, *match_netid;
 373        size_t len, match_netid_len;
 374        char *startsep = "";
 375        char *endsep = "";
 376
 377
 378        /* r_netid */
 379        p = xdr_inline_decode(streamp, 4);
 380        if (unlikely(!p))
 381                goto out_err;
 382        nlen = be32_to_cpup(p++);
 383
 384        p = xdr_inline_decode(streamp, nlen);
 385        if (unlikely(!p))
 386                goto out_err;
 387
 388        netid = kmalloc(nlen+1, gfp_flags);
 389        if (unlikely(!netid))
 390                goto out_err;
 391
 392        netid[nlen] = '\0';
 393        memcpy(netid, p, nlen);
 394
 395        /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 396        p = xdr_inline_decode(streamp, 4);
 397        if (unlikely(!p))
 398                goto out_free_netid;
 399        rlen = be32_to_cpup(p);
 400
 401        p = xdr_inline_decode(streamp, rlen);
 402        if (unlikely(!p))
 403                goto out_free_netid;
 404
 405        /* port is ".ABC.DEF", 8 chars max */
 406        if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
 407                dprintk("%s: Invalid address, length %d\n", __func__,
 408                        rlen);
 409                goto out_free_netid;
 410        }
 411        buf = kmalloc(rlen + 1, gfp_flags);
 412        if (!buf) {
 413                dprintk("%s: Not enough memory\n", __func__);
 414                goto out_free_netid;
 415        }
 416        buf[rlen] = '\0';
 417        memcpy(buf, p, rlen);
 418
 419        /* replace port '.' with '-' */
 420        portstr = strrchr(buf, '.');
 421        if (!portstr) {
 422                dprintk("%s: Failed finding expected dot in port\n",
 423                        __func__);
 424                goto out_free_buf;
 425        }
 426        *portstr = '-';
 427
 428        /* find '.' between address and port */
 429        portstr = strrchr(buf, '.');
 430        if (!portstr) {
 431                dprintk("%s: Failed finding expected dot between address and "
 432                        "port\n", __func__);
 433                goto out_free_buf;
 434        }
 435        *portstr = '\0';
 436
 437        da = kzalloc(sizeof(*da), gfp_flags);
 438        if (unlikely(!da))
 439                goto out_free_buf;
 440
 441        INIT_LIST_HEAD(&da->da_node);
 442
 443        if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 444                      sizeof(da->da_addr))) {
 445                dprintk("%s: error parsing address %s\n", __func__, buf);
 446                goto out_free_da;
 447        }
 448
 449        portstr++;
 450        sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
 451        port = htons((tmp[0] << 8) | (tmp[1]));
 452
 453        switch (da->da_addr.ss_family) {
 454        case AF_INET:
 455                ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
 456                da->da_addrlen = sizeof(struct sockaddr_in);
 457                match_netid = "tcp";
 458                match_netid_len = 3;
 459                break;
 460
 461        case AF_INET6:
 462                ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
 463                da->da_addrlen = sizeof(struct sockaddr_in6);
 464                match_netid = "tcp6";
 465                match_netid_len = 4;
 466                startsep = "[";
 467                endsep = "]";
 468                break;
 469
 470        default:
 471                dprintk("%s: unsupported address family: %u\n",
 472                        __func__, da->da_addr.ss_family);
 473                goto out_free_da;
 474        }
 475
 476        if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
 477                dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
 478                        __func__, netid, match_netid);
 479                goto out_free_da;
 480        }
 481
 482        /* save human readable address */
 483        len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
 484        da->da_remotestr = kzalloc(len, gfp_flags);
 485
 486        /* NULL is ok, only used for dprintk */
 487        if (da->da_remotestr)
 488                snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
 489                         buf, endsep, ntohs(port));
 490
 491        dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 492        kfree(buf);
 493        kfree(netid);
 494        return da;
 495
 496out_free_da:
 497        kfree(da);
 498out_free_buf:
 499        dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
 500        kfree(buf);
 501out_free_netid:
 502        kfree(netid);
 503out_err:
 504        return NULL;
 505}
 506
 507/* Decode opaque device data and return the result */
 508static struct nfs4_file_layout_dsaddr*
 509decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 510{
 511        int i;
 512        u32 cnt, num;
 513        u8 *indexp;
 514        __be32 *p;
 515        u8 *stripe_indices;
 516        u8 max_stripe_index;
 517        struct nfs4_file_layout_dsaddr *dsaddr = NULL;
 518        struct xdr_stream stream;
 519        struct xdr_buf buf;
 520        struct page *scratch;
 521        struct list_head dsaddrs;
 522        struct nfs4_pnfs_ds_addr *da;
 523
 524        /* set up xdr stream */
 525        scratch = alloc_page(gfp_flags);
 526        if (!scratch)
 527                goto out_err;
 528
 529        xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
 530        xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 531
 532        /* Get the stripe count (number of stripe index) */
 533        p = xdr_inline_decode(&stream, 4);
 534        if (unlikely(!p))
 535                goto out_err_free_scratch;
 536
 537        cnt = be32_to_cpup(p);
 538        dprintk("%s stripe count  %d\n", __func__, cnt);
 539        if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
 540                printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
 541                       "supported maximum %d\n", __func__,
 542                        cnt, NFS4_PNFS_MAX_STRIPE_CNT);
 543                goto out_err_free_scratch;
 544        }
 545
 546        /* read stripe indices */
 547        stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
 548        if (!stripe_indices)
 549                goto out_err_free_scratch;
 550
 551        p = xdr_inline_decode(&stream, cnt << 2);
 552        if (unlikely(!p))
 553                goto out_err_free_stripe_indices;
 554
 555        indexp = &stripe_indices[0];
 556        max_stripe_index = 0;
 557        for (i = 0; i < cnt; i++) {
 558                *indexp = be32_to_cpup(p++);
 559                max_stripe_index = max(max_stripe_index, *indexp);
 560                indexp++;
 561        }
 562
 563        /* Check the multipath list count */
 564        p = xdr_inline_decode(&stream, 4);
 565        if (unlikely(!p))
 566                goto out_err_free_stripe_indices;
 567
 568        num = be32_to_cpup(p);
 569        dprintk("%s ds_num %u\n", __func__, num);
 570        if (num > NFS4_PNFS_MAX_MULTI_CNT) {
 571                printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
 572                        "supported maximum %d\n", __func__,
 573                        num, NFS4_PNFS_MAX_MULTI_CNT);
 574                goto out_err_free_stripe_indices;
 575        }
 576
 577        /* validate stripe indices are all < num */
 578        if (max_stripe_index >= num) {
 579                printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
 580                        __func__, max_stripe_index, num);
 581                goto out_err_free_stripe_indices;
 582        }
 583
 584        dsaddr = kzalloc(sizeof(*dsaddr) +
 585                        (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
 586                        gfp_flags);
 587        if (!dsaddr)
 588                goto out_err_free_stripe_indices;
 589
 590        dsaddr->stripe_count = cnt;
 591        dsaddr->stripe_indices = stripe_indices;
 592        stripe_indices = NULL;
 593        dsaddr->ds_num = num;
 594        nfs4_init_deviceid_node(&dsaddr->id_node,
 595                                NFS_SERVER(ino)->pnfs_curr_ld,
 596                                NFS_SERVER(ino)->nfs_client,
 597                                &pdev->dev_id);
 598
 599        INIT_LIST_HEAD(&dsaddrs);
 600
 601        for (i = 0; i < dsaddr->ds_num; i++) {
 602                int j;
 603                u32 mp_count;
 604
 605                p = xdr_inline_decode(&stream, 4);
 606                if (unlikely(!p))
 607                        goto out_err_free_deviceid;
 608
 609                mp_count = be32_to_cpup(p); /* multipath count */
 610                for (j = 0; j < mp_count; j++) {
 611                        da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
 612                                            &stream, gfp_flags);
 613                        if (da)
 614                                list_add_tail(&da->da_node, &dsaddrs);
 615                }
 616                if (list_empty(&dsaddrs)) {
 617                        dprintk("%s: no suitable DS addresses found\n",
 618                                __func__);
 619                        goto out_err_free_deviceid;
 620                }
 621
 622                dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
 623                if (!dsaddr->ds_list[i])
 624                        goto out_err_drain_dsaddrs;
 625
 626                /* If DS was already in cache, free ds addrs */
 627                while (!list_empty(&dsaddrs)) {
 628                        da = list_first_entry(&dsaddrs,
 629                                              struct nfs4_pnfs_ds_addr,
 630                                              da_node);
 631                        list_del_init(&da->da_node);
 632                        kfree(da->da_remotestr);
 633                        kfree(da);
 634                }
 635        }
 636
 637        __free_page(scratch);
 638        return dsaddr;
 639
 640out_err_drain_dsaddrs:
 641        while (!list_empty(&dsaddrs)) {
 642                da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
 643                                      da_node);
 644                list_del_init(&da->da_node);
 645                kfree(da->da_remotestr);
 646                kfree(da);
 647        }
 648out_err_free_deviceid:
 649        nfs4_fl_free_deviceid(dsaddr);
 650        /* stripe_indicies was part of dsaddr */
 651        goto out_err_free_scratch;
 652out_err_free_stripe_indices:
 653        kfree(stripe_indices);
 654out_err_free_scratch:
 655        __free_page(scratch);
 656out_err:
 657        dprintk("%s ERROR: returning NULL\n", __func__);
 658        return NULL;
 659}
 660
 661/*
 662 * Decode the opaque device specified in 'dev' and add it to the cache of
 663 * available devices.
 664 */
 665static struct nfs4_file_layout_dsaddr *
 666decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
 667{
 668        struct nfs4_deviceid_node *d;
 669        struct nfs4_file_layout_dsaddr *n, *new;
 670
 671        new = decode_device(inode, dev, gfp_flags);
 672        if (!new) {
 673                printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
 674                        __func__);
 675                return NULL;
 676        }
 677
 678        d = nfs4_insert_deviceid_node(&new->id_node);
 679        n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
 680        if (n != new) {
 681                nfs4_fl_free_deviceid(new);
 682                return n;
 683        }
 684
 685        return new;
 686}
 687
 688/*
 689 * Retrieve the information for dev_id, add it to the list
 690 * of available devices, and return it.
 691 */
 692struct nfs4_file_layout_dsaddr *
 693get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
 694{
 695        struct pnfs_device *pdev = NULL;
 696        u32 max_resp_sz;
 697        int max_pages;
 698        struct page **pages = NULL;
 699        struct nfs4_file_layout_dsaddr *dsaddr = NULL;
 700        int rc, i;
 701        struct nfs_server *server = NFS_SERVER(inode);
 702
 703        /*
 704         * Use the session max response size as the basis for setting
 705         * GETDEVICEINFO's maxcount
 706         */
 707        max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
 708        max_pages = nfs_page_array_len(0, max_resp_sz);
 709        dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
 710                __func__, inode, max_resp_sz, max_pages);
 711
 712        pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
 713        if (pdev == NULL)
 714                return NULL;
 715
 716        pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
 717        if (pages == NULL) {
 718                kfree(pdev);
 719                return NULL;
 720        }
 721        for (i = 0; i < max_pages; i++) {
 722                pages[i] = alloc_page(gfp_flags);
 723                if (!pages[i])
 724                        goto out_free;
 725        }
 726
 727        memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
 728        pdev->layout_type = LAYOUT_NFSV4_1_FILES;
 729        pdev->pages = pages;
 730        pdev->pgbase = 0;
 731        pdev->pglen = PAGE_SIZE * max_pages;
 732        pdev->mincount = 0;
 733
 734        rc = nfs4_proc_getdeviceinfo(server, pdev);
 735        dprintk("%s getdevice info returns %d\n", __func__, rc);
 736        if (rc)
 737                goto out_free;
 738
 739        /*
 740         * Found new device, need to decode it and then add it to the
 741         * list of known devices for this mountpoint.
 742         */
 743        dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
 744out_free:
 745        for (i = 0; i < max_pages; i++)
 746                __free_page(pages[i]);
 747        kfree(pages);
 748        kfree(pdev);
 749        dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
 750        return dsaddr;
 751}
 752
 753void
 754nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 755{
 756        nfs4_put_deviceid_node(&dsaddr->id_node);
 757}
 758
 759/*
 760 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
 761 * Then: ((res + fsi) % dsaddr->stripe_count)
 762 */
 763u32
 764nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
 765{
 766        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
 767        u64 tmp;
 768
 769        tmp = offset - flseg->pattern_offset;
 770        do_div(tmp, flseg->stripe_unit);
 771        tmp += flseg->first_stripe_index;
 772        return do_div(tmp, flseg->dsaddr->stripe_count);
 773}
 774
 775u32
 776nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
 777{
 778        return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
 779}
 780
 781struct nfs_fh *
 782nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 783{
 784        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
 785        u32 i;
 786
 787        if (flseg->stripe_type == STRIPE_SPARSE) {
 788                if (flseg->num_fh == 1)
 789                        i = 0;
 790                else if (flseg->num_fh == 0)
 791                        /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
 792                        return NULL;
 793                else
 794                        i = nfs4_fl_calc_ds_index(lseg, j);
 795        } else
 796                i = j;
 797        return flseg->fh_array[i];
 798}
 799
 800struct nfs4_pnfs_ds *
 801nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 802{
 803        struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
 804        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
 805        struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
 806
 807        if (filelayout_test_devid_invalid(devid))
 808                return NULL;
 809
 810        if (ds == NULL) {
 811                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
 812                        __func__, ds_idx);
 813                goto mark_dev_invalid;
 814        }
 815
 816        if (!ds->ds_clp) {
 817                struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
 818                int err;
 819
 820                err = nfs4_ds_connect(s, ds);
 821                if (err)
 822                        goto mark_dev_invalid;
 823        }
 824        return ds;
 825
 826mark_dev_invalid:
 827        filelayout_mark_devid_invalid(devid);
 828        return NULL;
 829}
 830
/*
 * Expose the data server retransmit count and timeout defined at the top
 * of this file as module parameters with mode 0644.
 */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
                        "retries a request before it attempts further "
                        " recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
                        "NFSv4.1  client  waits for a response from a "
                        " data server before it retries an NFS request.");
 839