linux/fs/nfs/nfs4filelayoutdev.c
<<
>>
Prefs
   1/*
   2 *  Device operations for the pnfs nfs4 file layout driver.
   3 *
   4 *  Copyright (c) 2002
   5 *  The Regents of the University of Michigan
   6 *  All Rights Reserved
   7 *
   8 *  Dean Hildebrand <dhildebz@umich.edu>
   9 *  Garth Goodson   <Garth.Goodson@netapp.com>
  10 *
  11 *  Permission is granted to use, copy, create derivative works, and
  12 *  redistribute this software and such derivative works for any purpose,
  13 *  so long as the name of the University of Michigan is not used in
  14 *  any advertising or publicity pertaining to the use or distribution
  15 *  of this software without specific, written prior authorization. If
  16 *  the above copyright notice or any other identification of the
  17 *  University of Michigan is included in any copy of any portion of
  18 *  this software, then the disclaimer below must also be included.
  19 *
  20 *  This software is provided as is, without representation or warranty
  21 *  of any kind either express or implied, including without limitation
  22 *  the implied warranties of merchantability, fitness for a particular
  23 *  purpose, or noninfringement.  The Regents of the University of
  24 *  Michigan shall not be liable for any damages, including special,
  25 *  indirect, incidental, or consequential damages, with respect to any
  26 *  claim arising out of or in connection with the use of the software,
  27 *  even if it has been or is hereafter advised of the possibility of
  28 *  such damages.
  29 */
  30
  31#include <linux/nfs_fs.h>
  32#include <linux/vmalloc.h>
  33
  34#include "internal.h"
  35#include "nfs4filelayout.h"
  36
  37#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
  38
  39/*
  40 * Data server cache
  41 *
  42 * Data servers can be mapped to different device ids.
  43 * nfs4_pnfs_ds reference counting
  44 *   - set to 1 on allocation
  45 *   - incremented when a device id maps a data server already in the cache.
  46 *   - decremented when deviceid is removed from the cache.
  47 */
/*
 * Held around lookups, insertions and removals on nfs4_data_server_cache;
 * filelayout_mark_devid_negative() also takes it to update dsaddr->flags.
 */
static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
/* All cached nfs4_pnfs_ds entries, linked through their ds_node member. */
static LIST_HEAD(nfs4_data_server_cache);
  50
  51/* Debug routines */
  52void
  53print_ds(struct nfs4_pnfs_ds *ds)
  54{
  55        if (ds == NULL) {
  56                printk("%s NULL device\n", __func__);
  57                return;
  58        }
  59        printk("        ds %s\n"
  60                "        ref count %d\n"
  61                "        client %p\n"
  62                "        cl_exchange_flags %x\n",
  63                ds->ds_remotestr,
  64                atomic_read(&ds->ds_count), ds->ds_clp,
  65                ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
  66}
  67
  68static bool
  69same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
  70{
  71        struct sockaddr_in *a, *b;
  72        struct sockaddr_in6 *a6, *b6;
  73
  74        if (addr1->sa_family != addr2->sa_family)
  75                return false;
  76
  77        switch (addr1->sa_family) {
  78        case AF_INET:
  79                a = (struct sockaddr_in *)addr1;
  80                b = (struct sockaddr_in *)addr2;
  81
  82                if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
  83                    a->sin_port == b->sin_port)
  84                        return true;
  85                break;
  86
  87        case AF_INET6:
  88                a6 = (struct sockaddr_in6 *)addr1;
  89                b6 = (struct sockaddr_in6 *)addr2;
  90
  91                /* LINKLOCAL addresses must have matching scope_id */
  92                if (ipv6_addr_scope(&a6->sin6_addr) ==
  93                    IPV6_ADDR_SCOPE_LINKLOCAL &&
  94                    a6->sin6_scope_id != b6->sin6_scope_id)
  95                        return false;
  96
  97                if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
  98                    a6->sin6_port == b6->sin6_port)
  99                        return true;
 100                break;
 101
 102        default:
 103                dprintk("%s: unhandled address family: %u\n",
 104                        __func__, addr1->sa_family);
 105                return false;
 106        }
 107
 108        return false;
 109}
 110
 111static bool
 112_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
 113                               const struct list_head *dsaddrs2)
 114{
 115        struct nfs4_pnfs_ds_addr *da1, *da2;
 116
 117        /* step through both lists, comparing as we go */
 118        for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
 119             da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
 120             da1 != NULL && da2 != NULL;
 121             da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
 122             da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
 123                if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
 124                                   (struct sockaddr *)&da2->da_addr))
 125                        return false;
 126        }
 127        if (da1 == NULL && da2 == NULL)
 128                return true;
 129
 130        return false;
 131}
 132
 133/*
 134 * Lookup DS by addresses.  nfs4_ds_cache_lock is held
 135 */
 136static struct nfs4_pnfs_ds *
 137_data_server_lookup_locked(const struct list_head *dsaddrs)
 138{
 139        struct nfs4_pnfs_ds *ds;
 140
 141        list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
 142                if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
 143                        return ds;
 144        return NULL;
 145}
 146
 147/*
 148 * Create an rpc connection to the nfs4_pnfs_ds data server
 149 * Currently only supports IPv4 and IPv6 addresses
 150 */
 151static int
 152nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
 153{
 154        struct nfs_client *clp = ERR_PTR(-EIO);
 155        struct nfs4_pnfs_ds_addr *da;
 156        int status = 0;
 157
 158        dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
 159                mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 160
 161        BUG_ON(list_empty(&ds->ds_addrs));
 162
 163        list_for_each_entry(da, &ds->ds_addrs, da_node) {
 164                dprintk("%s: DS %s: trying address %s\n",
 165                        __func__, ds->ds_remotestr, da->da_remotestr);
 166
 167                clp = nfs4_set_ds_client(mds_srv->nfs_client,
 168                                 (struct sockaddr *)&da->da_addr,
 169                                 da->da_addrlen, IPPROTO_TCP);
 170                if (!IS_ERR(clp))
 171                        break;
 172        }
 173
 174        if (IS_ERR(clp)) {
 175                status = PTR_ERR(clp);
 176                goto out;
 177        }
 178
 179        if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
 180                if (!is_ds_client(clp)) {
 181                        status = -ENODEV;
 182                        goto out_put;
 183                }
 184                ds->ds_clp = clp;
 185                dprintk("%s [existing] server=%s\n", __func__,
 186                        ds->ds_remotestr);
 187                goto out;
 188        }
 189
 190        /*
 191         * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
 192         * be equal to the MDS lease. Renewal is scheduled in create_session.
 193         */
 194        spin_lock(&mds_srv->nfs_client->cl_lock);
 195        clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
 196        spin_unlock(&mds_srv->nfs_client->cl_lock);
 197        clp->cl_last_renewal = jiffies;
 198
 199        /* New nfs_client */
 200        status = nfs4_init_ds_session(clp);
 201        if (status)
 202                goto out_put;
 203
 204        ds->ds_clp = clp;
 205        dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 206out:
 207        return status;
 208out_put:
 209        nfs_put_client(clp);
 210        goto out;
 211}
 212
 213static void
 214destroy_ds(struct nfs4_pnfs_ds *ds)
 215{
 216        struct nfs4_pnfs_ds_addr *da;
 217
 218        dprintk("--> %s\n", __func__);
 219        ifdebug(FACILITY)
 220                print_ds(ds);
 221
 222        if (ds->ds_clp)
 223                nfs_put_client(ds->ds_clp);
 224
 225        while (!list_empty(&ds->ds_addrs)) {
 226                da = list_first_entry(&ds->ds_addrs,
 227                                      struct nfs4_pnfs_ds_addr,
 228                                      da_node);
 229                list_del_init(&da->da_node);
 230                kfree(da->da_remotestr);
 231                kfree(da);
 232        }
 233
 234        kfree(ds->ds_remotestr);
 235        kfree(ds);
 236}
 237
 238void
 239nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 240{
 241        struct nfs4_pnfs_ds *ds;
 242        int i;
 243
 244        nfs4_print_deviceid(&dsaddr->id_node.deviceid);
 245
 246        for (i = 0; i < dsaddr->ds_num; i++) {
 247                ds = dsaddr->ds_list[i];
 248                if (ds != NULL) {
 249                        if (atomic_dec_and_lock(&ds->ds_count,
 250                                                &nfs4_ds_cache_lock)) {
 251                                list_del_init(&ds->ds_node);
 252                                spin_unlock(&nfs4_ds_cache_lock);
 253                                destroy_ds(ds);
 254                        }
 255                }
 256        }
 257        kfree(dsaddr->stripe_indices);
 258        kfree(dsaddr);
 259}
 260
 261/*
 262 * Create a string with a human readable address and port to avoid
 263 * complicated setup around many dprinks.
 264 */
 265static char *
 266nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
 267{
 268        struct nfs4_pnfs_ds_addr *da;
 269        char *remotestr;
 270        size_t len;
 271        char *p;
 272
 273        len = 3;        /* '{', '}' and eol */
 274        list_for_each_entry(da, dsaddrs, da_node) {
 275                len += strlen(da->da_remotestr) + 1;    /* string plus comma */
 276        }
 277
 278        remotestr = kzalloc(len, gfp_flags);
 279        if (!remotestr)
 280                return NULL;
 281
 282        p = remotestr;
 283        *(p++) = '{';
 284        len--;
 285        list_for_each_entry(da, dsaddrs, da_node) {
 286                size_t ll = strlen(da->da_remotestr);
 287
 288                if (ll > len)
 289                        goto out_err;
 290
 291                memcpy(p, da->da_remotestr, ll);
 292                p += ll;
 293                len -= ll;
 294
 295                if (len < 1)
 296                        goto out_err;
 297                (*p++) = ',';
 298                len--;
 299        }
 300        if (len < 2)
 301                goto out_err;
 302        *(p++) = '}';
 303        *p = '\0';
 304        return remotestr;
 305out_err:
 306        kfree(remotestr);
 307        return NULL;
 308}
 309
 310static struct nfs4_pnfs_ds *
 311nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 312{
 313        struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
 314        char *remotestr;
 315
 316        if (list_empty(dsaddrs)) {
 317                dprintk("%s: no addresses defined\n", __func__);
 318                goto out;
 319        }
 320
 321        ds = kzalloc(sizeof(*ds), gfp_flags);
 322        if (!ds)
 323                goto out;
 324
 325        /* this is only used for debugging, so it's ok if its NULL */
 326        remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
 327
 328        spin_lock(&nfs4_ds_cache_lock);
 329        tmp_ds = _data_server_lookup_locked(dsaddrs);
 330        if (tmp_ds == NULL) {
 331                INIT_LIST_HEAD(&ds->ds_addrs);
 332                list_splice_init(dsaddrs, &ds->ds_addrs);
 333                ds->ds_remotestr = remotestr;
 334                atomic_set(&ds->ds_count, 1);
 335                INIT_LIST_HEAD(&ds->ds_node);
 336                ds->ds_clp = NULL;
 337                list_add(&ds->ds_node, &nfs4_data_server_cache);
 338                dprintk("%s add new data server %s\n", __func__,
 339                        ds->ds_remotestr);
 340        } else {
 341                kfree(remotestr);
 342                kfree(ds);
 343                atomic_inc(&tmp_ds->ds_count);
 344                dprintk("%s data server %s found, inc'ed ds_count to %d\n",
 345                        __func__, tmp_ds->ds_remotestr,
 346                        atomic_read(&tmp_ds->ds_count));
 347                ds = tmp_ds;
 348        }
 349        spin_unlock(&nfs4_ds_cache_lock);
 350out:
 351        return ds;
 352}
 353
 354/*
 355 * Currently only supports ipv4, ipv6 and one multi-path address.
 356 */
 357static struct nfs4_pnfs_ds_addr *
 358decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
 359{
 360        struct nfs4_pnfs_ds_addr *da = NULL;
 361        char *buf, *portstr;
 362        __be16 port;
 363        int nlen, rlen;
 364        int tmp[2];
 365        __be32 *p;
 366        char *netid, *match_netid;
 367        size_t len, match_netid_len;
 368        char *startsep = "";
 369        char *endsep = "";
 370
 371
 372        /* r_netid */
 373        p = xdr_inline_decode(streamp, 4);
 374        if (unlikely(!p))
 375                goto out_err;
 376        nlen = be32_to_cpup(p++);
 377
 378        p = xdr_inline_decode(streamp, nlen);
 379        if (unlikely(!p))
 380                goto out_err;
 381
 382        netid = kmalloc(nlen+1, gfp_flags);
 383        if (unlikely(!netid))
 384                goto out_err;
 385
 386        netid[nlen] = '\0';
 387        memcpy(netid, p, nlen);
 388
 389        /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 390        p = xdr_inline_decode(streamp, 4);
 391        if (unlikely(!p))
 392                goto out_free_netid;
 393        rlen = be32_to_cpup(p);
 394
 395        p = xdr_inline_decode(streamp, rlen);
 396        if (unlikely(!p))
 397                goto out_free_netid;
 398
 399        /* port is ".ABC.DEF", 8 chars max */
 400        if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
 401                dprintk("%s: Invalid address, length %d\n", __func__,
 402                        rlen);
 403                goto out_free_netid;
 404        }
 405        buf = kmalloc(rlen + 1, gfp_flags);
 406        if (!buf) {
 407                dprintk("%s: Not enough memory\n", __func__);
 408                goto out_free_netid;
 409        }
 410        buf[rlen] = '\0';
 411        memcpy(buf, p, rlen);
 412
 413        /* replace port '.' with '-' */
 414        portstr = strrchr(buf, '.');
 415        if (!portstr) {
 416                dprintk("%s: Failed finding expected dot in port\n",
 417                        __func__);
 418                goto out_free_buf;
 419        }
 420        *portstr = '-';
 421
 422        /* find '.' between address and port */
 423        portstr = strrchr(buf, '.');
 424        if (!portstr) {
 425                dprintk("%s: Failed finding expected dot between address and "
 426                        "port\n", __func__);
 427                goto out_free_buf;
 428        }
 429        *portstr = '\0';
 430
 431        da = kzalloc(sizeof(*da), gfp_flags);
 432        if (unlikely(!da))
 433                goto out_free_buf;
 434
 435        INIT_LIST_HEAD(&da->da_node);
 436
 437        if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 438                      sizeof(da->da_addr))) {
 439                dprintk("%s: error parsing address %s\n", __func__, buf);
 440                goto out_free_da;
 441        }
 442
 443        portstr++;
 444        sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
 445        port = htons((tmp[0] << 8) | (tmp[1]));
 446
 447        switch (da->da_addr.ss_family) {
 448        case AF_INET:
 449                ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
 450                da->da_addrlen = sizeof(struct sockaddr_in);
 451                match_netid = "tcp";
 452                match_netid_len = 3;
 453                break;
 454
 455        case AF_INET6:
 456                ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
 457                da->da_addrlen = sizeof(struct sockaddr_in6);
 458                match_netid = "tcp6";
 459                match_netid_len = 4;
 460                startsep = "[";
 461                endsep = "]";
 462                break;
 463
 464        default:
 465                dprintk("%s: unsupported address family: %u\n",
 466                        __func__, da->da_addr.ss_family);
 467                goto out_free_da;
 468        }
 469
 470        if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
 471                dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
 472                        __func__, netid, match_netid);
 473                goto out_free_da;
 474        }
 475
 476        /* save human readable address */
 477        len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
 478        da->da_remotestr = kzalloc(len, gfp_flags);
 479
 480        /* NULL is ok, only used for dprintk */
 481        if (da->da_remotestr)
 482                snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
 483                         buf, endsep, ntohs(port));
 484
 485        dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 486        kfree(buf);
 487        kfree(netid);
 488        return da;
 489
 490out_free_da:
 491        kfree(da);
 492out_free_buf:
 493        dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
 494        kfree(buf);
 495out_free_netid:
 496        kfree(netid);
 497out_err:
 498        return NULL;
 499}
 500
 501/* Decode opaque device data and return the result */
 502static struct nfs4_file_layout_dsaddr*
 503decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 504{
 505        int i;
 506        u32 cnt, num;
 507        u8 *indexp;
 508        __be32 *p;
 509        u8 *stripe_indices;
 510        u8 max_stripe_index;
 511        struct nfs4_file_layout_dsaddr *dsaddr = NULL;
 512        struct xdr_stream stream;
 513        struct xdr_buf buf;
 514        struct page *scratch;
 515        struct list_head dsaddrs;
 516        struct nfs4_pnfs_ds_addr *da;
 517
 518        /* set up xdr stream */
 519        scratch = alloc_page(gfp_flags);
 520        if (!scratch)
 521                goto out_err;
 522
 523        xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
 524        xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 525
 526        /* Get the stripe count (number of stripe index) */
 527        p = xdr_inline_decode(&stream, 4);
 528        if (unlikely(!p))
 529                goto out_err_free_scratch;
 530
 531        cnt = be32_to_cpup(p);
 532        dprintk("%s stripe count  %d\n", __func__, cnt);
 533        if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
 534                printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
 535                       "supported maximum %d\n", __func__,
 536                        cnt, NFS4_PNFS_MAX_STRIPE_CNT);
 537                goto out_err_free_scratch;
 538        }
 539
 540        /* read stripe indices */
 541        stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
 542        if (!stripe_indices)
 543                goto out_err_free_scratch;
 544
 545        p = xdr_inline_decode(&stream, cnt << 2);
 546        if (unlikely(!p))
 547                goto out_err_free_stripe_indices;
 548
 549        indexp = &stripe_indices[0];
 550        max_stripe_index = 0;
 551        for (i = 0; i < cnt; i++) {
 552                *indexp = be32_to_cpup(p++);
 553                max_stripe_index = max(max_stripe_index, *indexp);
 554                indexp++;
 555        }
 556
 557        /* Check the multipath list count */
 558        p = xdr_inline_decode(&stream, 4);
 559        if (unlikely(!p))
 560                goto out_err_free_stripe_indices;
 561
 562        num = be32_to_cpup(p);
 563        dprintk("%s ds_num %u\n", __func__, num);
 564        if (num > NFS4_PNFS_MAX_MULTI_CNT) {
 565                printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
 566                        "supported maximum %d\n", __func__,
 567                        num, NFS4_PNFS_MAX_MULTI_CNT);
 568                goto out_err_free_stripe_indices;
 569        }
 570
 571        /* validate stripe indices are all < num */
 572        if (max_stripe_index >= num) {
 573                printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
 574                        __func__, max_stripe_index, num);
 575                goto out_err_free_stripe_indices;
 576        }
 577
 578        dsaddr = kzalloc(sizeof(*dsaddr) +
 579                        (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
 580                        gfp_flags);
 581        if (!dsaddr)
 582                goto out_err_free_stripe_indices;
 583
 584        dsaddr->stripe_count = cnt;
 585        dsaddr->stripe_indices = stripe_indices;
 586        stripe_indices = NULL;
 587        dsaddr->ds_num = num;
 588        nfs4_init_deviceid_node(&dsaddr->id_node,
 589                                NFS_SERVER(ino)->pnfs_curr_ld,
 590                                NFS_SERVER(ino)->nfs_client,
 591                                &pdev->dev_id);
 592
 593        INIT_LIST_HEAD(&dsaddrs);
 594
 595        for (i = 0; i < dsaddr->ds_num; i++) {
 596                int j;
 597                u32 mp_count;
 598
 599                p = xdr_inline_decode(&stream, 4);
 600                if (unlikely(!p))
 601                        goto out_err_free_deviceid;
 602
 603                mp_count = be32_to_cpup(p); /* multipath count */
 604                for (j = 0; j < mp_count; j++) {
 605                        da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
 606                                            &stream, gfp_flags);
 607                        if (da)
 608                                list_add_tail(&da->da_node, &dsaddrs);
 609                }
 610                if (list_empty(&dsaddrs)) {
 611                        dprintk("%s: no suitable DS addresses found\n",
 612                                __func__);
 613                        goto out_err_free_deviceid;
 614                }
 615
 616                dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
 617                if (!dsaddr->ds_list[i])
 618                        goto out_err_drain_dsaddrs;
 619
 620                /* If DS was already in cache, free ds addrs */
 621                while (!list_empty(&dsaddrs)) {
 622                        da = list_first_entry(&dsaddrs,
 623                                              struct nfs4_pnfs_ds_addr,
 624                                              da_node);
 625                        list_del_init(&da->da_node);
 626                        kfree(da->da_remotestr);
 627                        kfree(da);
 628                }
 629        }
 630
 631        __free_page(scratch);
 632        return dsaddr;
 633
 634out_err_drain_dsaddrs:
 635        while (!list_empty(&dsaddrs)) {
 636                da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
 637                                      da_node);
 638                list_del_init(&da->da_node);
 639                kfree(da->da_remotestr);
 640                kfree(da);
 641        }
 642out_err_free_deviceid:
 643        nfs4_fl_free_deviceid(dsaddr);
 644        /* stripe_indicies was part of dsaddr */
 645        goto out_err_free_scratch;
 646out_err_free_stripe_indices:
 647        kfree(stripe_indices);
 648out_err_free_scratch:
 649        __free_page(scratch);
 650out_err:
 651        dprintk("%s ERROR: returning NULL\n", __func__);
 652        return NULL;
 653}
 654
 655/*
 656 * Decode the opaque device specified in 'dev' and add it to the cache of
 657 * available devices.
 658 */
 659static struct nfs4_file_layout_dsaddr *
 660decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
 661{
 662        struct nfs4_deviceid_node *d;
 663        struct nfs4_file_layout_dsaddr *n, *new;
 664
 665        new = decode_device(inode, dev, gfp_flags);
 666        if (!new) {
 667                printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
 668                        __func__);
 669                return NULL;
 670        }
 671
 672        d = nfs4_insert_deviceid_node(&new->id_node);
 673        n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
 674        if (n != new) {
 675                nfs4_fl_free_deviceid(new);
 676                return n;
 677        }
 678
 679        return new;
 680}
 681
 682/*
 683 * Retrieve the information for dev_id, add it to the list
 684 * of available devices, and return it.
 685 */
 686struct nfs4_file_layout_dsaddr *
 687get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
 688{
 689        struct pnfs_device *pdev = NULL;
 690        u32 max_resp_sz;
 691        int max_pages;
 692        struct page **pages = NULL;
 693        struct nfs4_file_layout_dsaddr *dsaddr = NULL;
 694        int rc, i;
 695        struct nfs_server *server = NFS_SERVER(inode);
 696
 697        /*
 698         * Use the session max response size as the basis for setting
 699         * GETDEVICEINFO's maxcount
 700         */
 701        max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
 702        max_pages = nfs_page_array_len(0, max_resp_sz);
 703        dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
 704                __func__, inode, max_resp_sz, max_pages);
 705
 706        pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
 707        if (pdev == NULL)
 708                return NULL;
 709
 710        pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
 711        if (pages == NULL) {
 712                kfree(pdev);
 713                return NULL;
 714        }
 715        for (i = 0; i < max_pages; i++) {
 716                pages[i] = alloc_page(gfp_flags);
 717                if (!pages[i])
 718                        goto out_free;
 719        }
 720
 721        memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
 722        pdev->layout_type = LAYOUT_NFSV4_1_FILES;
 723        pdev->pages = pages;
 724        pdev->pgbase = 0;
 725        pdev->pglen = PAGE_SIZE * max_pages;
 726        pdev->mincount = 0;
 727
 728        rc = nfs4_proc_getdeviceinfo(server, pdev);
 729        dprintk("%s getdevice info returns %d\n", __func__, rc);
 730        if (rc)
 731                goto out_free;
 732
 733        /*
 734         * Found new device, need to decode it and then add it to the
 735         * list of known devices for this mountpoint.
 736         */
 737        dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
 738out_free:
 739        for (i = 0; i < max_pages; i++)
 740                __free_page(pages[i]);
 741        kfree(pages);
 742        kfree(pdev);
 743        dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
 744        return dsaddr;
 745}
 746
 747void
 748nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 749{
 750        nfs4_put_deviceid_node(&dsaddr->id_node);
 751}
 752
 753/*
 754 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
 755 * Then: ((res + fsi) % dsaddr->stripe_count)
 756 */
 757u32
 758nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
 759{
 760        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
 761        u64 tmp;
 762
 763        tmp = offset - flseg->pattern_offset;
 764        do_div(tmp, flseg->stripe_unit);
 765        tmp += flseg->first_stripe_index;
 766        return do_div(tmp, flseg->dsaddr->stripe_count);
 767}
 768
 769u32
 770nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
 771{
 772        return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
 773}
 774
 775struct nfs_fh *
 776nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 777{
 778        struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
 779        u32 i;
 780
 781        if (flseg->stripe_type == STRIPE_SPARSE) {
 782                if (flseg->num_fh == 1)
 783                        i = 0;
 784                else if (flseg->num_fh == 0)
 785                        /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
 786                        return NULL;
 787                else
 788                        i = nfs4_fl_calc_ds_index(lseg, j);
 789        } else
 790                i = j;
 791        return flseg->fh_array[i];
 792}
 793
 794static void
 795filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
 796                               int err, const char *ds_remotestr)
 797{
 798        u32 *p = (u32 *)&dsaddr->id_node.deviceid;
 799
 800        printk(KERN_ERR "NFS: data server %s connection error %d."
 801                " Deviceid [%x%x%x%x] marked out of use.\n",
 802                ds_remotestr, err, p[0], p[1], p[2], p[3]);
 803
 804        spin_lock(&nfs4_ds_cache_lock);
 805        dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
 806        spin_unlock(&nfs4_ds_cache_lock);
 807}
 808
 809struct nfs4_pnfs_ds *
 810nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 811{
 812        struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
 813        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
 814
 815        if (ds == NULL) {
 816                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
 817                        __func__, ds_idx);
 818                return NULL;
 819        }
 820
 821        if (!ds->ds_clp) {
 822                struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
 823                int err;
 824
 825                if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
 826                        /* Already tried to connect, don't try again */
 827                        dprintk("%s Deviceid marked out of use\n", __func__);
 828                        return NULL;
 829                }
 830                err = nfs4_ds_connect(s, ds);
 831                if (err) {
 832                        filelayout_mark_devid_negative(dsaddr, err,
 833                                                       ds->ds_remotestr);
 834                        return NULL;
 835                }
 836        }
 837        return ds;
 838}
 839