linux/fs/nfs/pnfs_nfs.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Common NFS I/O  operations for the pnfs file based
   4 * layout drivers.
   5 *
   6 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
   7 *
   8 * Tom Haynes <loghyr@primarydata.com>
   9 */
  10
  11#include <linux/nfs_fs.h>
  12#include <linux/nfs_page.h>
  13#include <linux/sunrpc/addr.h>
  14#include <linux/module.h>
  15
  16#include "nfs4session.h"
  17#include "internal.h"
  18#include "pnfs.h"
  19
  20#define NFSDBG_FACILITY         NFSDBG_PNFS
  21
  22void pnfs_generic_rw_release(void *data)
  23{
  24        struct nfs_pgio_header *hdr = data;
  25
  26        nfs_put_client(hdr->ds_clp);
  27        hdr->mds_ops->rpc_release(data);
  28}
  29EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
  30
  31/* Fake up some data that will cause nfs_commit_release to retry the writes. */
  32void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
  33{
  34        struct nfs_page *first = nfs_list_entry(data->pages.next);
  35
  36        data->task.tk_status = 0;
  37        memcpy(&data->verf.verifier, &first->wb_verf,
  38               sizeof(data->verf.verifier));
  39        data->verf.verifier.data[0]++; /* ensure verifier mismatch */
  40}
  41EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
  42
  43void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
  44{
  45        struct nfs_commit_data *wdata = data;
  46
  47        /* Note this may cause RPC to be resent */
  48        wdata->mds_ops->rpc_call_done(task, data);
  49}
  50EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
  51
  52void pnfs_generic_commit_release(void *calldata)
  53{
  54        struct nfs_commit_data *data = calldata;
  55
  56        data->completion_ops->completion(data);
  57        pnfs_put_lseg(data->lseg);
  58        nfs_put_client(data->ds_clp);
  59        nfs_commitdata_release(data);
  60}
  61EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
  62
  63/* The generic layer is about to remove the req from the commit list.
  64 * If this will make the bucket empty, it will need to put the lseg reference.
  65 * Note this must be called holding nfsi->commit_mutex
  66 */
  67void
  68pnfs_generic_clear_request_commit(struct nfs_page *req,
  69                                  struct nfs_commit_info *cinfo)
  70{
  71        struct pnfs_layout_segment *freeme = NULL;
  72
  73        if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
  74                goto out;
  75        cinfo->ds->nwritten--;
  76        if (list_is_singular(&req->wb_list)) {
  77                struct pnfs_commit_bucket *bucket;
  78
  79                bucket = list_first_entry(&req->wb_list,
  80                                          struct pnfs_commit_bucket,
  81                                          written);
  82                freeme = bucket->wlseg;
  83                bucket->wlseg = NULL;
  84        }
  85out:
  86        nfs_request_remove_commit_list(req, cinfo);
  87        pnfs_put_lseg(freeme);
  88}
  89EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
  90
  91static int
  92pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
  93                                 struct nfs_commit_info *cinfo,
  94                                 int max)
  95{
  96        struct list_head *src = &bucket->written;
  97        struct list_head *dst = &bucket->committing;
  98        int ret;
  99
 100        lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
 101        ret = nfs_scan_commit_list(src, dst, cinfo, max);
 102        if (ret) {
 103                cinfo->ds->nwritten -= ret;
 104                cinfo->ds->ncommitting += ret;
 105                if (bucket->clseg == NULL)
 106                        bucket->clseg = pnfs_get_lseg(bucket->wlseg);
 107                if (list_empty(src)) {
 108                        pnfs_put_lseg(bucket->wlseg);
 109                        bucket->wlseg = NULL;
 110                }
 111        }
 112        return ret;
 113}
 114
 115/* Move reqs from written to committing lists, returning count
 116 * of number moved.
 117 */
 118int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo,
 119                                   int max)
 120{
 121        int i, rv = 0, cnt;
 122
 123        lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
 124        for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
 125                cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i],
 126                                                       cinfo, max);
 127                max -= cnt;
 128                rv += cnt;
 129        }
 130        return rv;
 131}
 132EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
 133
 134/* Pull everything off the committing lists and dump into @dst.  */
 135void pnfs_generic_recover_commit_reqs(struct list_head *dst,
 136                                      struct nfs_commit_info *cinfo)
 137{
 138        struct pnfs_commit_bucket *b;
 139        struct pnfs_layout_segment *freeme;
 140        int nwritten;
 141        int i;
 142
 143        lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
 144restart:
 145        for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
 146                nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
 147                if (!nwritten)
 148                        continue;
 149                cinfo->ds->nwritten -= nwritten;
 150                if (list_empty(&b->written)) {
 151                        freeme = b->wlseg;
 152                        b->wlseg = NULL;
 153                        pnfs_put_lseg(freeme);
 154                        goto restart;
 155                }
 156        }
 157}
 158EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
 159
 160static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
 161{
 162        struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
 163        struct pnfs_commit_bucket *bucket;
 164        struct pnfs_layout_segment *freeme;
 165        struct list_head *pos;
 166        LIST_HEAD(pages);
 167        int i;
 168
 169        mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 170        for (i = idx; i < fl_cinfo->nbuckets; i++) {
 171                bucket = &fl_cinfo->buckets[i];
 172                if (list_empty(&bucket->committing))
 173                        continue;
 174                freeme = bucket->clseg;
 175                bucket->clseg = NULL;
 176                list_for_each(pos, &bucket->committing)
 177                        cinfo->ds->ncommitting--;
 178                list_splice_init(&bucket->committing, &pages);
 179                mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 180                nfs_retry_commit(&pages, freeme, cinfo, i);
 181                pnfs_put_lseg(freeme);
 182                mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 183        }
 184        mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 185}
 186
 187static unsigned int
 188pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 189                              struct list_head *list)
 190{
 191        struct pnfs_ds_commit_info *fl_cinfo;
 192        struct pnfs_commit_bucket *bucket;
 193        struct nfs_commit_data *data;
 194        int i;
 195        unsigned int nreq = 0;
 196
 197        fl_cinfo = cinfo->ds;
 198        bucket = fl_cinfo->buckets;
 199        for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 200                if (list_empty(&bucket->committing))
 201                        continue;
 202                data = nfs_commitdata_alloc(false);
 203                if (!data)
 204                        break;
 205                data->ds_commit_index = i;
 206                list_add(&data->pages, list);
 207                nreq++;
 208        }
 209
 210        /* Clean up on error */
 211        pnfs_generic_retry_commit(cinfo, i);
 212        return nreq;
 213}
 214
 215static inline
 216void pnfs_fetch_commit_bucket_list(struct list_head *pages,
 217                struct nfs_commit_data *data,
 218                struct nfs_commit_info *cinfo)
 219{
 220        struct pnfs_commit_bucket *bucket;
 221        struct list_head *pos;
 222
 223        bucket = &cinfo->ds->buckets[data->ds_commit_index];
 224        mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 225        list_for_each(pos, &bucket->committing)
 226                cinfo->ds->ncommitting--;
 227        list_splice_init(&bucket->committing, pages);
 228        data->lseg = bucket->clseg;
 229        bucket->clseg = NULL;
 230        mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 231
 232}
 233
 234/* Helper function for pnfs_generic_commit_pagelist to catch an empty
 235 * page list. This can happen when two commits race.
 236 *
 237 * This must be called instead of nfs_init_commit - call one or the other, but
 238 * not both!
 239 */
 240static bool
 241pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
 242                                          struct nfs_commit_data *data,
 243                                          struct nfs_commit_info *cinfo)
 244{
 245        if (list_empty(pages)) {
 246                if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
 247                        wake_up_var(&cinfo->mds->rpcs_out);
 248                /* don't call nfs_commitdata_release - it tries to put
 249                 * the open_context which is not acquired until nfs_init_commit
 250                 * which has not been called on @data */
 251                WARN_ON_ONCE(data->context);
 252                nfs_commit_free(data);
 253                return true;
 254        }
 255
 256        return false;
 257}
 258
 259/* This follows nfs_commit_list pretty closely */
 260int
 261pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 262                             int how, struct nfs_commit_info *cinfo,
 263                             int (*initiate_commit)(struct nfs_commit_data *data,
 264                                                    int how))
 265{
 266        struct nfs_commit_data *data, *tmp;
 267        LIST_HEAD(list);
 268        unsigned int nreq = 0;
 269
 270        if (!list_empty(mds_pages)) {
 271                data = nfs_commitdata_alloc(true);
 272                data->ds_commit_index = -1;
 273                list_add(&data->pages, &list);
 274                nreq++;
 275        }
 276
 277        nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
 278
 279        if (nreq == 0)
 280                goto out;
 281
 282        atomic_add(nreq, &cinfo->mds->rpcs_out);
 283
 284        list_for_each_entry_safe(data, tmp, &list, pages) {
 285                list_del_init(&data->pages);
 286                if (data->ds_commit_index < 0) {
 287                        /* another commit raced with us */
 288                        if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
 289                                data, cinfo))
 290                                continue;
 291
 292                        nfs_init_commit(data, mds_pages, NULL, cinfo);
 293                        nfs_initiate_commit(NFS_CLIENT(inode), data,
 294                                            NFS_PROTO(data->inode),
 295                                            data->mds_ops, how, 0);
 296                } else {
 297                        LIST_HEAD(pages);
 298
 299                        pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
 300
 301                        /* another commit raced with us */
 302                        if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
 303                                data, cinfo))
 304                                continue;
 305
 306                        nfs_init_commit(data, &pages, data->lseg, cinfo);
 307                        initiate_commit(data, how);
 308                }
 309        }
 310out:
 311        return PNFS_ATTEMPTED;
 312}
 313EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
 314
 315/*
 316 * Data server cache
 317 *
 318 * Data servers can be mapped to different device ids.
 319 * nfs4_pnfs_ds reference counting
 320 *   - set to 1 on allocation
 321 *   - incremented when a device id maps a data server already in the cache.
 322 *   - decremented when deviceid is removed from the cache.
 323 */
 324static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
 325static LIST_HEAD(nfs4_data_server_cache);
 326
 327/* Debug routines */
 328static void
 329print_ds(struct nfs4_pnfs_ds *ds)
 330{
 331        if (ds == NULL) {
 332                printk(KERN_WARNING "%s NULL device\n", __func__);
 333                return;
 334        }
 335        printk(KERN_WARNING "        ds %s\n"
 336                "        ref count %d\n"
 337                "        client %p\n"
 338                "        cl_exchange_flags %x\n",
 339                ds->ds_remotestr,
 340                refcount_read(&ds->ds_count), ds->ds_clp,
 341                ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 342}
 343
 344static bool
 345same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
 346{
 347        struct sockaddr_in *a, *b;
 348        struct sockaddr_in6 *a6, *b6;
 349
 350        if (addr1->sa_family != addr2->sa_family)
 351                return false;
 352
 353        switch (addr1->sa_family) {
 354        case AF_INET:
 355                a = (struct sockaddr_in *)addr1;
 356                b = (struct sockaddr_in *)addr2;
 357
 358                if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
 359                    a->sin_port == b->sin_port)
 360                        return true;
 361                break;
 362
 363        case AF_INET6:
 364                a6 = (struct sockaddr_in6 *)addr1;
 365                b6 = (struct sockaddr_in6 *)addr2;
 366
 367                /* LINKLOCAL addresses must have matching scope_id */
 368                if (ipv6_addr_src_scope(&a6->sin6_addr) ==
 369                    IPV6_ADDR_SCOPE_LINKLOCAL &&
 370                    a6->sin6_scope_id != b6->sin6_scope_id)
 371                        return false;
 372
 373                if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
 374                    a6->sin6_port == b6->sin6_port)
 375                        return true;
 376                break;
 377
 378        default:
 379                dprintk("%s: unhandled address family: %u\n",
 380                        __func__, addr1->sa_family);
 381                return false;
 382        }
 383
 384        return false;
 385}
 386
 387/*
 388 * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
 389 * declare a match.
 390 */
 391static bool
 392_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
 393                               const struct list_head *dsaddrs2)
 394{
 395        struct nfs4_pnfs_ds_addr *da1, *da2;
 396        struct sockaddr *sa1, *sa2;
 397        bool match = false;
 398
 399        list_for_each_entry(da1, dsaddrs1, da_node) {
 400                sa1 = (struct sockaddr *)&da1->da_addr;
 401                match = false;
 402                list_for_each_entry(da2, dsaddrs2, da_node) {
 403                        sa2 = (struct sockaddr *)&da2->da_addr;
 404                        match = same_sockaddr(sa1, sa2);
 405                        if (match)
 406                                break;
 407                }
 408                if (!match)
 409                        break;
 410        }
 411        return match;
 412}
 413
 414/*
 415 * Lookup DS by addresses.  nfs4_ds_cache_lock is held
 416 */
 417static struct nfs4_pnfs_ds *
 418_data_server_lookup_locked(const struct list_head *dsaddrs)
 419{
 420        struct nfs4_pnfs_ds *ds;
 421
 422        list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
 423                if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
 424                        return ds;
 425        return NULL;
 426}
 427
 428static void destroy_ds(struct nfs4_pnfs_ds *ds)
 429{
 430        struct nfs4_pnfs_ds_addr *da;
 431
 432        dprintk("--> %s\n", __func__);
 433        ifdebug(FACILITY)
 434                print_ds(ds);
 435
 436        nfs_put_client(ds->ds_clp);
 437
 438        while (!list_empty(&ds->ds_addrs)) {
 439                da = list_first_entry(&ds->ds_addrs,
 440                                      struct nfs4_pnfs_ds_addr,
 441                                      da_node);
 442                list_del_init(&da->da_node);
 443                kfree(da->da_remotestr);
 444                kfree(da);
 445        }
 446
 447        kfree(ds->ds_remotestr);
 448        kfree(ds);
 449}
 450
 451void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
 452{
 453        if (refcount_dec_and_lock(&ds->ds_count,
 454                                &nfs4_ds_cache_lock)) {
 455                list_del_init(&ds->ds_node);
 456                spin_unlock(&nfs4_ds_cache_lock);
 457                destroy_ds(ds);
 458        }
 459}
 460EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
 461
 462/*
 463 * Create a string with a human readable address and port to avoid
 464 * complicated setup around many dprinks.
 465 */
 466static char *
 467nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
 468{
 469        struct nfs4_pnfs_ds_addr *da;
 470        char *remotestr;
 471        size_t len;
 472        char *p;
 473
 474        len = 3;        /* '{', '}' and eol */
 475        list_for_each_entry(da, dsaddrs, da_node) {
 476                len += strlen(da->da_remotestr) + 1;    /* string plus comma */
 477        }
 478
 479        remotestr = kzalloc(len, gfp_flags);
 480        if (!remotestr)
 481                return NULL;
 482
 483        p = remotestr;
 484        *(p++) = '{';
 485        len--;
 486        list_for_each_entry(da, dsaddrs, da_node) {
 487                size_t ll = strlen(da->da_remotestr);
 488
 489                if (ll > len)
 490                        goto out_err;
 491
 492                memcpy(p, da->da_remotestr, ll);
 493                p += ll;
 494                len -= ll;
 495
 496                if (len < 1)
 497                        goto out_err;
 498                (*p++) = ',';
 499                len--;
 500        }
 501        if (len < 2)
 502                goto out_err;
 503        *(p++) = '}';
 504        *p = '\0';
 505        return remotestr;
 506out_err:
 507        kfree(remotestr);
 508        return NULL;
 509}
 510
 511/*
 512 * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
 513 * uncached and return cached struct nfs4_pnfs_ds.
 514 */
 515struct nfs4_pnfs_ds *
 516nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 517{
 518        struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
 519        char *remotestr;
 520
 521        if (list_empty(dsaddrs)) {
 522                dprintk("%s: no addresses defined\n", __func__);
 523                goto out;
 524        }
 525
 526        ds = kzalloc(sizeof(*ds), gfp_flags);
 527        if (!ds)
 528                goto out;
 529
 530        /* this is only used for debugging, so it's ok if its NULL */
 531        remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
 532
 533        spin_lock(&nfs4_ds_cache_lock);
 534        tmp_ds = _data_server_lookup_locked(dsaddrs);
 535        if (tmp_ds == NULL) {
 536                INIT_LIST_HEAD(&ds->ds_addrs);
 537                list_splice_init(dsaddrs, &ds->ds_addrs);
 538                ds->ds_remotestr = remotestr;
 539                refcount_set(&ds->ds_count, 1);
 540                INIT_LIST_HEAD(&ds->ds_node);
 541                ds->ds_clp = NULL;
 542                list_add(&ds->ds_node, &nfs4_data_server_cache);
 543                dprintk("%s add new data server %s\n", __func__,
 544                        ds->ds_remotestr);
 545        } else {
 546                kfree(remotestr);
 547                kfree(ds);
 548                refcount_inc(&tmp_ds->ds_count);
 549                dprintk("%s data server %s found, inc'ed ds_count to %d\n",
 550                        __func__, tmp_ds->ds_remotestr,
 551                        refcount_read(&tmp_ds->ds_count));
 552                ds = tmp_ds;
 553        }
 554        spin_unlock(&nfs4_ds_cache_lock);
 555out:
 556        return ds;
 557}
 558EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
 559
 560static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
 561{
 562        might_sleep();
 563        wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
 564                        TASK_KILLABLE);
 565}
 566
 567static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
 568{
 569        smp_mb__before_atomic();
 570        clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
 571        smp_mb__after_atomic();
 572        wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
 573}
 574
 575static struct nfs_client *(*get_v3_ds_connect)(
 576                        struct nfs_server *mds_srv,
 577                        const struct sockaddr *ds_addr,
 578                        int ds_addrlen,
 579                        int ds_proto,
 580                        unsigned int ds_timeo,
 581                        unsigned int ds_retrans);
 582
 583static bool load_v3_ds_connect(void)
 584{
 585        if (!get_v3_ds_connect) {
 586                get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
 587                WARN_ON_ONCE(!get_v3_ds_connect);
 588        }
 589
 590        return(get_v3_ds_connect != NULL);
 591}
 592
 593void nfs4_pnfs_v3_ds_connect_unload(void)
 594{
 595        if (get_v3_ds_connect) {
 596                symbol_put(nfs3_set_ds_client);
 597                get_v3_ds_connect = NULL;
 598        }
 599}
 600
 601static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 602                                 struct nfs4_pnfs_ds *ds,
 603                                 unsigned int timeo,
 604                                 unsigned int retrans)
 605{
 606        struct nfs_client *clp = ERR_PTR(-EIO);
 607        struct nfs4_pnfs_ds_addr *da;
 608        int status = 0;
 609
 610        dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
 611
 612        if (!load_v3_ds_connect())
 613                goto out;
 614
 615        list_for_each_entry(da, &ds->ds_addrs, da_node) {
 616                dprintk("%s: DS %s: trying address %s\n",
 617                        __func__, ds->ds_remotestr, da->da_remotestr);
 618
 619                if (!IS_ERR(clp)) {
 620                        struct xprt_create xprt_args = {
 621                                .ident = XPRT_TRANSPORT_TCP,
 622                                .net = clp->cl_net,
 623                                .dstaddr = (struct sockaddr *)&da->da_addr,
 624                                .addrlen = da->da_addrlen,
 625                                .servername = clp->cl_hostname,
 626                        };
 627                        /* Add this address as an alias */
 628                        rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 629                                        rpc_clnt_test_and_add_xprt, NULL);
 630                        continue;
 631                }
 632                clp = get_v3_ds_connect(mds_srv,
 633                                (struct sockaddr *)&da->da_addr,
 634                                da->da_addrlen, IPPROTO_TCP,
 635                                timeo, retrans);
 636                if (IS_ERR(clp))
 637                        continue;
 638                clp->cl_rpcclient->cl_softerr = 0;
 639                clp->cl_rpcclient->cl_softrtry = 0;
 640        }
 641
 642        if (IS_ERR(clp)) {
 643                status = PTR_ERR(clp);
 644                goto out;
 645        }
 646
 647        smp_wmb();
 648        ds->ds_clp = clp;
 649        dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 650out:
 651        return status;
 652}
 653
 654static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 655                                 struct nfs4_pnfs_ds *ds,
 656                                 unsigned int timeo,
 657                                 unsigned int retrans,
 658                                 u32 minor_version)
 659{
 660        struct nfs_client *clp = ERR_PTR(-EIO);
 661        struct nfs4_pnfs_ds_addr *da;
 662        int status = 0;
 663
 664        dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
 665
 666        list_for_each_entry(da, &ds->ds_addrs, da_node) {
 667                dprintk("%s: DS %s: trying address %s\n",
 668                        __func__, ds->ds_remotestr, da->da_remotestr);
 669
 670                if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
 671                        struct xprt_create xprt_args = {
 672                                .ident = XPRT_TRANSPORT_TCP,
 673                                .net = clp->cl_net,
 674                                .dstaddr = (struct sockaddr *)&da->da_addr,
 675                                .addrlen = da->da_addrlen,
 676                                .servername = clp->cl_hostname,
 677                        };
 678                        struct nfs4_add_xprt_data xprtdata = {
 679                                .clp = clp,
 680                                .cred = nfs4_get_clid_cred(clp),
 681                        };
 682                        struct rpc_add_xprt_test rpcdata = {
 683                                .add_xprt_test = clp->cl_mvops->session_trunk,
 684                                .data = &xprtdata,
 685                        };
 686
 687                        /**
 688                        * Test this address for session trunking and
 689                        * add as an alias
 690                        */
 691                        rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 692                                          rpc_clnt_setup_test_and_add_xprt,
 693                                          &rpcdata);
 694                        if (xprtdata.cred)
 695                                put_cred(xprtdata.cred);
 696                } else {
 697                        clp = nfs4_set_ds_client(mds_srv,
 698                                                (struct sockaddr *)&da->da_addr,
 699                                                da->da_addrlen, IPPROTO_TCP,
 700                                                timeo, retrans, minor_version);
 701                        if (IS_ERR(clp))
 702                                continue;
 703
 704                        status = nfs4_init_ds_session(clp,
 705                                        mds_srv->nfs_client->cl_lease_time);
 706                        if (status) {
 707                                nfs_put_client(clp);
 708                                clp = ERR_PTR(-EIO);
 709                                continue;
 710                        }
 711
 712                }
 713        }
 714
 715        if (IS_ERR(clp)) {
 716                status = PTR_ERR(clp);
 717                goto out;
 718        }
 719
 720        smp_wmb();
 721        ds->ds_clp = clp;
 722        dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 723out:
 724        return status;
 725}
 726
 727/*
 728 * Create an rpc connection to the nfs4_pnfs_ds data server.
 729 * Currently only supports IPv4 and IPv6 addresses.
 730 * If connection fails, make devid unavailable and return a -errno.
 731 */
 732int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 733                          struct nfs4_deviceid_node *devid, unsigned int timeo,
 734                          unsigned int retrans, u32 version, u32 minor_version)
 735{
 736        int err;
 737
 738again:
 739        err = 0;
 740        if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
 741                if (version == 3) {
 742                        err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
 743                                                       retrans);
 744                } else if (version == 4) {
 745                        err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo,
 746                                                       retrans, minor_version);
 747                } else {
 748                        dprintk("%s: unsupported DS version %d\n", __func__,
 749                                version);
 750                        err = -EPROTONOSUPPORT;
 751                }
 752
 753                nfs4_clear_ds_conn_bit(ds);
 754        } else {
 755                nfs4_wait_ds_connect(ds);
 756
 757                /* what was waited on didn't connect AND didn't mark unavail */
 758                if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
 759                        goto again;
 760        }
 761
 762        /*
 763         * At this point the ds->ds_clp should be ready, but it might have
 764         * hit an error.
 765         */
 766        if (!err) {
 767                if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
 768                        WARN_ON_ONCE(ds->ds_clp ||
 769                                !nfs4_test_deviceid_unavailable(devid));
 770                        return -EINVAL;
 771                }
 772                err = nfs_client_init_status(ds->ds_clp);
 773        }
 774
 775        return err;
 776}
 777EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
 778
 779/*
 780 * Currently only supports ipv4, ipv6 and one multi-path address.
 781 */
 782struct nfs4_pnfs_ds_addr *
 783nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 784{
 785        struct nfs4_pnfs_ds_addr *da = NULL;
 786        char *buf, *portstr;
 787        __be16 port;
 788        int nlen, rlen;
 789        int tmp[2];
 790        __be32 *p;
 791        char *netid, *match_netid;
 792        size_t len, match_netid_len;
 793        char *startsep = "";
 794        char *endsep = "";
 795
 796
 797        /* r_netid */
 798        p = xdr_inline_decode(xdr, 4);
 799        if (unlikely(!p))
 800                goto out_err;
 801        nlen = be32_to_cpup(p++);
 802
 803        p = xdr_inline_decode(xdr, nlen);
 804        if (unlikely(!p))
 805                goto out_err;
 806
 807        netid = kmalloc(nlen+1, gfp_flags);
 808        if (unlikely(!netid))
 809                goto out_err;
 810
 811        netid[nlen] = '\0';
 812        memcpy(netid, p, nlen);
 813
 814        /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
 815        p = xdr_inline_decode(xdr, 4);
 816        if (unlikely(!p))
 817                goto out_free_netid;
 818        rlen = be32_to_cpup(p);
 819
 820        p = xdr_inline_decode(xdr, rlen);
 821        if (unlikely(!p))
 822                goto out_free_netid;
 823
 824        /* port is ".ABC.DEF", 8 chars max */
 825        if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
 826                dprintk("%s: Invalid address, length %d\n", __func__,
 827                        rlen);
 828                goto out_free_netid;
 829        }
 830        buf = kmalloc(rlen + 1, gfp_flags);
 831        if (!buf) {
 832                dprintk("%s: Not enough memory\n", __func__);
 833                goto out_free_netid;
 834        }
 835        buf[rlen] = '\0';
 836        memcpy(buf, p, rlen);
 837
 838        /* replace port '.' with '-' */
 839        portstr = strrchr(buf, '.');
 840        if (!portstr) {
 841                dprintk("%s: Failed finding expected dot in port\n",
 842                        __func__);
 843                goto out_free_buf;
 844        }
 845        *portstr = '-';
 846
 847        /* find '.' between address and port */
 848        portstr = strrchr(buf, '.');
 849        if (!portstr) {
 850                dprintk("%s: Failed finding expected dot between address and "
 851                        "port\n", __func__);
 852                goto out_free_buf;
 853        }
 854        *portstr = '\0';
 855
 856        da = kzalloc(sizeof(*da), gfp_flags);
 857        if (unlikely(!da))
 858                goto out_free_buf;
 859
 860        INIT_LIST_HEAD(&da->da_node);
 861
 862        if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 863                      sizeof(da->da_addr))) {
 864                dprintk("%s: error parsing address %s\n", __func__, buf);
 865                goto out_free_da;
 866        }
 867
 868        portstr++;
 869        sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
 870        port = htons((tmp[0] << 8) | (tmp[1]));
 871
 872        switch (da->da_addr.ss_family) {
 873        case AF_INET:
 874                ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
 875                da->da_addrlen = sizeof(struct sockaddr_in);
 876                match_netid = "tcp";
 877                match_netid_len = 3;
 878                break;
 879
 880        case AF_INET6:
 881                ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
 882                da->da_addrlen = sizeof(struct sockaddr_in6);
 883                match_netid = "tcp6";
 884                match_netid_len = 4;
 885                startsep = "[";
 886                endsep = "]";
 887                break;
 888
 889        default:
 890                dprintk("%s: unsupported address family: %u\n",
 891                        __func__, da->da_addr.ss_family);
 892                goto out_free_da;
 893        }
 894
 895        if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
 896                dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
 897                        __func__, netid, match_netid);
 898                goto out_free_da;
 899        }
 900
 901        /* save human readable address */
 902        len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
 903        da->da_remotestr = kzalloc(len, gfp_flags);
 904
 905        /* NULL is ok, only used for dprintk */
 906        if (da->da_remotestr)
 907                snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
 908                         buf, endsep, ntohs(port));
 909
 910        dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 911        kfree(buf);
 912        kfree(netid);
 913        return da;
 914
 915out_free_da:
 916        kfree(da);
 917out_free_buf:
 918        dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
 919        kfree(buf);
 920out_free_netid:
 921        kfree(netid);
 922out_err:
 923        return NULL;
 924}
 925EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
 926
 927void
 928pnfs_layout_mark_request_commit(struct nfs_page *req,
 929                                struct pnfs_layout_segment *lseg,
 930                                struct nfs_commit_info *cinfo,
 931                                u32 ds_commit_idx)
 932{
 933        struct list_head *list;
 934        struct pnfs_commit_bucket *buckets;
 935
 936        mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
 937        buckets = cinfo->ds->buckets;
 938        list = &buckets[ds_commit_idx].written;
 939        if (list_empty(list)) {
 940                if (!pnfs_is_valid_lseg(lseg)) {
 941                        mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 942                        cinfo->completion_ops->resched_write(cinfo, req);
 943                        return;
 944                }
 945                /* Non-empty buckets hold a reference on the lseg.  That ref
 946                 * is normally transferred to the COMMIT call and released
 947                 * there.  It could also be released if the last req is pulled
 948                 * off due to a rewrite, in which case it will be done in
 949                 * pnfs_common_clear_request_commit
 950                 */
 951                WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
 952                buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
 953        }
 954        set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 955        cinfo->ds->nwritten++;
 956
 957        nfs_request_add_commit_list_locked(req, list, cinfo);
 958        mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
 959        nfs_mark_page_unstable(req->wb_page, cinfo);
 960}
 961EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
 962
 963int
 964pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
 965{
 966        int ret;
 967
 968        if (!pnfs_layoutcommit_outstanding(inode))
 969                return 0;
 970        ret = nfs_commit_inode(inode, FLUSH_SYNC);
 971        if (ret < 0)
 972                return ret;
 973        if (datasync)
 974                return 0;
 975        return pnfs_layoutcommit_inode(inode, true);
 976}
 977EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
 978
 979