linux/fs/nfs/flexfilelayout/flexfilelayout.c
<<
>>
Prefs
   1/*
   2 * Module for pnfs flexfile layout driver.
   3 *
   4 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
   5 *
   6 * Tao Peng <bergwolf@primarydata.com>
   7 */
   8
   9#include <linux/nfs_fs.h>
  10#include <linux/nfs_mount.h>
  11#include <linux/nfs_page.h>
  12#include <linux/module.h>
  13#include <linux/sched/mm.h>
  14
  15#include <linux/sunrpc/metrics.h>
  16
  17#include "flexfilelayout.h"
  18#include "../nfs4session.h"
  19#include "../nfs4idmap.h"
  20#include "../internal.h"
  21#include "../delegation.h"
  22#include "../nfs4trace.h"
  23#include "../iostat.h"
  24#include "../nfs.h"
  25#include "../nfs42.h"
  26
/* Debug facility for this file; presumably consumed by dprintk() - confirm. */
  27#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
  28
/*
 * NOTE(review): neither constant below is used in the visible part of the
 * file.  The first is expressed in units of HZ (15 * HZ); the second is a
 * plain count.  Their users are presumably further down - confirm.
 */
  29#define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ)
  30#define FF_LAYOUTRETURN_MAXERR 20
  31
/*
 * Copied into pgio->pg_maxretrans by the read pg_init path when the mount
 * has NFS_MOUNT_SOFT or NFS_MOUNT_SOFTERR set.  Where it gets its value is
 * not visible in this excerpt.
 */
  32static unsigned short io_maxretrans;
  33
/* Forward declarations; the definitions are outside this excerpt. */
  34static const struct pnfs_commit_ops ff_layout_commit_ops;
  35static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
  36                struct nfs_pgio_header *hdr);
  37static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
  38                               struct nfs42_layoutstat_devinfo *devinfo,
  39                               int dev_limit);
  40static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
  41                              const struct nfs42_layoutstat_devinfo *devinfo,
  42                              struct nfs4_ff_layout_mirror *mirror);
  43
  44static struct pnfs_layout_hdr *
  45ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
  46{
  47        struct nfs4_flexfile_layout *ffl;
  48
  49        ffl = kzalloc(sizeof(*ffl), gfp_flags);
  50        if (ffl) {
  51                pnfs_init_ds_commit_info(&ffl->commit_info);
  52                INIT_LIST_HEAD(&ffl->error_list);
  53                INIT_LIST_HEAD(&ffl->mirrors);
  54                ffl->last_report_time = ktime_get();
  55                ffl->commit_info.ops = &ff_layout_commit_ops;
  56                return &ffl->generic_hdr;
  57        } else
  58                return NULL;
  59}
  60
  61static void
  62ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
  63{
  64        struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(lo);
  65        struct nfs4_ff_layout_ds_err *err, *n;
  66
  67        list_for_each_entry_safe(err, n, &ffl->error_list, list) {
  68                list_del(&err->list);
  69                kfree(err);
  70        }
  71        kfree_rcu(ffl, generic_hdr.plh_rcu);
  72}
  73
  74static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
  75{
  76        __be32 *p;
  77
  78        p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
  79        if (unlikely(p == NULL))
  80                return -ENOBUFS;
  81        stateid->type = NFS4_PNFS_DS_STATEID_TYPE;
  82        memcpy(stateid->data, p, NFS4_STATEID_SIZE);
  83        dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,
  84                p[0], p[1], p[2], p[3]);
  85        return 0;
  86}
  87
  88static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid)
  89{
  90        __be32 *p;
  91
  92        p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
  93        if (unlikely(!p))
  94                return -ENOBUFS;
  95        memcpy(devid, p, NFS4_DEVICEID4_SIZE);
  96        nfs4_print_deviceid(devid);
  97        return 0;
  98}
  99
 100static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 101{
 102        __be32 *p;
 103
 104        p = xdr_inline_decode(xdr, 4);
 105        if (unlikely(!p))
 106                return -ENOBUFS;
 107        fh->size = be32_to_cpup(p++);
 108        if (fh->size > NFS_MAXFHSIZE) {
 109                printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
 110                       fh->size);
 111                return -EOVERFLOW;
 112        }
 113        /* fh.data */
 114        p = xdr_inline_decode(xdr, fh->size);
 115        if (unlikely(!p))
 116                return -ENOBUFS;
 117        memcpy(&fh->data, p, fh->size);
 118        dprintk("%s: fh len %d\n", __func__, fh->size);
 119
 120        return 0;
 121}
 122
 123/*
 124 * Currently only stringified uids and gids are accepted.
 125 * I.e., kerberos is not supported to the DSes, so no pricipals.
 126 *
 127 * That means that one common function will suffice, but when
 128 * principals are added, this should be split to accomodate
 129 * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid().
 130 */
 131static int
 132decode_name(struct xdr_stream *xdr, u32 *id)
 133{
 134        __be32 *p;
 135        int len;
 136
 137        /* opaque_length(4)*/
 138        p = xdr_inline_decode(xdr, 4);
 139        if (unlikely(!p))
 140                return -ENOBUFS;
 141        len = be32_to_cpup(p++);
 142        if (len < 0)
 143                return -EINVAL;
 144
 145        dprintk("%s: len %u\n", __func__, len);
 146
 147        /* opaque body */
 148        p = xdr_inline_decode(xdr, len);
 149        if (unlikely(!p))
 150                return -ENOBUFS;
 151
 152        if (!nfs_map_string_to_numeric((char *)p, len, id))
 153                return -EINVAL;
 154
 155        return 0;
 156}
 157
 158static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
 159                const struct nfs4_ff_layout_mirror *m2)
 160{
 161        int i, j;
 162
 163        if (m1->fh_versions_cnt != m2->fh_versions_cnt)
 164                return false;
 165        for (i = 0; i < m1->fh_versions_cnt; i++) {
 166                bool found_fh = false;
 167                for (j = 0; j < m2->fh_versions_cnt; j++) {
 168                        if (nfs_compare_fh(&m1->fh_versions[i],
 169                                        &m2->fh_versions[j]) == 0) {
 170                                found_fh = true;
 171                                break;
 172                        }
 173                }
 174                if (!found_fh)
 175                        return false;
 176        }
 177        return true;
 178}
 179
 180static struct nfs4_ff_layout_mirror *
 181ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
 182                struct nfs4_ff_layout_mirror *mirror)
 183{
 184        struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
 185        struct nfs4_ff_layout_mirror *pos;
 186        struct inode *inode = lo->plh_inode;
 187
 188        spin_lock(&inode->i_lock);
 189        list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
 190                if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)
 191                        continue;
 192                if (!ff_mirror_match_fh(mirror, pos))
 193                        continue;
 194                if (refcount_inc_not_zero(&pos->ref)) {
 195                        spin_unlock(&inode->i_lock);
 196                        return pos;
 197                }
 198        }
 199        list_add(&mirror->mirrors, &ff_layout->mirrors);
 200        mirror->layout = lo;
 201        spin_unlock(&inode->i_lock);
 202        return mirror;
 203}
 204
 205static void
 206ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
 207{
 208        struct inode *inode;
 209        if (mirror->layout == NULL)
 210                return;
 211        inode = mirror->layout->plh_inode;
 212        spin_lock(&inode->i_lock);
 213        list_del(&mirror->mirrors);
 214        spin_unlock(&inode->i_lock);
 215        mirror->layout = NULL;
 216}
 217
 218static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
 219{
 220        struct nfs4_ff_layout_mirror *mirror;
 221
 222        mirror = kzalloc(sizeof(*mirror), gfp_flags);
 223        if (mirror != NULL) {
 224                spin_lock_init(&mirror->lock);
 225                refcount_set(&mirror->ref, 1);
 226                INIT_LIST_HEAD(&mirror->mirrors);
 227        }
 228        return mirror;
 229}
 230
 231static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
 232{
 233        const struct cred       *cred;
 234
 235        ff_layout_remove_mirror(mirror);
 236        kfree(mirror->fh_versions);
 237        cred = rcu_access_pointer(mirror->ro_cred);
 238        put_cred(cred);
 239        cred = rcu_access_pointer(mirror->rw_cred);
 240        put_cred(cred);
 241        nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
 242        kfree(mirror);
 243}
 244
 245static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
 246{
 247        if (mirror != NULL && refcount_dec_and_test(&mirror->ref))
 248                ff_layout_free_mirror(mirror);
 249}
 250
 251static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
 252{
 253        u32 i;
 254
 255        for (i = 0; i < fls->mirror_array_cnt; i++)
 256                ff_layout_put_mirror(fls->mirror_array[i]);
 257}
 258
 259static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
 260{
 261        if (fls) {
 262                ff_layout_free_mirror_array(fls);
 263                kfree(fls);
 264        }
 265}
 266
 267static bool
 268ff_lseg_match_mirrors(struct pnfs_layout_segment *l1,
 269                struct pnfs_layout_segment *l2)
 270{
 271        const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1);
 272        const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1);
 273        u32 i;
 274
 275        if (fl1->mirror_array_cnt != fl2->mirror_array_cnt)
 276                return false;
 277        for (i = 0; i < fl1->mirror_array_cnt; i++) {
 278                if (fl1->mirror_array[i] != fl2->mirror_array[i])
 279                        return false;
 280        }
 281        return true;
 282}
 283
 284static bool
 285ff_lseg_range_is_after(const struct pnfs_layout_range *l1,
 286                const struct pnfs_layout_range *l2)
 287{
 288        u64 end1, end2;
 289
 290        if (l1->iomode != l2->iomode)
 291                return l1->iomode != IOMODE_READ;
 292        end1 = pnfs_calc_offset_end(l1->offset, l1->length);
 293        end2 = pnfs_calc_offset_end(l2->offset, l2->length);
 294        if (end1 < l2->offset)
 295                return false;
 296        if (end2 < l1->offset)
 297                return true;
 298        return l2->offset <= l1->offset;
 299}
 300
 301static bool
 302ff_lseg_merge(struct pnfs_layout_segment *new,
 303                struct pnfs_layout_segment *old)
 304{
 305        u64 new_end, old_end;
 306
 307        if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags))
 308                return false;
 309        if (new->pls_range.iomode != old->pls_range.iomode)
 310                return false;
 311        old_end = pnfs_calc_offset_end(old->pls_range.offset,
 312                        old->pls_range.length);
 313        if (old_end < new->pls_range.offset)
 314                return false;
 315        new_end = pnfs_calc_offset_end(new->pls_range.offset,
 316                        new->pls_range.length);
 317        if (new_end < old->pls_range.offset)
 318                return false;
 319        if (!ff_lseg_match_mirrors(new, old))
 320                return false;
 321
 322        /* Mergeable: copy info from 'old' to 'new' */
 323        if (new_end < old_end)
 324                new_end = old_end;
 325        if (new->pls_range.offset < old->pls_range.offset)
 326                new->pls_range.offset = old->pls_range.offset;
 327        new->pls_range.length = pnfs_calc_offset_length(new->pls_range.offset,
 328                        new_end);
 329        if (test_bit(NFS_LSEG_ROC, &old->pls_flags))
 330                set_bit(NFS_LSEG_ROC, &new->pls_flags);
 331        return true;
 332}
 333
 334static void
 335ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
 336                struct pnfs_layout_segment *lseg,
 337                struct list_head *free_me)
 338{
 339        pnfs_generic_layout_insert_lseg(lo, lseg,
 340                        ff_lseg_range_is_after,
 341                        ff_lseg_merge,
 342                        free_me);
 343}
 344
 345static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
 346{
 347        int i, j;
 348
 349        for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
 350                for (j = i + 1; j < fls->mirror_array_cnt; j++)
 351                        if (fls->mirror_array[i]->efficiency <
 352                            fls->mirror_array[j]->efficiency)
 353                                swap(fls->mirror_array[i],
 354                                     fls->mirror_array[j]);
 355        }
 356}
 357
/*
 * Decode the layout body carried in @lgr into a newly allocated flexfile
 * layout segment.
 *
 * Items are pulled from the stream in this order: a 64-bit stripe unit,
 * the mirror count, then for each mirror a data-server count (only 1 is
 * accepted), device id, efficiency, stateid, file handle count and
 * handles, user name and group name.  After the mirrors, a flags word and
 * a report interval are read if present.
 *
 * Each decoded mirror is passed to ff_layout_add_mirror(), which may hand
 * back an already registered equivalent mirror; in that case the freshly
 * built one is released and the existing one is used instead.
 *
 * Returns the generic header of the new segment, or ERR_PTR() carrying
 * -EIO, -ENOMEM or the error returned by one of the decode helpers.
 */
 358static struct pnfs_layout_segment *
 359ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 360                     struct nfs4_layoutget_res *lgr,
 361                     gfp_t gfp_flags)
 362{
 363        struct pnfs_layout_segment *ret;
 364        struct nfs4_ff_layout_segment *fls = NULL;
 365        struct xdr_stream stream;
 366        struct xdr_buf buf;
 367        struct page *scratch;
 368        u64 stripe_unit;
 369        u32 mirror_array_cnt;
 370        __be32 *p;
 371        int i, rc;
 372
 373        dprintk("--> %s\n", __func__);
        /* Scratch page handed to the XDR stream below; freed on every exit. */
 374        scratch = alloc_page(gfp_flags);
 375        if (!scratch)
 376                return ERR_PTR(-ENOMEM);
 377
 378        xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
 379                              lgr->layoutp->len);
 380        xdr_set_scratch_page(&stream, scratch);
 381
 382        /* stripe unit and mirror_array_cnt */
        /* rc is preset so the following "goto out_err_free" paths report it */
 383        rc = -EIO;
 384        p = xdr_inline_decode(&stream, 8 + 4);
 385        if (!p)
 386                goto out_err_free;
 387
 388        p = xdr_decode_hyper(p, &stripe_unit);
 389        mirror_array_cnt = be32_to_cpup(p++);
 390        dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__,
 391                stripe_unit, mirror_array_cnt);
 392
        /* Reject an empty mirror list or one above the supported maximum. */
 393        if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT ||
 394            mirror_array_cnt == 0)
 395                goto out_err_free;
 396
 397        rc = -ENOMEM;
 398        fls = kzalloc(struct_size(fls, mirror_array, mirror_array_cnt),
 399                        gfp_flags);
 400        if (!fls)
 401                goto out_err_free;
 402
 403        fls->mirror_array_cnt = mirror_array_cnt;
 404        fls->stripe_unit = stripe_unit;
 405
 406        for (i = 0; i < fls->mirror_array_cnt; i++) {
 407                struct nfs4_ff_layout_mirror *mirror;
 408                struct cred *kcred;
 409                const struct cred __rcu *cred;
 410                kuid_t uid;
 411                kgid_t gid;
 412                u32 ds_count, fh_count, id;
 413                int j;
 414
 415                rc = -EIO;
 416                p = xdr_inline_decode(&stream, 4);
 417                if (!p)
 418                        goto out_err_free;
 419                ds_count = be32_to_cpup(p);
 420
 421                /* FIXME: allow for striping? */
 422                if (ds_count != 1)
 423                        goto out_err_free;
 424
 425                fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
 426                if (fls->mirror_array[i] == NULL) {
 427                        rc = -ENOMEM;
 428                        goto out_err_free;
 429                }
 430
 431                fls->mirror_array[i]->ds_count = ds_count;
 432
 433                /* deviceid */
 434                rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);
 435                if (rc)
 436                        goto out_err_free;
 437
 438                /* efficiency */
 439                rc = -EIO;
 440                p = xdr_inline_decode(&stream, 4);
 441                if (!p)
 442                        goto out_err_free;
 443                fls->mirror_array[i]->efficiency = be32_to_cpup(p);
 444
 445                /* stateid */
 446                rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid);
 447                if (rc)
 448                        goto out_err_free;
 449
 450                /* fh */
 451                rc = -EIO;
 452                p = xdr_inline_decode(&stream, 4);
 453                if (!p)
 454                        goto out_err_free;
 455                fh_count = be32_to_cpup(p);
 456
 457                fls->mirror_array[i]->fh_versions =
 458                        kcalloc(fh_count, sizeof(struct nfs_fh),
 459                                gfp_flags);
 460                if (fls->mirror_array[i]->fh_versions == NULL) {
 461                        rc = -ENOMEM;
 462                        goto out_err_free;
 463                }
 464
 465                for (j = 0; j < fh_count; j++) {
 466                        rc = decode_nfs_fh(&stream,
 467                                           &fls->mirror_array[i]->fh_versions[j]);
 468                        if (rc)
 469                                goto out_err_free;
 470                }
 471
                /* The count is published only after every handle decoded. */
 472                fls->mirror_array[i]->fh_versions_cnt = fh_count;
 473
 474                /* user */
 475                rc = decode_name(&stream, &id);
 476                if (rc)
 477                        goto out_err_free;
 478
 479                uid = make_kuid(&init_user_ns, id);
 480
 481                /* group */
 482                rc = decode_name(&stream, &id);
 483                if (rc)
 484                        goto out_err_free;
 485
 486                gid = make_kgid(&init_user_ns, id);
 487
                /*
                 * prepare_kernel_cred() takes no gfp argument, so when the
                 * caller's flags lack __GFP_FS the call is wrapped in a
                 * memalloc_nofs_save()/restore() pair.
                 */
 488                if (gfp_flags & __GFP_FS)
 489                        kcred = prepare_kernel_cred(NULL);
 490                else {
 491                        unsigned int nofs_flags = memalloc_nofs_save();
 492                        kcred = prepare_kernel_cred(NULL);
 493                        memalloc_nofs_restore(nofs_flags);
 494                }
 495                rc = -ENOMEM;
 496                if (!kcred)
 497                        goto out_err_free;
 498                kcred->fsuid = uid;
 499                kcred->fsgid = gid;
 500                cred = RCU_INITIALIZER(kcred);
 501
                /* READ layouts fill the ro_cred slot, all others rw_cred. */
 502                if (lgr->range.iomode == IOMODE_READ)
 503                        rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
 504                else
 505                        rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
 506
                /*
                 * A different return value means an equivalent mirror was
                 * already registered on the layout: move the new cred onto
                 * it, give the one it held to the temporary mirror, and
                 * free the temporary.
                 */
 507                mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
 508                if (mirror != fls->mirror_array[i]) {
 509                        /* swap cred ptrs so free_mirror will clean up old */
 510                        if (lgr->range.iomode == IOMODE_READ) {
 511                                cred = xchg(&mirror->ro_cred, cred);
 512                                rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
 513                        } else {
 514                                cred = xchg(&mirror->rw_cred, cred);
 515                                rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
 516                        }
 517                        ff_layout_free_mirror(fls->mirror_array[i]);
 518                        fls->mirror_array[i] = mirror;
 519                }
 520
 521                dprintk("%s: iomode %s uid %u gid %u\n", __func__,
 522                        lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
 523                        from_kuid(&init_user_ns, uid),
 524                        from_kgid(&init_user_ns, gid));
 525        }
 526
        /*
         * Trailing flags word and report interval: a short stream here is
         * not treated as an error, the segment is simply finished without
         * them.
         */
 527        p = xdr_inline_decode(&stream, 4);
 528        if (!p)
 529                goto out_sort_mirrors;
 530        fls->flags = be32_to_cpup(p);
 531
 532        p = xdr_inline_decode(&stream, 4);
 533        if (!p)
 534                goto out_sort_mirrors;
        /* The single decoded interval is applied to every mirror. */
 535        for (i=0; i < fls->mirror_array_cnt; i++)
 536                fls->mirror_array[i]->report_interval = be32_to_cpup(p);
 537
 538out_sort_mirrors:
 539        ff_layout_sort_mirrors(fls);
 540        ret = &fls->generic_hdr;
 541        dprintk("<-- %s (success)\n", __func__);
 542out_free_page:
 543        __free_page(scratch);
 544        return ret;
 545out_err_free:
        /* fls may still be NULL here; _ff_layout_free_lseg() accepts that. */
 546        _ff_layout_free_lseg(fls);
 547        ret = ERR_PTR(rc);
 548        dprintk("<-- %s (%d)\n", __func__, rc);
 549        goto out_free_page;
 550}
 551
 552static void
 553ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
 554{
 555        struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
 556
 557        dprintk("--> %s\n", __func__);
 558
 559        if (lseg->pls_range.iomode == IOMODE_RW) {
 560                struct nfs4_flexfile_layout *ffl;
 561                struct inode *inode;
 562
 563                ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
 564                inode = ffl->generic_hdr.plh_inode;
 565                spin_lock(&inode->i_lock);
 566                pnfs_generic_ds_cinfo_release_lseg(&ffl->commit_info, lseg);
 567                spin_unlock(&inode->i_lock);
 568        }
 569        _ff_layout_free_lseg(fls);
 570}
 571
 572static void
 573nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
 574{
 575        /* first IO request? */
 576        if (atomic_inc_return(&timer->n_ops) == 1) {
 577                timer->start_time = now;
 578        }
 579}
 580
 581static ktime_t
 582nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
 583{
 584        ktime_t start;
 585
 586        if (atomic_dec_return(&timer->n_ops) < 0)
 587                WARN_ON_ONCE(1);
 588
 589        start = timer->start_time;
 590        timer->start_time = now;
 591        return ktime_sub(now, start);
 592}
 593
 594static bool
 595nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
 596                            struct nfs4_ff_layoutstat *layoutstat,
 597                            ktime_t now)
 598{
 599        s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
 600        struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
 601
 602        nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
 603        if (!mirror->start_time)
 604                mirror->start_time = now;
 605        if (mirror->report_interval != 0)
 606                report_interval = (s64)mirror->report_interval * 1000LL;
 607        else if (layoutstats_timer != 0)
 608                report_interval = (s64)layoutstats_timer * 1000LL;
 609        if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
 610                        report_interval) {
 611                ffl->last_report_time = now;
 612                return true;
 613        }
 614
 615        return false;
 616}
 617
 618static void
 619nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
 620                __u64 requested)
 621{
 622        struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
 623
 624        iostat->ops_requested++;
 625        iostat->bytes_requested += requested;
 626}
 627
 628static void
 629nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
 630                __u64 requested,
 631                __u64 completed,
 632                ktime_t time_completed,
 633                ktime_t time_started)
 634{
 635        struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
 636        ktime_t completion_time = ktime_sub(time_completed, time_started);
 637        ktime_t timer;
 638
 639        iostat->ops_completed++;
 640        iostat->bytes_completed += completed;
 641        iostat->bytes_not_delivered += requested - completed;
 642
 643        timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer, time_completed);
 644        iostat->total_busy_time =
 645                        ktime_add(iostat->total_busy_time, timer);
 646        iostat->aggregate_completion_time =
 647                        ktime_add(iostat->aggregate_completion_time,
 648                                        completion_time);
 649}
 650
 651static void
 652nfs4_ff_layout_stat_io_start_read(struct inode *inode,
 653                struct nfs4_ff_layout_mirror *mirror,
 654                __u64 requested, ktime_t now)
 655{
 656        bool report;
 657
 658        spin_lock(&mirror->lock);
 659        report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now);
 660        nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
 661        set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
 662        spin_unlock(&mirror->lock);
 663
 664        if (report)
 665                pnfs_report_layoutstat(inode, GFP_KERNEL);
 666}
 667
 668static void
 669nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
 670                struct nfs4_ff_layout_mirror *mirror,
 671                __u64 requested,
 672                __u64 completed)
 673{
 674        spin_lock(&mirror->lock);
 675        nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
 676                        requested, completed,
 677                        ktime_get(), task->tk_start);
 678        set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
 679        spin_unlock(&mirror->lock);
 680}
 681
 682static void
 683nfs4_ff_layout_stat_io_start_write(struct inode *inode,
 684                struct nfs4_ff_layout_mirror *mirror,
 685                __u64 requested, ktime_t now)
 686{
 687        bool report;
 688
 689        spin_lock(&mirror->lock);
 690        report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now);
 691        nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
 692        set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
 693        spin_unlock(&mirror->lock);
 694
 695        if (report)
 696                pnfs_report_layoutstat(inode, GFP_NOIO);
 697}
 698
 699static void
 700nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
 701                struct nfs4_ff_layout_mirror *mirror,
 702                __u64 requested,
 703                __u64 completed,
 704                enum nfs3_stable_how committed)
 705{
 706        if (committed == NFS_UNSTABLE)
 707                requested = completed = 0;
 708
 709        spin_lock(&mirror->lock);
 710        nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
 711                        requested, completed, ktime_get(), task->tk_start);
 712        set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
 713        spin_unlock(&mirror->lock);
 714}
 715
 716static void
 717ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
 718{
 719        struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
 720
 721        if (devid)
 722                nfs4_mark_deviceid_unavailable(devid);
 723}
 724
 725static void
 726ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
 727{
 728        struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
 729
 730        if (devid)
 731                nfs4_mark_deviceid_available(devid);
 732}
 733
 734static struct nfs4_pnfs_ds *
 735ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
 736                             u32 start_idx, u32 *best_idx,
 737                             bool check_device)
 738{
 739        struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
 740        struct nfs4_ff_layout_mirror *mirror;
 741        struct nfs4_pnfs_ds *ds;
 742        u32 idx;
 743
 744        /* mirrors are initially sorted by efficiency */
 745        for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
 746                mirror = FF_LAYOUT_COMP(lseg, idx);
 747                ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
 748                if (!ds)
 749                        continue;
 750
 751                if (check_device &&
 752                    nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
 753                        continue;
 754
 755                *best_idx = idx;
 756                return ds;
 757        }
 758
 759        return NULL;
 760}
 761
 762static struct nfs4_pnfs_ds *
 763ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
 764                                 u32 start_idx, u32 *best_idx)
 765{
 766        return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
 767}
 768
 769static struct nfs4_pnfs_ds *
 770ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
 771                                   u32 start_idx, u32 *best_idx)
 772{
 773        return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
 774}
 775
 776static struct nfs4_pnfs_ds *
 777ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
 778                                  u32 start_idx, u32 *best_idx)
 779{
 780        struct nfs4_pnfs_ds *ds;
 781
 782        ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
 783        if (ds)
 784                return ds;
 785        return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
 786}
 787
 788static struct nfs4_pnfs_ds *
 789ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
 790                          u32 *best_idx)
 791{
 792        struct pnfs_layout_segment *lseg = pgio->pg_lseg;
 793        struct nfs4_pnfs_ds *ds;
 794
 795        ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
 796                                               best_idx);
 797        if (ds || !pgio->pg_mirror_idx)
 798                return ds;
 799        return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
 800}
 801
 802static void
 803ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
 804                      struct nfs_page *req,
 805                      bool strict_iomode)
 806{
 807        pnfs_put_lseg(pgio->pg_lseg);
 808        pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 809                                           nfs_req_openctx(req),
 810                                           req_offset(req),
 811                                           req->wb_bytes,
 812                                           IOMODE_READ,
 813                                           strict_iomode,
 814                                           GFP_KERNEL);
 815        if (IS_ERR(pgio->pg_lseg)) {
 816                pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 817                pgio->pg_lseg = NULL;
 818        }
 819}
 820
 821static void
 822ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
 823                          struct nfs_page *req)
 824{
 825        pnfs_generic_pg_check_layout(pgio);
 826        pnfs_generic_pg_check_range(pgio, req);
 827}
 828
 829static void
 830ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 831                        struct nfs_page *req)
 832{
 833        struct nfs_pgio_mirror *pgm;
 834        struct nfs4_ff_layout_mirror *mirror;
 835        struct nfs4_pnfs_ds *ds;
 836        u32 ds_idx;
 837
 838retry:
 839        ff_layout_pg_check_layout(pgio, req);
 840        /* Use full layout for now */
 841        if (!pgio->pg_lseg) {
 842                ff_layout_pg_get_read(pgio, req, false);
 843                if (!pgio->pg_lseg)
 844                        goto out_nolseg;
 845        }
 846        if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
 847                ff_layout_pg_get_read(pgio, req, true);
 848                if (!pgio->pg_lseg)
 849                        goto out_nolseg;
 850        }
 851
 852        ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
 853        if (!ds) {
 854                if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
 855                        goto out_mds;
 856                pnfs_generic_pg_cleanup(pgio);
 857                /* Sleep for 1 second before retrying */
 858                ssleep(1);
 859                goto retry;
 860        }
 861
 862        mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
 863        pgm = &pgio->pg_mirrors[0];
 864        pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
 865
 866        pgio->pg_mirror_idx = ds_idx;
 867
 868        if (NFS_SERVER(pgio->pg_inode)->flags &
 869                        (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
 870                pgio->pg_maxretrans = io_maxretrans;
 871        return;
 872out_nolseg:
 873        if (pgio->pg_error < 0)
 874                return;
 875out_mds:
 876        trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
 877                        0, NFS4_MAX_UINT64, IOMODE_READ,
 878                        NFS_I(pgio->pg_inode)->layout,
 879                        pgio->pg_lseg);
 880        pgio->pg_maxretrans = 0;
 881        nfs_pageio_reset_read_mds(pgio);
 882}
 883
 884static void
 885ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 886                        struct nfs_page *req)
 887{
 888        struct nfs4_ff_layout_mirror *mirror;
 889        struct nfs_pgio_mirror *pgm;
 890        struct nfs4_pnfs_ds *ds;
 891        u32 i;
 892
 893retry:
 894        ff_layout_pg_check_layout(pgio, req);
 895        if (!pgio->pg_lseg) {
 896                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 897                                                   nfs_req_openctx(req),
 898                                                   req_offset(req),
 899                                                   req->wb_bytes,
 900                                                   IOMODE_RW,
 901                                                   false,
 902                                                   GFP_NOFS);
 903                if (IS_ERR(pgio->pg_lseg)) {
 904                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 905                        pgio->pg_lseg = NULL;
 906                        return;
 907                }
 908        }
 909        /* If no lseg, fall back to write through mds */
 910        if (pgio->pg_lseg == NULL)
 911                goto out_mds;
 912
 913        /* Use a direct mapping of ds_idx to pgio mirror_idx */
 914        if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
 915                goto out_eagain;
 916
 917        for (i = 0; i < pgio->pg_mirror_count; i++) {
 918                mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
 919                ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
 920                if (!ds) {
 921                        if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
 922                                goto out_mds;
 923                        pnfs_generic_pg_cleanup(pgio);
 924                        /* Sleep for 1 second before retrying */
 925                        ssleep(1);
 926                        goto retry;
 927                }
 928                pgm = &pgio->pg_mirrors[i];
 929                pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
 930        }
 931
 932        if (NFS_SERVER(pgio->pg_inode)->flags &
 933                        (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
 934                pgio->pg_maxretrans = io_maxretrans;
 935        return;
 936out_eagain:
 937        pnfs_generic_pg_cleanup(pgio);
 938        pgio->pg_error = -EAGAIN;
 939        return;
 940out_mds:
 941        trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
 942                        0, NFS4_MAX_UINT64, IOMODE_RW,
 943                        NFS_I(pgio->pg_inode)->layout,
 944                        pgio->pg_lseg);
 945        pgio->pg_maxretrans = 0;
 946        nfs_pageio_reset_write_mds(pgio);
 947        pgio->pg_error = -EAGAIN;
 948}
 949
 950static unsigned int
 951ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
 952                                    struct nfs_page *req)
 953{
 954        if (!pgio->pg_lseg) {
 955                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 956                                                   nfs_req_openctx(req),
 957                                                   req_offset(req),
 958                                                   req->wb_bytes,
 959                                                   IOMODE_RW,
 960                                                   false,
 961                                                   GFP_NOFS);
 962                if (IS_ERR(pgio->pg_lseg)) {
 963                        pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 964                        pgio->pg_lseg = NULL;
 965                        goto out;
 966                }
 967        }
 968        if (pgio->pg_lseg)
 969                return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
 970
 971        trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode,
 972                        0, NFS4_MAX_UINT64, IOMODE_RW,
 973                        NFS_I(pgio->pg_inode)->layout,
 974                        pgio->pg_lseg);
 975        /* no lseg means that pnfs is not in use, so no mirroring here */
 976        nfs_pageio_reset_write_mds(pgio);
 977out:
 978        return 1;
 979}
 980
 981static u32
 982ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
 983{
 984        u32 old = desc->pg_mirror_idx;
 985
 986        desc->pg_mirror_idx = idx;
 987        return old;
 988}
 989
 990static struct nfs_pgio_mirror *
 991ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
 992{
 993        return &desc->pg_mirrors[idx];
 994}
 995
 996static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
 997        .pg_init = ff_layout_pg_init_read,
 998        .pg_test = pnfs_generic_pg_test,
 999        .pg_doio = pnfs_generic_pg_readpages,
1000        .pg_cleanup = pnfs_generic_pg_cleanup,
1001};
1002
1003static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
1004        .pg_init = ff_layout_pg_init_write,
1005        .pg_test = pnfs_generic_pg_test,
1006        .pg_doio = pnfs_generic_pg_writepages,
1007        .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
1008        .pg_cleanup = pnfs_generic_pg_cleanup,
1009        .pg_get_mirror = ff_layout_pg_get_mirror_write,
1010        .pg_set_mirror = ff_layout_pg_set_mirror_write,
1011};
1012
1013static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
1014{
1015        struct rpc_task *task = &hdr->task;
1016
1017        pnfs_layoutcommit_inode(hdr->inode, false);
1018
1019        if (retry_pnfs) {
1020                dprintk("%s Reset task %5u for i/o through pNFS "
1021                        "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
1022                        hdr->task.tk_pid,
1023                        hdr->inode->i_sb->s_id,
1024                        (unsigned long long)NFS_FILEID(hdr->inode),
1025                        hdr->args.count,
1026                        (unsigned long long)hdr->args.offset);
1027
1028                hdr->completion_ops->reschedule_io(hdr);
1029                return;
1030        }
1031
1032        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1033                dprintk("%s Reset task %5u for i/o through MDS "
1034                        "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
1035                        hdr->task.tk_pid,
1036                        hdr->inode->i_sb->s_id,
1037                        (unsigned long long)NFS_FILEID(hdr->inode),
1038                        hdr->args.count,
1039                        (unsigned long long)hdr->args.offset);
1040
1041                trace_pnfs_mds_fallback_write_done(hdr->inode,
1042                                hdr->args.offset, hdr->args.count,
1043                                IOMODE_RW, NFS_I(hdr->inode)->layout,
1044                                hdr->lseg);
1045                task->tk_status = pnfs_write_done_resend_to_mds(hdr);
1046        }
1047}
1048
1049static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
1050{
1051        u32 idx = hdr->pgio_mirror_idx + 1;
1052        u32 new_idx = 0;
1053
1054        if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
1055                ff_layout_send_layouterror(hdr->lseg);
1056        else
1057                pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
1058        pnfs_read_resend_pnfs(hdr, new_idx);
1059}
1060
1061static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
1062{
1063        struct rpc_task *task = &hdr->task;
1064
1065        pnfs_layoutcommit_inode(hdr->inode, false);
1066        pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
1067
1068        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1069                dprintk("%s Reset task %5u for i/o through MDS "
1070                        "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
1071                        hdr->task.tk_pid,
1072                        hdr->inode->i_sb->s_id,
1073                        (unsigned long long)NFS_FILEID(hdr->inode),
1074                        hdr->args.count,
1075                        (unsigned long long)hdr->args.offset);
1076
1077                trace_pnfs_mds_fallback_read_done(hdr->inode,
1078                                hdr->args.offset, hdr->args.count,
1079                                IOMODE_READ, NFS_I(hdr->inode)->layout,
1080                                hdr->lseg);
1081                task->tk_status = pnfs_read_done_resend_to_mds(hdr);
1082        }
1083}
1084
1085static int ff_layout_async_handle_error_v4(struct rpc_task *task,
1086                                           struct nfs4_state *state,
1087                                           struct nfs_client *clp,
1088                                           struct pnfs_layout_segment *lseg,
1089                                           u32 idx)
1090{
1091        struct pnfs_layout_hdr *lo = lseg->pls_layout;
1092        struct inode *inode = lo->plh_inode;
1093        struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
1094        struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
1095
1096        switch (task->tk_status) {
1097        case -NFS4ERR_BADSESSION:
1098        case -NFS4ERR_BADSLOT:
1099        case -NFS4ERR_BAD_HIGH_SLOT:
1100        case -NFS4ERR_DEADSESSION:
1101        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1102        case -NFS4ERR_SEQ_FALSE_RETRY:
1103        case -NFS4ERR_SEQ_MISORDERED:
1104                dprintk("%s ERROR %d, Reset session. Exchangeid "
1105                        "flags 0x%x\n", __func__, task->tk_status,
1106                        clp->cl_exchange_flags);
1107                nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
1108                break;
1109        case -NFS4ERR_DELAY:
1110        case -NFS4ERR_GRACE:
1111                rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
1112                break;
1113        case -NFS4ERR_RETRY_UNCACHED_REP:
1114                break;
1115        /* Invalidate Layout errors */
1116        case -NFS4ERR_PNFS_NO_LAYOUT:
1117        case -ESTALE:           /* mapped NFS4ERR_STALE */
1118        case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
1119        case -EISDIR:           /* mapped NFS4ERR_ISDIR */
1120        case -NFS4ERR_FHEXPIRED:
1121        case -NFS4ERR_WRONG_TYPE:
1122                dprintk("%s Invalid layout error %d\n", __func__,
1123                        task->tk_status);
1124                /*
1125                 * Destroy layout so new i/o will get a new layout.
1126                 * Layout will not be destroyed until all current lseg
1127                 * references are put. Mark layout as invalid to resend failed
1128                 * i/o and all i/o waiting on the slot table to the MDS until
1129                 * layout is destroyed and a new valid layout is obtained.
1130                 */
1131                pnfs_destroy_layout(NFS_I(inode));
1132                rpc_wake_up(&tbl->slot_tbl_waitq);
1133                goto reset;
1134        /* RPC connection errors */
1135        case -ECONNREFUSED:
1136        case -EHOSTDOWN:
1137        case -EHOSTUNREACH:
1138        case -ENETUNREACH:
1139        case -EIO:
1140        case -ETIMEDOUT:
1141        case -EPIPE:
1142                dprintk("%s DS connection error %d\n", __func__,
1143                        task->tk_status);
1144                nfs4_delete_deviceid(devid->ld, devid->nfs_client,
1145                                &devid->deviceid);
1146                rpc_wake_up(&tbl->slot_tbl_waitq);
1147                /* fall through */
1148        default:
1149                if (ff_layout_avoid_mds_available_ds(lseg))
1150                        return -NFS4ERR_RESET_TO_PNFS;
1151reset:
1152                dprintk("%s Retry through MDS. Error %d\n", __func__,
1153                        task->tk_status);
1154                return -NFS4ERR_RESET_TO_MDS;
1155        }
1156        task->tk_status = 0;
1157        return -EAGAIN;
1158}
1159
1160/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
1161static int ff_layout_async_handle_error_v3(struct rpc_task *task,
1162                                           struct pnfs_layout_segment *lseg,
1163                                           u32 idx)
1164{
1165        struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
1166
1167        switch (task->tk_status) {
1168        /* File access problems. Don't mark the device as unavailable */
1169        case -EACCES:
1170        case -ESTALE:
1171        case -EISDIR:
1172        case -EBADHANDLE:
1173        case -ELOOP:
1174        case -ENOSPC:
1175                break;
1176        case -EJUKEBOX:
1177                nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
1178                goto out_retry;
1179        default:
1180                dprintk("%s DS connection error %d\n", __func__,
1181                        task->tk_status);
1182                nfs4_delete_deviceid(devid->ld, devid->nfs_client,
1183                                &devid->deviceid);
1184        }
1185        /* FIXME: Need to prevent infinite looping here. */
1186        return -NFS4ERR_RESET_TO_PNFS;
1187out_retry:
1188        task->tk_status = 0;
1189        rpc_restart_call_prepare(task);
1190        rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
1191        return -EAGAIN;
1192}
1193
1194static int ff_layout_async_handle_error(struct rpc_task *task,
1195                                        struct nfs4_state *state,
1196                                        struct nfs_client *clp,
1197                                        struct pnfs_layout_segment *lseg,
1198                                        u32 idx)
1199{
1200        int vers = clp->cl_nfs_mod->rpc_vers->number;
1201
1202        if (task->tk_status >= 0) {
1203                ff_layout_mark_ds_reachable(lseg, idx);
1204                return 0;
1205        }
1206
1207        /* Handle the case of an invalid layout segment */
1208        if (!pnfs_is_valid_lseg(lseg))
1209                return -NFS4ERR_RESET_TO_PNFS;
1210
1211        switch (vers) {
1212        case 3:
1213                return ff_layout_async_handle_error_v3(task, lseg, idx);
1214        case 4:
1215                return ff_layout_async_handle_error_v4(task, state, clp,
1216                                                       lseg, idx);
1217        default:
1218                /* should never happen */
1219                WARN_ON_ONCE(1);
1220                return 0;
1221        }
1222}
1223
1224static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
1225                                        u32 idx, u64 offset, u64 length,
1226                                        u32 *op_status, int opnum, int error)
1227{
1228        struct nfs4_ff_layout_mirror *mirror;
1229        u32 status = *op_status;
1230        int err;
1231
1232        if (status == 0) {
1233                switch (error) {
1234                case -ETIMEDOUT:
1235                case -EPFNOSUPPORT:
1236                case -EPROTONOSUPPORT:
1237                case -EOPNOTSUPP:
1238                case -ECONNREFUSED:
1239                case -ECONNRESET:
1240                case -EHOSTDOWN:
1241                case -EHOSTUNREACH:
1242                case -ENETUNREACH:
1243                case -EADDRINUSE:
1244                case -ENOBUFS:
1245                case -EPIPE:
1246                case -EPERM:
1247                        *op_status = status = NFS4ERR_NXIO;
1248                        break;
1249                case -EACCES:
1250                        *op_status = status = NFS4ERR_ACCESS;
1251                        break;
1252                default:
1253                        return;
1254                }
1255        }
1256
1257        mirror = FF_LAYOUT_COMP(lseg, idx);
1258        err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
1259                                       mirror, offset, length, status, opnum,
1260                                       GFP_NOIO);
1261
1262        switch (status) {
1263        case NFS4ERR_DELAY:
1264        case NFS4ERR_GRACE:
1265                break;
1266        case NFS4ERR_NXIO:
1267                ff_layout_mark_ds_unreachable(lseg, idx);
1268                /*
1269                 * Don't return the layout if this is a read and we still
1270                 * have layouts to try
1271                 */
1272                if (opnum == OP_READ)
1273                        break;
1274                /* Fallthrough */
1275        default:
1276                pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
1277                                                  lseg);
1278        }
1279
1280        dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
1281}
1282
1283/* NFS_PROTO call done callback routines */
1284static int ff_layout_read_done_cb(struct rpc_task *task,
1285                                struct nfs_pgio_header *hdr)
1286{
1287        int err;
1288
1289        if (task->tk_status < 0) {
1290                ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
1291                                            hdr->args.offset, hdr->args.count,
1292                                            &hdr->res.op_status, OP_READ,
1293                                            task->tk_status);
1294                trace_ff_layout_read_error(hdr);
1295        }
1296
1297        err = ff_layout_async_handle_error(task, hdr->args.context->state,
1298                                           hdr->ds_clp, hdr->lseg,
1299                                           hdr->pgio_mirror_idx);
1300
1301        trace_nfs4_pnfs_read(hdr, err);
1302        clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
1303        clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
1304        switch (err) {
1305        case -NFS4ERR_RESET_TO_PNFS:
1306                set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
1307                return task->tk_status;
1308        case -NFS4ERR_RESET_TO_MDS:
1309                set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
1310                return task->tk_status;
1311        case -EAGAIN:
1312                goto out_eagain;
1313        }
1314
1315        return 0;
1316out_eagain:
1317        rpc_restart_call_prepare(task);
1318        return -EAGAIN;
1319}
1320
1321static bool
1322ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
1323{
1324        return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
1325}
1326
1327/*
1328 * We reference the rpc_cred of the first WRITE that triggers the need for
1329 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
1330 * rfc5661 is not clear about which credential should be used.
1331 *
1332 * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so
1333 * to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751
1334 * we always send layoutcommit after DS writes.
1335 */
1336static void
1337ff_layout_set_layoutcommit(struct inode *inode,
1338                struct pnfs_layout_segment *lseg,
1339                loff_t end_offset)
1340{
1341        if (!ff_layout_need_layoutcommit(lseg))
1342                return;
1343
1344        pnfs_set_layoutcommit(inode, lseg, end_offset);
1345        dprintk("%s inode %lu pls_end_pos %llu\n", __func__, inode->i_ino,
1346                (unsigned long long) NFS_I(inode)->layout->plh_lwb);
1347}
1348
1349static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
1350                struct nfs_pgio_header *hdr)
1351{
1352        if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
1353                return;
1354        nfs4_ff_layout_stat_io_start_read(hdr->inode,
1355                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1356                        hdr->args.count,
1357                        task->tk_start);
1358}
1359
1360static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
1361                struct nfs_pgio_header *hdr)
1362{
1363        if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
1364                return;
1365        nfs4_ff_layout_stat_io_end_read(task,
1366                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1367                        hdr->args.count,
1368                        hdr->res.count);
1369        set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
1370}
1371
1372static int ff_layout_read_prepare_common(struct rpc_task *task,
1373                                         struct nfs_pgio_header *hdr)
1374{
1375        if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
1376                rpc_exit(task, -EIO);
1377                return -EIO;
1378        }
1379
1380        ff_layout_read_record_layoutstats_start(task, hdr);
1381        return 0;
1382}
1383
/*
 * Call ops for the async read/write cases.
 *
 * NOTE(review): the comment this replaces also said that, for dense
 * layouts, the offset needs to be reset to its original value. Nothing in
 * this function does so; confirm whether that remark still applies.
 *
 * rpc_call_prepare for NFSv3 reads: start the call unless the common
 * preparation failed.
 */
static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
        struct nfs_pgio_header *hdr = data;

        if (ff_layout_read_prepare_common(task, hdr) == 0)
                rpc_call_start(task);
}
1398
1399static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
1400{
1401        struct nfs_pgio_header *hdr = data;
1402
1403        if (nfs4_setup_sequence(hdr->ds_clp,
1404                                &hdr->args.seq_args,
1405                                &hdr->res.seq_res,
1406                                task))
1407                return;
1408
1409        ff_layout_read_prepare_common(task, hdr);
1410}
1411
1412static void ff_layout_read_call_done(struct rpc_task *task, void *data)
1413{
1414        struct nfs_pgio_header *hdr = data;
1415
1416        dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
1417
1418        if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
1419            task->tk_status == 0) {
1420                nfs4_sequence_done(task, &hdr->res.seq_res);
1421                return;
1422        }
1423
1424        /* Note this may cause RPC to be resent */
1425        hdr->mds_ops->rpc_call_done(task, hdr);
1426}
1427
1428static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
1429{
1430        struct nfs_pgio_header *hdr = data;
1431
1432        ff_layout_read_record_layoutstats_done(task, hdr);
1433        rpc_count_iostats_metrics(task,
1434            &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
1435}
1436
1437static void ff_layout_read_release(void *data)
1438{
1439        struct nfs_pgio_header *hdr = data;
1440
1441        ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
1442        if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
1443                ff_layout_resend_pnfs_read(hdr);
1444        else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
1445                ff_layout_reset_read(hdr);
1446        pnfs_generic_rw_release(data);
1447}
1448
1449
1450static int ff_layout_write_done_cb(struct rpc_task *task,
1451                                struct nfs_pgio_header *hdr)
1452{
1453        loff_t end_offs = 0;
1454        int err;
1455
1456        if (task->tk_status < 0) {
1457                ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
1458                                            hdr->args.offset, hdr->args.count,
1459                                            &hdr->res.op_status, OP_WRITE,
1460                                            task->tk_status);
1461                trace_ff_layout_write_error(hdr);
1462        }
1463
1464        err = ff_layout_async_handle_error(task, hdr->args.context->state,
1465                                           hdr->ds_clp, hdr->lseg,
1466                                           hdr->pgio_mirror_idx);
1467
1468        trace_nfs4_pnfs_write(hdr, err);
1469        clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
1470        clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
1471        switch (err) {
1472        case -NFS4ERR_RESET_TO_PNFS:
1473                set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
1474                return task->tk_status;
1475        case -NFS4ERR_RESET_TO_MDS:
1476                set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
1477                return task->tk_status;
1478        case -EAGAIN:
1479                return -EAGAIN;
1480        }
1481
1482        if (hdr->res.verf->committed == NFS_FILE_SYNC ||
1483            hdr->res.verf->committed == NFS_DATA_SYNC)
1484                end_offs = hdr->mds_offset + (loff_t)hdr->res.count;
1485
1486        /* Note: if the write is unstable, don't set end_offs until commit */
1487        ff_layout_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);
1488
1489        /* zero out fattr since we don't care DS attr at all */
1490        hdr->fattr.valid = 0;
1491        if (task->tk_status >= 0)
1492                nfs_writeback_update_inode(hdr);
1493
1494        return 0;
1495}
1496
1497static int ff_layout_commit_done_cb(struct rpc_task *task,
1498                                     struct nfs_commit_data *data)
1499{
1500        int err;
1501
1502        if (task->tk_status < 0) {
1503                ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
1504                                            data->args.offset, data->args.count,
1505                                            &data->res.op_status, OP_COMMIT,
1506                                            task->tk_status);
1507                trace_ff_layout_commit_error(data);
1508        }
1509
1510        err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
1511                                           data->lseg, data->ds_commit_index);
1512
1513        trace_nfs4_pnfs_commit_ds(data, err);
1514        switch (err) {
1515        case -NFS4ERR_RESET_TO_PNFS:
1516                pnfs_generic_prepare_to_resend_writes(data);
1517                return -EAGAIN;
1518        case -NFS4ERR_RESET_TO_MDS:
1519                pnfs_generic_prepare_to_resend_writes(data);
1520                return -EAGAIN;
1521        case -EAGAIN:
1522                rpc_restart_call_prepare(task);
1523                return -EAGAIN;
1524        }
1525
1526        ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);
1527
1528        return 0;
1529}
1530
1531static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
1532                struct nfs_pgio_header *hdr)
1533{
1534        if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
1535                return;
1536        nfs4_ff_layout_stat_io_start_write(hdr->inode,
1537                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1538                        hdr->args.count,
1539                        task->tk_start);
1540}
1541
1542static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
1543                struct nfs_pgio_header *hdr)
1544{
1545        if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
1546                return;
1547        nfs4_ff_layout_stat_io_end_write(task,
1548                        FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1549                        hdr->args.count, hdr->res.count,
1550                        hdr->res.verf->committed);
1551        set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
1552}
1553
1554static int ff_layout_write_prepare_common(struct rpc_task *task,
1555                                          struct nfs_pgio_header *hdr)
1556{
1557        if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
1558                rpc_exit(task, -EIO);
1559                return -EIO;
1560        }
1561
1562        ff_layout_write_record_layoutstats_start(task, hdr);
1563        return 0;
1564}
1565
/*
 * rpc_call_prepare for NFSv3 writes: start the call unless the common
 * preparation failed.
 */
static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
        struct nfs_pgio_header *hdr = data;

        if (ff_layout_write_prepare_common(task, hdr) == 0)
                rpc_call_start(task);
}
1575
1576static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
1577{
1578        struct nfs_pgio_header *hdr = data;
1579
1580        if (nfs4_setup_sequence(hdr->ds_clp,
1581                                &hdr->args.seq_args,
1582                                &hdr->res.seq_res,
1583                                task))
1584                return;
1585
1586        ff_layout_write_prepare_common(task, hdr);
1587}
1588
1589static void ff_layout_write_call_done(struct rpc_task *task, void *data)
1590{
1591        struct nfs_pgio_header *hdr = data;
1592
1593        if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
1594            task->tk_status == 0) {
1595                nfs4_sequence_done(task, &hdr->res.seq_res);
1596                return;
1597        }
1598
1599        /* Note this may cause RPC to be resent */
1600        hdr->mds_ops->rpc_call_done(task, hdr);
1601}
1602
1603static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
1604{
1605        struct nfs_pgio_header *hdr = data;
1606
1607        ff_layout_write_record_layoutstats_done(task, hdr);
1608        rpc_count_iostats_metrics(task,
1609            &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
1610}
1611
1612static void ff_layout_write_release(void *data)
1613{
1614        struct nfs_pgio_header *hdr = data;
1615
1616        ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
1617        if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
1618                ff_layout_send_layouterror(hdr->lseg);
1619                ff_layout_reset_write(hdr, true);
1620        } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
1621                ff_layout_reset_write(hdr, false);
1622        pnfs_generic_rw_release(data);
1623}
1624
1625static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
1626                struct nfs_commit_data *cdata)
1627{
1628        if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
1629                return;
1630        nfs4_ff_layout_stat_io_start_write(cdata->inode,
1631                        FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
1632                        0, task->tk_start);
1633}
1634
1635static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
1636                struct nfs_commit_data *cdata)
1637{
1638        struct nfs_page *req;
1639        __u64 count = 0;
1640
1641        if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
1642                return;
1643
1644        if (task->tk_status == 0) {
1645                list_for_each_entry(req, &cdata->pages, wb_list)
1646                        count += req->wb_bytes;
1647        }
1648        nfs4_ff_layout_stat_io_end_write(task,
1649                        FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
1650                        count, count, NFS_FILE_SYNC);
1651        set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
1652}
1653
/*
 * Preparation shared by the v3 and v4 commit paths: start layoutstats
 * accounting.
 */
static void ff_layout_commit_prepare_common(struct rpc_task *task,
                                            struct nfs_commit_data *cdata)
{
        ff_layout_commit_record_layoutstats_start(task, cdata);
}
1659
/*
 * rpc_call_prepare hook for COMMITs sent to an NFSv3 data server: run the
 * common preparation, then start the RPC call.
 */
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
        struct nfs_commit_data *cdata = data;

        ff_layout_commit_prepare_common(task, cdata);
        rpc_call_start(task);
}
1665
1666static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
1667{
1668        struct nfs_commit_data *wdata = data;
1669
1670        if (nfs4_setup_sequence(wdata->ds_clp,
1671                                &wdata->args.seq_args,
1672                                &wdata->res.seq_res,
1673                                task))
1674                return;
1675        ff_layout_commit_prepare_common(task, data);
1676}
1677
/* rpc_call_done hook for DS COMMITs: defer to the generic pNFS handler. */
static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
        pnfs_generic_write_commit_done(task, data);
}
1682
1683static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
1684{
1685        struct nfs_commit_data *cdata = data;
1686
1687        ff_layout_commit_record_layoutstats_done(task, cdata);
1688        rpc_count_iostats_metrics(task,
1689            &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
1690}
1691
1692static void ff_layout_commit_release(void *data)
1693{
1694        struct nfs_commit_data *cdata = data;
1695
1696        ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
1697        pnfs_generic_commit_release(data);
1698}
1699
/*
 * RPC callbacks for READs sent to a data server.  The v3 and v4 tables
 * differ only in their rpc_call_prepare hook; ff_layout_read_pagelist()
 * picks one based on the mirror's DS version.
 */
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_read_prepare_v3,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
        .rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_read_prepare_v4,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
        .rpc_release = ff_layout_read_release,
};
1713
/*
 * RPC callbacks for WRITEs sent to a data server.  The v3 and v4 tables
 * differ only in their rpc_call_prepare hook; ff_layout_write_pagelist()
 * picks one based on the mirror's DS version.
 */
static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_write_prepare_v3,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
        .rpc_release = ff_layout_write_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_write_prepare_v4,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
        .rpc_release = ff_layout_write_release,
};
1727
/*
 * RPC callbacks for COMMITs sent to a data server.  The v3 and v4 tables
 * differ only in their rpc_call_prepare hook; ff_layout_initiate_commit()
 * picks one based on the mirror's DS version.
 */
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_commit_prepare_v3,
        .rpc_call_done = ff_layout_commit_done,
        .rpc_count_stats = ff_layout_commit_count_stats,
        .rpc_release = ff_layout_commit_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_commit_prepare_v4,
        .rpc_call_done = ff_layout_commit_done,
        .rpc_count_stats = ff_layout_commit_count_stats,
        .rpc_release = ff_layout_commit_release,
};
1741
/*
 * Try to send a read described by @hdr to the data server of the mirror
 * selected by hdr->pgio_mirror_idx.
 *
 * Returns PNFS_ATTEMPTED once the asynchronous read has been initiated.
 * If the data server, its RPC client or the DS credential cannot be
 * obtained, returns PNFS_TRY_AGAIN when ff_layout_avoid_mds_available_ds()
 * says so, and PNFS_NOT_ATTEMPTED (after emitting an MDS-fallback trace
 * event) otherwise.
 */
static enum pnfs_try_status
ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
{
        struct pnfs_layout_segment *lseg = hdr->lseg;
        struct nfs4_pnfs_ds *ds;
        struct rpc_clnt *ds_clnt;
        struct nfs4_ff_layout_mirror *mirror;
        const struct cred *ds_cred;
        loff_t offset = hdr->args.offset;
        u32 idx = hdr->pgio_mirror_idx;
        int vers;
        struct nfs_fh *fh;

        dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
                __func__, hdr->inode->i_ino,
                hdr->args.pgbase, (size_t)hdr->args.count, offset);

        mirror = FF_LAYOUT_COMP(lseg, idx);
        ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
        if (!ds)
                goto out_failed;

        ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
                                                   hdr->inode);
        if (IS_ERR(ds_clnt))
                goto out_failed;

        ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
        if (!ds_cred)
                goto out_failed;

        vers = nfs4_ff_layout_ds_version(mirror);

        dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
                ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);

        hdr->pgio_done_cb = ff_layout_read_done_cb;
        /* The header holds its own reference on the DS client. */
        refcount_inc(&ds->ds_clp->cl_count);
        hdr->ds_clp = ds->ds_clp;
        /* Only override the file handle when the mirror supplies one. */
        fh = nfs4_ff_layout_select_ds_fh(mirror);
        if (fh)
                hdr->args.fh = fh;

        nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);

        /*
         * Note that if we ever decide to split across DSes,
         * then we may need to handle dense-like offsets.
         */
        hdr->args.offset = offset;
        hdr->mds_offset = offset;

        /* Perform an asynchronous read to ds */
        nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
                          vers == 3 ? &ff_layout_read_call_ops_v3 :
                                      &ff_layout_read_call_ops_v4,
                          0, RPC_TASK_SOFTCONN);
        /* Drop the local reference returned by ff_layout_get_ds_cred(). */
        put_cred(ds_cred);
        return PNFS_ATTEMPTED;

out_failed:
        if (ff_layout_avoid_mds_available_ds(lseg))
                return PNFS_TRY_AGAIN;
        trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
                        hdr->args.offset, hdr->args.count,
                        IOMODE_READ, NFS_I(hdr->inode)->layout, lseg);
        return PNFS_NOT_ATTEMPTED;
}
1810
/*
 * Perform async writes.
 *
 * Try to send the write described by @hdr to the data server of the mirror
 * selected by hdr->pgio_mirror_idx; @sync is passed through to
 * nfs_initiate_pgio().
 *
 * Returns PNFS_ATTEMPTED once the write has been initiated.  If the data
 * server, its RPC client or the DS credential cannot be obtained, returns
 * PNFS_TRY_AGAIN when ff_layout_avoid_mds_available_ds() says so, and
 * PNFS_NOT_ATTEMPTED (after emitting an MDS-fallback trace event) otherwise.
 */
static enum pnfs_try_status
ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
        struct pnfs_layout_segment *lseg = hdr->lseg;
        struct nfs4_pnfs_ds *ds;
        struct rpc_clnt *ds_clnt;
        struct nfs4_ff_layout_mirror *mirror;
        const struct cred *ds_cred;
        loff_t offset = hdr->args.offset;
        int vers;
        struct nfs_fh *fh;
        u32 idx = hdr->pgio_mirror_idx;

        mirror = FF_LAYOUT_COMP(lseg, idx);
        ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
        if (!ds)
                goto out_failed;

        ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
                                                   hdr->inode);
        if (IS_ERR(ds_clnt))
                goto out_failed;

        ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
        if (!ds_cred)
                goto out_failed;

        vers = nfs4_ff_layout_ds_version(mirror);

        dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
                __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
                offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
                vers);

        hdr->pgio_done_cb = ff_layout_write_done_cb;
        /* The header holds its own reference on the DS client. */
        refcount_inc(&ds->ds_clp->cl_count);
        hdr->ds_clp = ds->ds_clp;
        /* Remember which mirror this write went to for the later COMMIT. */
        hdr->ds_commit_idx = idx;
        /* Only override the file handle when the mirror supplies one. */
        fh = nfs4_ff_layout_select_ds_fh(mirror);
        if (fh)
                hdr->args.fh = fh;

        nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);

        /*
         * Note that if we ever decide to split across DSes,
         * then we may need to handle dense-like offsets.
         */
        hdr->args.offset = offset;

        /* Perform an asynchronous write */
        nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
                          vers == 3 ? &ff_layout_write_call_ops_v3 :
                                      &ff_layout_write_call_ops_v4,
                          sync, RPC_TASK_SOFTCONN);
        /* Drop the local reference returned by ff_layout_get_ds_cred(). */
        put_cred(ds_cred);
        return PNFS_ATTEMPTED;

out_failed:
        if (ff_layout_avoid_mds_available_ds(lseg))
                return PNFS_TRY_AGAIN;
        trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
                        hdr->args.offset, hdr->args.count,
                        IOMODE_RW, NFS_I(hdr->inode)->layout, lseg);
        return PNFS_NOT_ATTEMPTED;
}
1878
/*
 * Map a commit bucket index to a mirror index.  For this layout driver the
 * mapping is the identity; @lseg is unused.
 */
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
        return i;
}
1883
1884static struct nfs_fh *
1885select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
1886{
1887        struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
1888
1889        /* FIXME: Assume that there is only one NFS version available
1890         * for the DS.
1891         */
1892        return &flseg->mirror_array[i]->fh_versions[0];
1893}
1894
/*
 * Send the COMMIT described by @data to the data server of the mirror
 * selected by data->ds_commit_index.
 *
 * Returns the result of nfs_initiate_commit() when the call is issued.
 * If the layout segment is missing, or is neither valid nor flagged
 * NFS_LSEG_LAYOUTRETURN, or the data server / RPC client / DS credential
 * cannot be obtained, the writes are prepared for resend, @data is released
 * and -EAGAIN is returned.
 */
static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
{
        struct pnfs_layout_segment *lseg = data->lseg;
        struct nfs4_pnfs_ds *ds;
        struct rpc_clnt *ds_clnt;
        struct nfs4_ff_layout_mirror *mirror;
        const struct cred *ds_cred;
        u32 idx;
        int vers, ret;
        struct nfs_fh *fh;

        if (!lseg || !(pnfs_is_valid_lseg(lseg) ||
            test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)))
                goto out_err;

        idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
        mirror = FF_LAYOUT_COMP(lseg, idx);
        ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
        if (!ds)
                goto out_err;

        ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
                                                   data->inode);
        if (IS_ERR(ds_clnt))
                goto out_err;

        ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
        if (!ds_cred)
                goto out_err;

        vers = nfs4_ff_layout_ds_version(mirror);

        dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
                data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
                vers);
        data->commit_done_cb = ff_layout_commit_done_cb;
        /*
         * NOTE(review): data->cred keeps pointing at ds_cred after the
         * put_cred() below -- confirm nothing reads it once the local
         * reference has been dropped.
         */
        data->cred = ds_cred;
        /* The commit data holds its own reference on the DS client. */
        refcount_inc(&ds->ds_clp->cl_count);
        data->ds_clp = ds->ds_clp;
        fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
        if (fh)
                data->args.fh = fh;

        ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
                                   vers == 3 ? &ff_layout_commit_call_ops_v3 :
                                               &ff_layout_commit_call_ops_v4,
                                   how, RPC_TASK_SOFTCONN);
        put_cred(ds_cred);
        return ret;
out_err:
        /* Could not reach the DS: requeue the writes and drop the commit. */
        pnfs_generic_prepare_to_resend_writes(data);
        pnfs_generic_commit_release(data);
        return -EAGAIN;
}
1949
1950static int
1951ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1952                           int how, struct nfs_commit_info *cinfo)
1953{
1954        return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
1955                                            ff_layout_initiate_commit);
1956}
1957
1958static struct pnfs_ds_commit_info *
1959ff_layout_get_ds_info(struct inode *inode)
1960{
1961        struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
1962
1963        if (layout == NULL)
1964                return NULL;
1965
1966        return &FF_LAYOUT_FROM_HDR(layout)->commit_info;
1967}
1968
1969static void
1970ff_layout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
1971                struct pnfs_layout_segment *lseg)
1972{
1973        struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
1974        struct inode *inode = lseg->pls_layout->plh_inode;
1975        struct pnfs_commit_array *array, *new;
1976
1977        new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, GFP_NOIO);
1978        if (new) {
1979                spin_lock(&inode->i_lock);
1980                array = pnfs_add_commit_array(fl_cinfo, new, lseg);
1981                spin_unlock(&inode->i_lock);
1982                if (array != new)
1983                        pnfs_free_commit_array(new);
1984        }
1985}
1986
/*
 * release_ds_info hook: tear down the DS commit info of @inode while
 * holding the inode's i_lock.
 */
static void
ff_layout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
                struct inode *inode)
{
        spin_lock(&inode->i_lock);
        pnfs_generic_ds_cinfo_destroy(fl_cinfo);
        spin_unlock(&inode->i_lock);
}
1995
1996static void
1997ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
1998{
1999        nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
2000                                                  id_node));
2001}
2002
2003static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
2004                                  const struct nfs4_layoutreturn_args *args,
2005                                  const struct nfs4_flexfile_layoutreturn_args *ff_args)
2006{
2007        __be32 *start;
2008
2009        start = xdr_reserve_space(xdr, 4);
2010        if (unlikely(!start))
2011                return -E2BIG;
2012
2013        *start = cpu_to_be32(ff_args->num_errors);
2014        /* This assume we always return _ALL_ layouts */
2015        return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors);
2016}
2017
2018static void
2019encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
2020{
2021        WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
2022}
2023
2024static void
2025ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
2026                            const nfs4_stateid *stateid,
2027                            const struct nfs42_layoutstat_devinfo *devinfo)
2028{
2029        __be32 *p;
2030
2031        p = xdr_reserve_space(xdr, 8 + 8);
2032        p = xdr_encode_hyper(p, devinfo->offset);
2033        p = xdr_encode_hyper(p, devinfo->length);
2034        encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
2035        p = xdr_reserve_space(xdr, 4*8);
2036        p = xdr_encode_hyper(p, devinfo->read_count);
2037        p = xdr_encode_hyper(p, devinfo->read_bytes);
2038        p = xdr_encode_hyper(p, devinfo->write_count);
2039        p = xdr_encode_hyper(p, devinfo->write_bytes);
2040        encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE);
2041}
2042
2043static void
2044ff_layout_encode_ff_iostat(struct xdr_stream *xdr,
2045                            const nfs4_stateid *stateid,
2046                            const struct nfs42_layoutstat_devinfo *devinfo)
2047{
2048        ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo);
2049        ff_layout_encode_ff_layoutupdate(xdr, devinfo,
2050                        devinfo->ld_private.data);
2051}
2052
2053/* report nothing for now */
2054static void ff_layout_encode_iostats_array(struct xdr_stream *xdr,
2055                const struct nfs4_layoutreturn_args *args,
2056                struct nfs4_flexfile_layoutreturn_args *ff_args)
2057{
2058        __be32 *p;
2059        int i;
2060
2061        p = xdr_reserve_space(xdr, 4);
2062        *p = cpu_to_be32(ff_args->num_dev);
2063        for (i = 0; i < ff_args->num_dev; i++)
2064                ff_layout_encode_ff_iostat(xdr,
2065                                &args->layout->plh_stateid,
2066                                &ff_args->devinfo[i]);
2067}
2068
2069static void
2070ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo,
2071                unsigned int num_entries)
2072{
2073        unsigned int i;
2074
2075        for (i = 0; i < num_entries; i++) {
2076                if (!devinfo[i].ld_private.ops)
2077                        continue;
2078                if (!devinfo[i].ld_private.ops->free)
2079                        continue;
2080                devinfo[i].ld_private.ops->free(&devinfo[i].ld_private);
2081        }
2082}
2083
2084static struct nfs4_deviceid_node *
2085ff_layout_alloc_deviceid_node(struct nfs_server *server,
2086                              struct pnfs_device *pdev, gfp_t gfp_flags)
2087{
2088        struct nfs4_ff_layout_ds *dsaddr;
2089
2090        dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags);
2091        if (!dsaddr)
2092                return NULL;
2093        return &dsaddr->id_node;
2094}
2095
/*
 * Encode the layout-type specific body of a LAYOUTRETURN.
 *
 * The body (I/O errors followed by I/O statistics) is first encoded into a
 * temporary stream backed by ff_args->pages[0], limited to PAGE_SIZE bytes.
 * Its length is then written to @xdr as a 32-bit word and the page is
 * attached to @xdr as the payload.
 */
static void
ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
                const void *voidargs,
                const struct nfs4_xdr_opaque_data *ff_opaque)
{
        const struct nfs4_layoutreturn_args *args = voidargs;
        struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data;
        /* Scratch buffer: a single page owned by ff_args. */
        struct xdr_buf tmp_buf = {
                .head = {
                        [0] = {
                                .iov_base = page_address(ff_args->pages[0]),
                        },
                },
                .buflen = PAGE_SIZE,
        };
        struct xdr_stream tmp_xdr;
        __be32 *start;

        dprintk("%s: Begin\n", __func__);

        xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);

        /*
         * NOTE(review): the return value of ff_layout_encode_ioerr() is
         * ignored here -- confirm the scratch page can never overflow.
         */
        ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
        ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);

        /* Length prefix, then the scratch page itself. */
        /* NOTE(review): start is not checked for NULL before use. */
        start = xdr_reserve_space(xdr, 4);
        *start = cpu_to_be32(tmp_buf.len);
        xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len);

        dprintk("%s: Return\n", __func__);
}
2127
2128static void
2129ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args)
2130{
2131        struct nfs4_flexfile_layoutreturn_args *ff_args;
2132
2133        if (!args->data)
2134                return;
2135        ff_args = args->data;
2136        args->data = NULL;
2137
2138        ff_layout_free_ds_ioerr(&ff_args->errors);
2139        ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev);
2140
2141        put_page(ff_args->pages[0]);
2142        kfree(ff_args);
2143}
2144
/*
 * Encode/free callbacks for the private LAYOUTRETURN data installed by
 * ff_layout_prepare_layoutreturn().
 */
static const struct nfs4_xdr_opaque_ops layoutreturn_ops = {
        .encode = ff_layout_encode_layoutreturn,
        .free = ff_layout_free_layoutreturn,
};
2149
2150static int
2151ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
2152{
2153        struct nfs4_flexfile_layoutreturn_args *ff_args;
2154        struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout);
2155
2156        ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL);
2157        if (!ff_args)
2158                goto out_nomem;
2159        ff_args->pages[0] = alloc_page(GFP_KERNEL);
2160        if (!ff_args->pages[0])
2161                goto out_nomem_free;
2162
2163        INIT_LIST_HEAD(&ff_args->errors);
2164        ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout,
2165                        &args->range, &ff_args->errors,
2166                        FF_LAYOUTRETURN_MAXERR);
2167
2168        spin_lock(&args->inode->i_lock);
2169        ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
2170                        &ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo));
2171        spin_unlock(&args->inode->i_lock);
2172
2173        args->ld_private->ops = &layoutreturn_ops;
2174        args->ld_private->data = ff_args;
2175        return 0;
2176out_nomem_free:
2177        kfree(ff_args);
2178out_nomem:
2179        return -ENOMEM;
2180}
2181
#ifdef CONFIG_NFS_V4_2
/*
 * Report the DS errors recorded for @lseg's byte range via LAYOUTERROR.
 *
 * Does nothing when the server lacks NFS_CAP_LAYOUTERROR or when no errors
 * are pending.  Errors are sent in batches of at most NFS42_LAYOUTERROR_MAX
 * entries; sending stops at the first batch for which
 * nfs42_proc_layouterror() returns a negative value.  The fetched error
 * list is always freed before returning.
 */
void
ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
{
        struct pnfs_layout_hdr *lo = lseg->pls_layout;
        struct nfs42_layout_error *errors;
        LIST_HEAD(head);

        if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR))
                return;
        /* NOTE(review): a limit of -1 presumably means "no limit" -- confirm. */
        ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1);
        if (list_empty(&head))
                return;

        errors = kmalloc_array(NFS42_LAYOUTERROR_MAX,
                        sizeof(*errors), GFP_NOFS);
        if (errors != NULL) {
                const struct nfs4_ff_layout_ds_err *pos;
                size_t n = 0;

                list_for_each_entry(pos, &head, list) {
                        errors[n].offset = pos->offset;
                        errors[n].length = pos->length;
                        nfs4_stateid_copy(&errors[n].stateid, &pos->stateid);
                        errors[n].errors[0].dev_id = pos->deviceid;
                        errors[n].errors[0].status = pos->status;
                        errors[n].errors[0].opnum = pos->opnum;
                        n++;
                        /* Keep filling until the batch is full or the list ends. */
                        if (!list_is_last(&pos->list, &head) &&
                            n < NFS42_LAYOUTERROR_MAX)
                                continue;
                        if (nfs42_proc_layouterror(lseg, errors, n) < 0)
                                break;
                        /* Batch sent: start the next one from slot 0. */
                        n = 0;
                }
                kfree(errors);
        }
        ff_layout_free_ds_ioerr(&head);
}
#else
/* Without CONFIG_NFS_V4_2 there is no LAYOUTERROR support: no-op stub. */
void
ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
{
}
#endif
2227
2228static int
2229ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
2230{
2231        const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
2232
2233        return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
2234}
2235
2236static size_t
2237ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
2238                          const int buflen)
2239{
2240        const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
2241        const struct in6_addr *addr = &sin6->sin6_addr;
2242
2243        /*
2244         * RFC 4291, Section 2.2.2
2245         *
2246         * Shorthanded ANY address
2247         */
2248        if (ipv6_addr_any(addr))
2249                return snprintf(buf, buflen, "::");
2250
2251        /*
2252         * RFC 4291, Section 2.2.2
2253         *
2254         * Shorthanded loopback address
2255         */
2256        if (ipv6_addr_loopback(addr))
2257                return snprintf(buf, buflen, "::1");
2258
2259        /*
2260         * RFC 4291, Section 2.2.3
2261         *
2262         * Special presentation address format for mapped v4
2263         * addresses.
2264         */
2265        if (ipv6_addr_v4mapped(addr))
2266                return snprintf(buf, buflen, "::ffff:%pI4",
2267                                        &addr->s6_addr32[3]);
2268
2269        /*
2270         * RFC 4291, Section 2.2.1
2271         */
2272        return snprintf(buf, buflen, "%pI6c", addr);
2273}
2274
2275/* Derived from rpc_sockaddr2uaddr */
2276static void
2277ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
2278{
2279        struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
2280        char portbuf[RPCBIND_MAXUADDRPLEN];
2281        char addrbuf[RPCBIND_MAXUADDRLEN];
2282        unsigned short port;
2283        int len, netid_len;
2284        __be32 *p;
2285
2286        switch (sap->sa_family) {
2287        case AF_INET:
2288                if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
2289                        return;
2290                port = ntohs(((struct sockaddr_in *)sap)->sin_port);
2291                break;
2292        case AF_INET6:
2293                if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
2294                        return;
2295                port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
2296                break;
2297        default:
2298                WARN_ON_ONCE(1);
2299                return;
2300        }
2301
2302        snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
2303        len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
2304
2305        netid_len = strlen(da->da_netid);
2306        p = xdr_reserve_space(xdr, 4 + netid_len);
2307        xdr_encode_opaque(p, da->da_netid, netid_len);
2308
2309        p = xdr_reserve_space(xdr, 4 + len);
2310        xdr_encode_opaque(p, addrbuf, len);
2311}
2312
2313static void
2314ff_layout_encode_nfstime(struct xdr_stream *xdr,
2315                         ktime_t t)
2316{
2317        struct timespec64 ts;
2318        __be32 *p;
2319
2320        p = xdr_reserve_space(xdr, 12);
2321        ts = ktime_to_timespec64(t);
2322        p = xdr_encode_hyper(p, ts.tv_sec);
2323        *p++ = cpu_to_be32(ts.tv_nsec);
2324}
2325
2326static void
2327ff_layout_encode_io_latency(struct xdr_stream *xdr,
2328                            struct nfs4_ff_io_stat *stat)
2329{
2330        __be32 *p;
2331
2332        p = xdr_reserve_space(xdr, 5 * 8);
2333        p = xdr_encode_hyper(p, stat->ops_requested);
2334        p = xdr_encode_hyper(p, stat->bytes_requested);
2335        p = xdr_encode_hyper(p, stat->ops_completed);
2336        p = xdr_encode_hyper(p, stat->bytes_completed);
2337        p = xdr_encode_hyper(p, stat->bytes_not_delivered);
2338        ff_layout_encode_nfstime(xdr, stat->total_busy_time);
2339        ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
2340}
2341
/*
 * Encode the flexfile layoutupdate for @mirror: the first address of the
 * mirror's data server, the first file handle in fh_versions, the read and
 * write latency records, the time elapsed since mirror->start_time, and a
 * trailing boolean that is always false.
 *
 * @devinfo is not used by the encoding itself.
 */
static void
ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
                              const struct nfs42_layoutstat_devinfo *devinfo,
                              struct nfs4_ff_layout_mirror *mirror)
{
        struct nfs4_pnfs_ds_addr *da;
        struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
        struct nfs_fh *fh = &mirror->fh_versions[0];
        __be32 *p;

        /* Only the first address on the DS address list is reported. */
        da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
        dprintk("%s: DS %s: encoding address %s\n",
                __func__, ds->ds_remotestr, da->da_remotestr);
        /* netaddr4 */
        ff_layout_encode_netaddr(xdr, da);
        /* nfs_fh4 */
        p = xdr_reserve_space(xdr, 4 + fh->size);
        xdr_encode_opaque(p, fh->data, fh->size);
        /* ff_io_latency4 read */
        /* Both latency records are read under mirror->lock. */
        spin_lock(&mirror->lock);
        ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
        /* ff_io_latency4 write */
        ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
        spin_unlock(&mirror->lock);
        /* nfstime4 */
        ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
        /* bool */
        p = xdr_reserve_space(xdr, 4);
        *p = cpu_to_be32(false);
}
2372
2373static void
2374ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args,
2375                             const struct nfs4_xdr_opaque_data *opaque)
2376{
2377        struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque,
2378                        struct nfs42_layoutstat_devinfo, ld_private);
2379        __be32 *start;
2380
2381        /* layoutupdate length */
2382        start = xdr_reserve_space(xdr, 4);
2383        ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data);
2384
2385        *start = cpu_to_be32((xdr->p - start - 1) * 4);
2386}
2387
2388static void
2389ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque)
2390{
2391        struct nfs4_ff_layout_mirror *mirror = opaque->data;
2392
2393        ff_layout_put_mirror(mirror);
2394}
2395
/*
 * Encode/free callbacks attached to each devinfo filled in by
 * ff_layout_mirror_prepare_stats().
 */
static const struct nfs4_xdr_opaque_ops layoutstat_ops = {
        .encode = ff_layout_encode_layoutstats,
        .free   = ff_layout_free_layoutstats,
};
2400
2401static int
2402ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
2403                               struct nfs42_layoutstat_devinfo *devinfo,
2404                               int dev_limit)
2405{
2406        struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
2407        struct nfs4_ff_layout_mirror *mirror;
2408        struct nfs4_deviceid_node *dev;
2409        int i = 0;
2410
2411        list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {
2412                if (i >= dev_limit)
2413                        break;
2414                if (IS_ERR_OR_NULL(mirror->mirror_ds))
2415                        continue;
2416                if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
2417                        continue;
2418                /* mirror refcount put in cleanup_layoutstats */
2419                if (!refcount_inc_not_zero(&mirror->ref))
2420                        continue;
2421                dev = &mirror->mirror_ds->id_node; 
2422                memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
2423                devinfo->offset = 0;
2424                devinfo->length = NFS4_MAX_UINT64;
2425                spin_lock(&mirror->lock);
2426                devinfo->read_count = mirror->read_stat.io_stat.ops_completed;
2427                devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
2428                devinfo->write_count = mirror->write_stat.io_stat.ops_completed;
2429                devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
2430                spin_unlock(&mirror->lock);
2431                devinfo->layout_type = LAYOUT_FLEX_FILES;
2432                devinfo->ld_private.ops = &layoutstat_ops;
2433                devinfo->ld_private.data = mirror;
2434
2435                devinfo++;
2436                i++;
2437        }
2438        return i;
2439}
2440
2441static int
2442ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
2443{
2444        struct nfs4_flexfile_layout *ff_layout;
2445        const int dev_count = PNFS_LAYOUTSTATS_MAXDEV;
2446
2447        /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
2448        args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);
2449        if (!args->devinfo)
2450                return -ENOMEM;
2451
2452        spin_lock(&args->inode->i_lock);
2453        ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
2454        args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
2455                        &args->devinfo[0], dev_count);
2456        spin_unlock(&args->inode->i_lock);
2457        if (!args->num_dev) {
2458                kfree(args->devinfo);
2459                args->devinfo = NULL;
2460                return -ENOENT;
2461        }
2462
2463        return 0;
2464}
2465
/*
 * set_layoutdriver hook: when NFSv4.2 support is built in, advertise
 * LAYOUTSTATS capability on @server.  @dummy is unused.  Always returns 0.
 */
static int
ff_layout_set_layoutdriver(struct nfs_server *server,
                const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
        server->caps |= NFS_CAP_LAYOUTSTATS;
#endif
        return 0;
}
2475
/*
 * Commit operations for the flexfile layout driver.  Only the DS-info
 * setup/release and commit_pagelist hooks are flexfile specific; the rest
 * are the generic pNFS implementations.
 */
static const struct pnfs_commit_ops ff_layout_commit_ops = {
        .setup_ds_info          = ff_layout_setup_ds_info,
        .release_ds_info        = ff_layout_release_ds_info,
        .mark_request_commit    = pnfs_layout_mark_request_commit,
        .clear_request_commit   = pnfs_generic_clear_request_commit,
        .scan_commit_lists      = pnfs_generic_scan_commit_lists,
        .recover_commit_reqs    = pnfs_generic_recover_commit_reqs,
        .commit_pagelist        = ff_layout_commit_pagelist,
};
2485
/*
 * Layout driver descriptor for LAYOUT_FLEX_FILES.  Registered with the pNFS
 * core at module init and unregistered at module exit.
 */
static struct pnfs_layoutdriver_type flexfilelayout_type = {
        .id                     = LAYOUT_FLEX_FILES,
        .name                   = "LAYOUT_FLEX_FILES",
        .owner                  = THIS_MODULE,
        .flags                  = PNFS_LAYOUTGET_ON_OPEN,
        .max_layoutget_response = 4096, /* 1 page or so... */
        .set_layoutdriver       = ff_layout_set_layoutdriver,
        .alloc_layout_hdr       = ff_layout_alloc_layout_hdr,
        .free_layout_hdr        = ff_layout_free_layout_hdr,
        .alloc_lseg             = ff_layout_alloc_lseg,
        .free_lseg              = ff_layout_free_lseg,
        .add_lseg               = ff_layout_add_lseg,
        .pg_read_ops            = &ff_layout_pg_read_ops,
        .pg_write_ops           = &ff_layout_pg_write_ops,
        .get_ds_info            = ff_layout_get_ds_info,
        .free_deviceid_node     = ff_layout_free_deviceid_node,
        .read_pagelist          = ff_layout_read_pagelist,
        .write_pagelist         = ff_layout_write_pagelist,
        .alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
        .prepare_layoutreturn   = ff_layout_prepare_layoutreturn,
        .sync                   = pnfs_nfs_generic_sync,
        .prepare_layoutstats    = ff_layout_prepare_layoutstats,
};
2509
2510static int __init nfs4flexfilelayout_init(void)
2511{
2512        printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
2513               __func__);
2514        return pnfs_register_layoutdriver(&flexfilelayout_type);
2515}
2516
2517static void __exit nfs4flexfilelayout_exit(void)
2518{
2519        printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
2520               __func__);
2521        pnfs_unregister_layoutdriver(&flexfilelayout_type);
2522}
2523
2524MODULE_ALIAS("nfs-layouttype4-4");
2525
2526MODULE_LICENSE("GPL");
2527MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");
2528
2529module_init(nfs4flexfilelayout_init);
2530module_exit(nfs4flexfilelayout_exit);
2531
2532module_param(io_maxretrans, ushort, 0644);
2533MODULE_PARM_DESC(io_maxretrans, "The  number of times the NFSv4.1 client "
2534                        "retries an I/O request before returning an error. ");
2535