/*
 * Module for pnfs flexfile layout driver.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tao Peng <bergwolf@primarydata.com>
 */
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <linux/sched/mm.h>

#include <linux/sunrpc/metrics.h>

#include "flexfilelayout.h"
#include "../nfs4session.h"
#include "../nfs4idmap.h"
#include "../internal.h"
#include "../delegation.h"
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"
#include "../nfs42.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

#define FF_LAYOUT_POLL_RETRY_MAX	(15*HZ)
#define FF_LAYOUTRETURN_MAXERR		20

static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
		struct nfs_pgio_header *hdr);
static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
		struct nfs42_layoutstat_devinfo *devinfo,
		int dev_limit);
static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
		const struct nfs42_layoutstat_devinfo *devinfo,
		struct nfs4_ff_layout_mirror *mirror);

static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_flexfile_layout *ffl;

	ffl = kzalloc(sizeof(*ffl), gfp_flags);
	if (ffl) {
		INIT_LIST_HEAD(&ffl->error_list);
		INIT_LIST_HEAD(&ffl->mirrors);
		ffl->last_report_time = ktime_get();
		return &ffl->generic_hdr;
	} else
		return NULL;
}

static void
ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs4_ff_layout_ds_err *err, *n;

	list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list,
				 list) {
		list_del(&err->list);
		kfree(err);
	}
	kfree(FF_LAYOUT_FROM_HDR(lo));
}

static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
	if (unlikely(p == NULL))
		return -ENOBUFS;
	stateid->type = NFS4_PNFS_DS_STATEID_TYPE;
	memcpy(stateid->data, p, NFS4_STATEID_SIZE);
	dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,
		p[0], p[1], p[2], p[3]);
	return 0;
}

static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(devid, p, NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(devid);
	return 0;
}

static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	fh->size = be32_to_cpup(p++);
	/* Bound by the capacity of fh->data, not the whole struct, so
	 * the memcpy() below cannot overrun the filehandle buffer. */
	if (fh->size > NFS_MAXFHSIZE) {
		printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
		       fh->size);
		return -EOVERFLOW;
	}

	p = xdr_inline_decode(xdr, fh->size);
	if (unlikely(!p))
		return -ENOBUFS;
	memcpy(&fh->data, p, fh->size);
	dprintk("%s: fh len %d\n", __func__, fh->size);

	return 0;
}

/*
 * Currently only stringified uids and gids are accepted, i.e. kerberos
 * principals are not supported to the DSes. One common function suffices
 * for now, but it should be split if kerberos principals are ever added.
 */
static int
decode_name(struct xdr_stream *xdr, u32 *id)
{
	__be32 *p;
	int len;

	/* opaque_length(4) */
	p = xdr_inline_decode(xdr, 4);
	if (unlikely(!p))
		return -ENOBUFS;
	len = be32_to_cpup(p++);
	if (len < 0)
		return -EINVAL;

	dprintk("%s: len %u\n", __func__, len);

	/* opaque body */
	p = xdr_inline_decode(xdr, len);
	if (unlikely(!p))
		return -ENOBUFS;

	if (!nfs_map_string_to_numeric((char *)p, len, id))
		return -EINVAL;

	return 0;
}

static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
		const struct nfs4_ff_layout_mirror *m2)
{
	int i, j;

	if (m1->fh_versions_cnt != m2->fh_versions_cnt)
		return false;
	for (i = 0; i < m1->fh_versions_cnt; i++) {
		bool found_fh = false;
		for (j = 0; j < m2->fh_versions_cnt; j++) {
			if (nfs_compare_fh(&m1->fh_versions[i],
					&m2->fh_versions[j]) == 0) {
				found_fh = true;
				break;
			}
		}
		if (!found_fh)
			return false;
	}
	return true;
}

static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
		struct nfs4_ff_layout_mirror *mirror)
{
	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
	struct nfs4_ff_layout_mirror *pos;
	struct inode *inode = lo->plh_inode;

	spin_lock(&inode->i_lock);
	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
		if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)
			continue;
		if (!ff_mirror_match_fh(mirror, pos))
			continue;
		if (refcount_inc_not_zero(&pos->ref)) {
			spin_unlock(&inode->i_lock);
			return pos;
		}
	}
	list_add(&mirror->mirrors, &ff_layout->mirrors);
	mirror->layout = lo;
	spin_unlock(&inode->i_lock);
	return mirror;
}

static void
ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	struct inode *inode;
	if (mirror->layout == NULL)
		return;
	inode = mirror->layout->plh_inode;
	spin_lock(&inode->i_lock);
	list_del(&mirror->mirrors);
	spin_unlock(&inode->i_lock);
	mirror->layout = NULL;
}

static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{
	struct nfs4_ff_layout_mirror *mirror;

	mirror = kzalloc(sizeof(*mirror), gfp_flags);
	if (mirror != NULL) {
		spin_lock_init(&mirror->lock);
		refcount_set(&mirror->ref, 1);
		INIT_LIST_HEAD(&mirror->mirrors);
	}
	return mirror;
}

static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	const struct cred *cred;

	ff_layout_remove_mirror(mirror);
	kfree(mirror->fh_versions);
	cred = rcu_access_pointer(mirror->ro_cred);
	put_cred(cred);
	cred = rcu_access_pointer(mirror->rw_cred);
	put_cred(cred);
	nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
	kfree(mirror);
}

static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	if (mirror != NULL && refcount_dec_and_test(&mirror->ref))
		ff_layout_free_mirror(mirror);
}

static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls)
{
	int i;

	if (fls->mirror_array) {
		for (i = 0; i < fls->mirror_array_cnt; i++) {
			/* normally mirror_ds is freed in
			 * .free_deviceid_node but we still do it here
			 * for .alloc_lseg error path */
			ff_layout_put_mirror(fls->mirror_array[i]);
		}
		kfree(fls->mirror_array);
		fls->mirror_array = NULL;
	}
}

static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr)
{
	int ret = 0;

	dprintk("--> %s\n", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
			__func__);
		ret = -EINVAL;
	}

	dprintk("--> %s returns %d\n", __func__, ret);
	return ret;
}

static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
{
	if (fls) {
		ff_layout_free_mirror_array(fls);
		kfree(fls);
	}
}

static bool
ff_lseg_range_is_after(const struct pnfs_layout_range *l1,
		const struct pnfs_layout_range *l2)
{
	u64 end1, end2;

	if (l1->iomode != l2->iomode)
		return l1->iomode != IOMODE_READ;
	end1 = pnfs_calc_offset_end(l1->offset, l1->length);
	end2 = pnfs_calc_offset_end(l2->offset, l2->length);
	if (end1 < l2->offset)
		return false;
	if (end2 < l1->offset)
		return true;
	return l2->offset <= l1->offset;
}

static bool
ff_lseg_merge(struct pnfs_layout_segment *new,
		struct pnfs_layout_segment *old)
{
	u64 new_end, old_end;

	if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags))
		return false;
	if (new->pls_range.iomode != old->pls_range.iomode)
		return false;
	old_end = pnfs_calc_offset_end(old->pls_range.offset,
			old->pls_range.length);
	if (old_end < new->pls_range.offset)
		return false;
	new_end = pnfs_calc_offset_end(new->pls_range.offset,
			new->pls_range.length);
	if (new_end < old->pls_range.offset)
		return false;

	/* Mergeable: copy info from 'old' to 'new' */
	if (new_end < old_end)
		new_end = old_end;
	if (new->pls_range.offset < old->pls_range.offset)
		new->pls_range.offset = old->pls_range.offset;
	new->pls_range.length = pnfs_calc_offset_length(new->pls_range.offset,
			new_end);
	if (test_bit(NFS_LSEG_ROC, &old->pls_flags))
		set_bit(NFS_LSEG_ROC, &new->pls_flags);
	return true;
}

static void
ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg,
		struct list_head *free_me)
{
	pnfs_generic_layout_insert_lseg(lo, lseg,
			ff_lseg_range_is_after,
			ff_lseg_merge,
			free_me);
}

static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
	int i, j;

	for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
		for (j = i + 1; j < fls->mirror_array_cnt; j++)
			if (fls->mirror_array[i]->efficiency <
			    fls->mirror_array[j]->efficiency)
				swap(fls->mirror_array[i],
				     fls->mirror_array[j]);
	}
}

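/*
 * Sketch of the wire format decoded by ff_layout_alloc_lseg() below, as
 * implied by its sequence of xdr_inline_decode() calls (cf. the flex
 * files layout type, RFC 8435):
 *
 *	stripe_unit	(8 bytes)
 *	mirror_cnt	(4 bytes)
 *	per mirror:
 *		ds_count	(4 bytes, must be 1: no striping yet)
 *		deviceid	(NFS4_DEVICEID4_SIZE bytes)
 *		efficiency	(4 bytes)
 *		stateid		(NFS4_STATEID_SIZE bytes)
 *		fh_count	(4 bytes) + that many filehandles
 *		user, group	(XDR opaque strings)
 *	flags		(4 bytes, optional)
 *	stats interval	(4 bytes, optional)
 */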
static struct pnfs_layout_segment *
ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
		struct nfs4_layoutget_res *lgr,
		gfp_t gfp_flags)
{
	struct pnfs_layout_segment *ret;
	struct nfs4_ff_layout_segment *fls = NULL;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	u64 stripe_unit;
	u32 mirror_array_cnt;
	__be32 *p;
	int i, rc;

	dprintk("--> %s\n", __func__);
	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return ERR_PTR(-ENOMEM);

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
			      lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* stripe unit and mirror_array_cnt */
	rc = -EIO;
	p = xdr_inline_decode(&stream, 8 + 4);
	if (!p)
		goto out_err_free;

	p = xdr_decode_hyper(p, &stripe_unit);
	mirror_array_cnt = be32_to_cpup(p++);
	dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__,
		stripe_unit, mirror_array_cnt);

	if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT ||
	    mirror_array_cnt == 0)
		goto out_err_free;

	rc = -ENOMEM;
	fls = kzalloc(sizeof(*fls), gfp_flags);
	if (!fls)
		goto out_err_free;

	fls->mirror_array_cnt = mirror_array_cnt;
	fls->stripe_unit = stripe_unit;
	fls->mirror_array = kcalloc(fls->mirror_array_cnt,
				    sizeof(fls->mirror_array[0]), gfp_flags);
	if (fls->mirror_array == NULL)
		goto out_err_free;

	for (i = 0; i < fls->mirror_array_cnt; i++) {
		struct nfs4_ff_layout_mirror *mirror;
		struct cred *kcred;
		const struct cred *cred;
		kuid_t uid;
		kgid_t gid;
		u32 ds_count, fh_count, id;
		int j;

		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		ds_count = be32_to_cpup(p);

		/* FIXME: allow for striping? */
		if (ds_count != 1)
			goto out_err_free;

		fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
		if (fls->mirror_array[i] == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		fls->mirror_array[i]->ds_count = ds_count;

		/* deviceid */
		rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);
		if (rc)
			goto out_err_free;

		/* efficiency */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fls->mirror_array[i]->efficiency = be32_to_cpup(p);

		/* stateid */
		rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid);
		if (rc)
			goto out_err_free;

		/* fh */
		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;
		fh_count = be32_to_cpup(p);

		fls->mirror_array[i]->fh_versions =
			kcalloc(fh_count, sizeof(struct nfs_fh),
				gfp_flags);
		if (fls->mirror_array[i]->fh_versions == NULL) {
			rc = -ENOMEM;
			goto out_err_free;
		}

		for (j = 0; j < fh_count; j++) {
			rc = decode_nfs_fh(&stream,
					   &fls->mirror_array[i]->fh_versions[j]);
			if (rc)
				goto out_err_free;
		}

		fls->mirror_array[i]->fh_versions_cnt = fh_count;

		/* user */
		rc = decode_name(&stream, &id);
		if (rc)
			goto out_err_free;

		uid = make_kuid(&init_user_ns, id);

		/* group */
		rc = decode_name(&stream, &id);
		if (rc)
			goto out_err_free;

		gid = make_kgid(&init_user_ns, id);

		if (gfp_flags & __GFP_FS)
			kcred = prepare_kernel_cred(NULL);
		else {
			unsigned int nofs_flags = memalloc_nofs_save();
			kcred = prepare_kernel_cred(NULL);
			memalloc_nofs_restore(nofs_flags);
		}
		rc = -ENOMEM;
		if (!kcred)
			goto out_err_free;
		kcred->fsuid = uid;
		kcred->fsgid = gid;
		cred = kcred;

		if (lgr->range.iomode == IOMODE_READ)
			rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
		else
			rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);

		mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
		if (mirror != fls->mirror_array[i]) {
			/* swap cred ptrs so free_mirror will clean up old */
			if (lgr->range.iomode == IOMODE_READ) {
				cred = xchg(&mirror->ro_cred, cred);
				rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
			} else {
				cred = xchg(&mirror->rw_cred, cred);
				rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
			}
			ff_layout_free_mirror(fls->mirror_array[i]);
			fls->mirror_array[i] = mirror;
		}

		dprintk("%s: iomode %s uid %u gid %u\n", __func__,
			lgr->range.iomode == IOMODE_READ ? "READ" : "RW",
			from_kuid(&init_user_ns, uid),
			from_kgid(&init_user_ns, gid));
	}

	p = xdr_inline_decode(&stream, 4);
	if (!p)
		goto out_sort_mirrors;
	fls->flags = be32_to_cpup(p);

	p = xdr_inline_decode(&stream, 4);
	if (!p)
		goto out_sort_mirrors;
	for (i = 0; i < fls->mirror_array_cnt; i++)
		fls->mirror_array[i]->report_interval = be32_to_cpup(p);

out_sort_mirrors:
	ff_layout_sort_mirrors(fls);
	rc = ff_layout_check_layout(lgr);
	if (rc)
		goto out_err_free;
	ret = &fls->generic_hdr;
	dprintk("<-- %s (success)\n", __func__);
out_free_page:
	__free_page(scratch);
	return ret;
out_err_free:
	_ff_layout_free_lseg(fls);
	ret = ERR_PTR(rc);
	dprintk("<-- %s (%d)\n", __func__, rc);
	goto out_free_page;
}

static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &layout->plh_segs, pls_list)
		if (lseg->pls_range.iomode == IOMODE_RW)
			return true;

	return false;
}

static void
ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);

	dprintk("--> %s\n", __func__);

	if (lseg->pls_range.iomode == IOMODE_RW) {
		struct nfs4_flexfile_layout *ffl;
		struct inode *inode;

		ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout);
		inode = ffl->generic_hdr.plh_inode;
		spin_lock(&inode->i_lock);
		if (!ff_layout_has_rw_segments(lseg->pls_layout)) {
			ffl->commit_info.nbuckets = 0;
			kfree(ffl->commit_info.buckets);
			ffl->commit_info.buckets = NULL;
		}
		spin_unlock(&inode->i_lock);
	}
	_ff_layout_free_lseg(fls);
}

/* Return 1 until we have multiple lsegs support */
static int
ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
{
	return 1;
}

static void
nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
{
	/* first IO request? */
	if (atomic_inc_return(&timer->n_ops) == 1) {
		timer->start_time = now;
	}
}

static ktime_t
nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
{
	ktime_t start;

	if (atomic_dec_return(&timer->n_ops) < 0)
		WARN_ON_ONCE(1);

	start = timer->start_time;
	timer->start_time = now;
	return ktime_sub(now, start);
}

static bool
nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
			    struct nfs4_ff_layoutstat *layoutstat,
			    ktime_t now)
{
	s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
	struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);

	nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
	if (!mirror->start_time)
		mirror->start_time = now;
	if (mirror->report_interval != 0)
		report_interval = (s64)mirror->report_interval * 1000LL;
	else if (layoutstats_timer != 0)
		report_interval = (s64)layoutstats_timer * 1000LL;
	if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
	    report_interval) {
		ffl->last_report_time = now;
		return true;
	}

	return false;
}
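
/*
 * Note on units: report_interval is compared against ktime_to_ms(), so it
 * is held in milliseconds; mirror->report_interval (from the layout) and
 * the layoutstats_timer module parameter are both in seconds, hence the
 * "* 1000LL" above (e.g. a server interval of 30 becomes 30000 ms).
 */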

static void
nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
		__u64 requested)
{
	struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;

	iostat->ops_requested++;
	iostat->bytes_requested += requested;
}

static void
nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
		__u64 requested,
		__u64 completed,
		ktime_t time_completed,
		ktime_t time_started)
{
	struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
	ktime_t completion_time = ktime_sub(time_completed, time_started);
	ktime_t timer;

	iostat->ops_completed++;
	iostat->bytes_completed += completed;
	iostat->bytes_not_delivered += requested - completed;

	timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer, time_completed);
	iostat->total_busy_time =
		ktime_add(iostat->total_busy_time, timer);
	iostat->aggregate_completion_time =
		ktime_add(iostat->aggregate_completion_time,
			  completion_time);
}

static void
nfs4_ff_layout_stat_io_start_read(struct inode *inode,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested, ktime_t now)
{
	bool report;

	spin_lock(&mirror->lock);
	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now);
	nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);

	if (report)
		pnfs_report_layoutstat(inode, GFP_KERNEL);
}

static void
nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested,
		__u64 completed)
{
	spin_lock(&mirror->lock);
	nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
			requested, completed,
			ktime_get(), task->tk_start);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);
}

static void
nfs4_ff_layout_stat_io_start_write(struct inode *inode,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested, ktime_t now)
{
	bool report;

	spin_lock(&mirror->lock);
	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat, now);
	nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);

	if (report)
		pnfs_report_layoutstat(inode, GFP_NOIO);
}

static void
nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
		struct nfs4_ff_layout_mirror *mirror,
		__u64 requested,
		__u64 completed,
		enum nfs3_stable_how committed)
{
	if (committed == NFS_UNSTABLE)
		requested = completed = 0;

	spin_lock(&mirror->lock);
	nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
			requested, completed, ktime_get(), task->tk_start);
	set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
	spin_unlock(&mirror->lock);
}

static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
			    struct nfs_commit_info *cinfo,
			    gfp_t gfp_flags)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct pnfs_commit_bucket *buckets;
	int size;

	if (cinfo->ds->nbuckets != 0) {
		/* This assumes there is only one RW lseg per file.
		 * To support multiple lseg per file, we need to
		 * change struct pnfs_commit_bucket to allow dynamic
		 * increasing nbuckets.
		 */
		return 0;
	}

	size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg);

	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
			  gfp_flags);
	if (!buckets)
		return -ENOMEM;
	else {
		int i;

		spin_lock(&cinfo->inode->i_lock);
		if (cinfo->ds->nbuckets != 0)
			kfree(buckets);
		else {
			cinfo->ds->buckets = buckets;
			cinfo->ds->nbuckets = size;
			for (i = 0; i < size; i++) {
				INIT_LIST_HEAD(&buckets[i].written);
				INIT_LIST_HEAD(&buckets[i].committing);
				/* mark direct verifier as unset */
				buckets[i].direct_verf.committed =
					NFS_INVALID_STABLE_HOW;
			}
		}
		spin_unlock(&cinfo->inode->i_lock);
		return 0;
	}
}
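
/*
 * Bucket sizing above: one commit bucket per (lseg, mirror) pair. With
 * ff_layout_get_lseg_count() hard-wired to 1, that is simply one bucket
 * per mirror of the single whole-file RW segment.
 */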

static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
				  int start_idx,
				  int *best_idx)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
	struct nfs4_pnfs_ds *ds;
	bool fail_return = false;
	int idx;

	/* mirrors are sorted by efficiency */
	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
		if (idx+1 == fls->mirror_array_cnt)
			fail_return = true;
		ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
		if (ds) {
			*best_idx = idx;
			return ds;
		}
	}

	return NULL;
}
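
/*
 * Note on the selection strategy above: ff_layout_sort_mirrors() orders
 * mirror_array[] by decreasing efficiency, so the scan returns the most
 * efficient data server that can be prepared. fail_return is only set
 * for the final mirror, i.e. a failure is reported only once every
 * other mirror has already been tried.
 */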

static void
ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
		      struct nfs_page *req,
		      bool strict_iomode)
{
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
					   req->wb_context,
					   0,
					   NFS4_MAX_UINT64,
					   IOMODE_READ,
					   strict_iomode,
					   GFP_KERNEL);
	if (IS_ERR(pgio->pg_lseg)) {
		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
		pgio->pg_lseg = NULL;
	}
}

static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct nfs_pgio_mirror *pgm;
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_pnfs_ds *ds;
	int ds_idx;

retry:
	pnfs_generic_pg_check_layout(pgio);
	/* Use full layout for now */
	if (!pgio->pg_lseg) {
		ff_layout_pg_get_read(pgio, req, false);
		if (!pgio->pg_lseg)
			goto out_nolseg;
	}
	if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
		ff_layout_pg_get_read(pgio, req, true);
		if (!pgio->pg_lseg)
			goto out_nolseg;
	}

	ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
	if (!ds) {
		if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
			goto out_mds;
		pnfs_put_lseg(pgio->pg_lseg);
		pgio->pg_lseg = NULL;
		/* Sleep for 1 second before retrying */
		ssleep(1);
		goto retry;
	}

	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);

	pgio->pg_mirror_idx = ds_idx;

	/* read always uses only one mirror - idx 0 for pgio layer */
	pgm = &pgio->pg_mirrors[0];
	pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;

	return;
out_nolseg:
	if (pgio->pg_error < 0)
		return;
out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_read_mds(pgio);
}

static void
ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs_pgio_mirror *pgm;
	struct nfs_commit_info cinfo;
	struct nfs4_pnfs_ds *ds;
	int i;
	int status;

retry:
	pnfs_generic_pg_check_layout(pgio);
	if (!pgio->pg_lseg) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		goto out_mds;

	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
	status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
	if (status < 0)
		goto out_mds;

	/* Use a direct mapping of ds_idx to pgio mirror_idx */
	if (WARN_ON_ONCE(pgio->pg_mirror_count !=
	    FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
		goto out_mds;

	for (i = 0; i < pgio->pg_mirror_count; i++) {
		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
		if (!ds) {
			if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
				goto out_mds;
			pnfs_put_lseg(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			/* Sleep for 1 second before retrying */
			ssleep(1);
			goto retry;
		}
		pgm = &pgio->pg_mirrors[i];
		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
	}

	return;

out_mds:
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
}

static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
				    struct nfs_page *req)
{
	if (!pgio->pg_lseg) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   0,
						   NFS4_MAX_UINT64,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			goto out;
		}
	}
	if (pgio->pg_lseg)
		return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);

	/* no lseg means that pnfs is not in use, so no mirroring here */
	nfs_pageio_reset_write_mds(pgio);
out:
	return 1;
}

static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
	.pg_init = ff_layout_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
	.pg_init = ff_layout_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (retry_pnfs) {
		dprintk("%s Reset task %5u for i/o through pNFS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		hdr->completion_ops->reschedule_io(hdr);
		return;
	}

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}

static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	pnfs_layoutcommit_inode(hdr->inode, false);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}

static int ff_layout_async_handle_error_v4(struct rpc_task *task,
					   struct nfs4_state *state,
					   struct nfs_client *clp,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct inode *inode = lo->plh_inode;
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

	switch (task->tk_status) {
	case -NFS4ERR_BADSESSION:
	case -NFS4ERR_BADSLOT:
	case -NFS4ERR_BAD_HIGH_SLOT:
	case -NFS4ERR_DEADSESSION:
	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
	case -NFS4ERR_SEQ_FALSE_RETRY:
	case -NFS4ERR_SEQ_MISORDERED:
		dprintk("%s ERROR %d, Reset session. Exchangeid "
			"flags 0x%x\n", __func__, task->tk_status,
			clp->cl_exchange_flags);
		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
		break;
	case -NFS4ERR_DELAY:
	case -NFS4ERR_GRACE:
		rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
		break;
	case -NFS4ERR_RETRY_UNCACHED_REP:
		break;
	/* Invalidate Layout errors */
	case -NFS4ERR_PNFS_NO_LAYOUT:
	case -ESTALE:           /* mapped NFS4ERR_STALE */
	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
	case -NFS4ERR_FHEXPIRED:
	case -NFS4ERR_WRONG_TYPE:
		dprintk("%s Invalid layout error %d\n", __func__,
			task->tk_status);
		/*
		 * Destroy layout so new i/o will get a new layout.
		 * Layout will not be destroyed until all current lseg
		 * references are put. Mark layout as invalid to resend failed
		 * i/o and all i/o waiting on the slot table to the MDS until
		 * layout is destroyed and a new valid layout is obtained.
		 */
		pnfs_destroy_layout(NFS_I(inode));
		rpc_wake_up(&tbl->slot_tbl_waitq);
		goto reset;
	/* RPC connection errors */
	case -ECONNREFUSED:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ENETUNREACH:
	case -EIO:
	case -ETIMEDOUT:
	case -EPIPE:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
				&devid->deviceid);
		rpc_wake_up(&tbl->slot_tbl_waitq);
		/* fall through */
	default:
		if (ff_layout_avoid_mds_available_ds(lseg))
			return -NFS4ERR_RESET_TO_PNFS;
reset:
		dprintk("%s Retry through MDS. Error %d\n", __func__,
			task->tk_status);
		return -NFS4ERR_RESET_TO_MDS;
	}
	task->tk_status = 0;
	return -EAGAIN;
}

/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
					   struct pnfs_layout_segment *lseg,
					   int idx)
{
	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);

	switch (task->tk_status) {
	/* File access problems. Don't mark the device as unavailable */
	case -EACCES:
	case -ESTALE:
	case -EISDIR:
	case -EBADHANDLE:
	case -ELOOP:
	case -ENOSPC:
		break;
	case -EJUKEBOX:
		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
		goto out_retry;
	default:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
				&devid->deviceid);
	}

	return -NFS4ERR_RESET_TO_PNFS;
out_retry:
	task->tk_status = 0;
	rpc_restart_call_prepare(task);
	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
	return -EAGAIN;
}

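/*
 * Dispatch wrapper for the two handlers above. The return values form
 * the error-handling contract consumed by the done callbacks below:
 *   0                      - success, nothing to do
 *   -NFS4ERR_RESET_TO_PNFS - resend the I/O through another mirror
 *   -NFS4ERR_RESET_TO_MDS  - resend the I/O through the MDS
 *   -EAGAIN                - task->tk_status was cleared; retry the RPC
 */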
static int ff_layout_async_handle_error(struct rpc_task *task,
					struct nfs4_state *state,
					struct nfs_client *clp,
					struct pnfs_layout_segment *lseg,
					int idx)
{
	int vers = clp->cl_nfs_mod->rpc_vers->number;

	if (task->tk_status >= 0)
		return 0;

	/* Handle the case of an invalid layout segment */
	if (!pnfs_is_valid_lseg(lseg))
		return -NFS4ERR_RESET_TO_PNFS;

	switch (vers) {
	case 3:
		return ff_layout_async_handle_error_v3(task, lseg, idx);
	case 4:
		return ff_layout_async_handle_error_v4(task, state, clp,
						       lseg, idx);
	default:
		/* should never happen */
		WARN_ON_ONCE(1);
		return 0;
	}
}

static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
					int idx, u64 offset, u64 length,
					u32 status, int opnum, int error)
{
	struct nfs4_ff_layout_mirror *mirror;
	int err;

	if (status == 0) {
		switch (error) {
		case -ETIMEDOUT:
		case -EPFNOSUPPORT:
		case -EPROTONOSUPPORT:
		case -EOPNOTSUPP:
		case -ECONNREFUSED:
		case -ECONNRESET:
		case -EHOSTDOWN:
		case -EHOSTUNREACH:
		case -ENETUNREACH:
		case -EADDRINUSE:
		case -ENOBUFS:
		case -EPIPE:
		case -EPERM:
			status = NFS4ERR_NXIO;
			break;
		case -EACCES:
			status = NFS4ERR_ACCESS;
			break;
		default:
			return;
		}
	}

	switch (status) {
	case NFS4ERR_DELAY:
	case NFS4ERR_GRACE:
		return;
	default:
		break;
	}

	mirror = FF_LAYOUT_COMP(lseg, idx);
	err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
				       mirror, offset, length, status, opnum,
				       GFP_NOIO);
	pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
	dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
}

/* NFS_PROTO call done callback routines */
static int ff_layout_read_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	int err;

	trace_nfs4_pnfs_read(hdr, task->tk_status);
	if (task->tk_status < 0)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_READ,
					    task->tk_status);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		if (ff_layout_choose_best_ds_for_read(hdr->lseg,
					hdr->pgio_mirror_idx + 1,
					&hdr->pgio_mirror_idx))
			goto out_eagain;
		set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
		return task->tk_status;
	case -EAGAIN:
		goto out_eagain;
	}

	return 0;
out_eagain:
	rpc_restart_call_prepare(task);
	return -EAGAIN;
}

static bool
ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
{
	return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
}

/*
 * We reference the rpc_cred of the first WRITE that triggers the need for
 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
 * rfc5661 is not clear about which credential should be used.
 *
 * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so
 * we always send layoutcommit after DS writes.
 */
static void
ff_layout_set_layoutcommit(struct inode *inode,
		struct pnfs_layout_segment *lseg,
		loff_t end_offset)
{
	if (!ff_layout_need_layoutcommit(lseg))
		return;

	pnfs_set_layoutcommit(inode, lseg, end_offset);
	dprintk("%s inode %lu pls_end_pos %llu\n", __func__, inode->i_ino,
		(unsigned long long) NFS_I(inode)->layout->plh_lwb);
}

static bool
ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)
{
	/* No mirroring for now */
	struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);

	return ff_layout_test_devid_unavailable(node);
}

static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_start_read(hdr->inode,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count,
			task->tk_start);
}

static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_end_read(task,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count,
			hdr->res.count);
	set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}

static int ff_layout_read_prepare_common(struct rpc_task *task,
					 struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}
	if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
		rpc_exit(task, -EHOSTDOWN);
		return -EAGAIN;
	}

	ff_layout_read_record_layoutstats_start(task, hdr);
	return 0;
}

/*
 * Call ops for the async read/write cases
 * In the case of dense layouts, the offset needs to be reset to its
 * original value.
 */
static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;

	ff_layout_read_prepare_common(task, hdr);
}

static void ff_layout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_read_record_layoutstats_done(task, hdr);
	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}

static void ff_layout_read_release(void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
	if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
		pnfs_read_resend_pnfs(hdr);
	else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
		ff_layout_reset_read(hdr);
	pnfs_generic_rw_release(data);
}

static int ff_layout_write_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	loff_t end_offs = 0;
	int err;

	trace_nfs4_pnfs_write(hdr, task->tk_status);
	if (task->tk_status < 0)
		ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
					    hdr->args.offset, hdr->args.count,
					    hdr->res.op_status, OP_WRITE,
					    task->tk_status);
	err = ff_layout_async_handle_error(task, hdr->args.context->state,
					   hdr->ds_clp, hdr->lseg,
					   hdr->pgio_mirror_idx);

	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
		set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
		return task->tk_status;
	case -NFS4ERR_RESET_TO_MDS:
		set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
		return task->tk_status;
	case -EAGAIN:
		return -EAGAIN;
	}

	if (hdr->res.verf->committed == NFS_FILE_SYNC ||
	    hdr->res.verf->committed == NFS_DATA_SYNC)
		end_offs = hdr->mds_offset + (loff_t)hdr->res.count;

	/* Note: if the write is unstable, don't set end_offs until commit */
	ff_layout_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);

	/* zero out fattr since we don't care DS attr at all */
	hdr->fattr.valid = 0;
	if (task->tk_status >= 0)
		nfs_writeback_update_inode(hdr);

	return 0;
}

static int ff_layout_commit_done_cb(struct rpc_task *task,
				     struct nfs_commit_data *data)
{
	int err;

	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
	if (task->tk_status < 0)
		ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
					    data->args.offset, data->args.count,
					    data->res.op_status, OP_COMMIT,
					    task->tk_status);
	err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
					   data->lseg, data->ds_commit_index);

	switch (err) {
	case -NFS4ERR_RESET_TO_PNFS:
	case -NFS4ERR_RESET_TO_MDS:
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);

	return 0;
}

static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_start_write(hdr->inode,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count,
			task->tk_start);
}

static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
		struct nfs_pgio_header *hdr)
{
	if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
		return;
	nfs4_ff_layout_stat_io_end_write(task,
			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
			hdr->args.count, hdr->res.count,
			hdr->res.verf->committed);
	set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}

static int ff_layout_write_prepare_common(struct rpc_task *task,
					  struct nfs_pgio_header *hdr)
{
	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return -EIO;
	}

	if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
		rpc_exit(task, -EHOSTDOWN);
		return -EAGAIN;
	}

	ff_layout_write_record_layoutstats_start(task, hdr);
	return 0;
}

static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr))
		return;

	rpc_call_start(task);
}

static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;

	ff_layout_write_prepare_common(task, hdr);
}

static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs4_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, hdr);
}

static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_write_record_layoutstats_done(task, hdr);
	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}

static void ff_layout_write_release(void *data)
{
	struct nfs_pgio_header *hdr = data;

	ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
	if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
		ff_layout_reset_write(hdr, true);
	else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
		ff_layout_reset_write(hdr, false);
	pnfs_generic_rw_release(data);
}

static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
		return;
	nfs4_ff_layout_stat_io_start_write(cdata->inode,
			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
			0, task->tk_start);
}

static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	struct nfs_page *req;
	__u64 count = 0;

	if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
		return;

	if (task->tk_status == 0) {
		list_for_each_entry(req, &cdata->pages, wb_list)
			count += req->wb_bytes;
	}
	nfs4_ff_layout_stat_io_end_write(task,
			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
			count, count, NFS_FILE_SYNC);
	set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
}

static void ff_layout_commit_prepare_common(struct rpc_task *task,
		struct nfs_commit_data *cdata)
{
	ff_layout_commit_record_layoutstats_start(task, cdata);
}

static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
	ff_layout_commit_prepare_common(task, data);
	rpc_call_start(task);
}

static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	if (nfs4_setup_sequence(wdata->ds_clp,
				&wdata->args.seq_args,
				&wdata->res.seq_res,
				task))
		return;
	ff_layout_commit_prepare_common(task, data);
}

static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
	pnfs_generic_write_commit_done(task, data);
}

static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	ff_layout_commit_record_layoutstats_done(task, cdata);
	rpc_count_iostats_metrics(task,
	    &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}

static void ff_layout_commit_release(void *data)
{
	struct nfs_commit_data *cdata = data;

	ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
	pnfs_generic_commit_release(data);
}

static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_read_prepare_v3,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_read_prepare_v4,
	.rpc_call_done = ff_layout_read_call_done,
	.rpc_count_stats = ff_layout_read_count_stats,
	.rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_write_prepare_v3,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = ff_layout_write_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_write_prepare_v4,
	.rpc_call_done = ff_layout_write_call_done,
	.rpc_count_stats = ff_layout_write_count_stats,
	.rpc_release = ff_layout_write_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v3,
	.rpc_call_done = ff_layout_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = ff_layout_commit_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
	.rpc_call_prepare = ff_layout_commit_prepare_v4,
	.rpc_call_done = ff_layout_commit_done,
	.rpc_count_stats = ff_layout_commit_count_stats,
	.rpc_release = ff_layout_commit_release,
};

static enum pnfs_try_status
ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	const struct cred *ds_cred;
	loff_t offset = hdr->args.offset;
	u32 idx = hdr->pgio_mirror_idx;
	int vers;
	struct nfs_fh *fh;

	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
		__func__, hdr->inode->i_ino,
		hdr->args.pgbase, (size_t)hdr->args.count, offset);

	ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
	if (!ds)
		goto out_failed;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
	if (!ds_cred)
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);

	hdr->pgio_done_cb = ff_layout_read_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	if (vers == 4 &&
	    !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
		goto out_failed;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;
	hdr->mds_offset = offset;

	/* Perform an asynchronous read to ds */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_read_call_ops_v3 :
				      &ff_layout_read_call_ops_v4,
			  0, RPC_TASK_SOFTCONN);
	put_cred(ds_cred);
	return PNFS_ATTEMPTED;

out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	return PNFS_NOT_ATTEMPTED;
}

/* Perform an asynchronous write to the DS for the given mirror index */
static enum pnfs_try_status
ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	const struct cred *ds_cred;
	loff_t offset = hdr->args.offset;
	int vers;
	struct nfs_fh *fh;
	int idx = hdr->pgio_mirror_idx;

	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		goto out_failed;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   hdr->inode);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
	if (!ds_cred)
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count),
		vers);

	hdr->pgio_done_cb = ff_layout_write_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	hdr->ds_commit_idx = idx;
	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
	if (fh)
		hdr->args.fh = fh;

	if (vers == 4 &&
	    !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
		goto out_failed;

	/*
	 * Note that if we ever decide to split across DSes,
	 * then we may need to handle dense-like offsets.
	 */
	hdr->args.offset = offset;

	/* Perform an asynchronous write */
	nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
			  vers == 3 ? &ff_layout_write_call_ops_v3 :
				      &ff_layout_write_call_ops_v4,
			  sync, RPC_TASK_SOFTCONN);
	put_cred(ds_cred);
	return PNFS_ATTEMPTED;

out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
	return PNFS_NOT_ATTEMPTED;
}

static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	return i;
}

static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);

	/* FIXME: Assume that there is only one NFS version available
	 * for the DS.
	 */
	return &flseg->mirror_array[i]->fh_versions[0];
}

static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
{
	struct pnfs_layout_segment *lseg = data->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	const struct cred *ds_cred;
	u32 idx;
	int vers, ret;
	struct nfs_fh *fh;

	if (!lseg || !(pnfs_is_valid_lseg(lseg) ||
	    test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)))
		goto out_err;

	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
	ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
	if (!ds)
		goto out_err;

	ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
						   data->inode);
	if (IS_ERR(ds_clnt))
		goto out_err;

	ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
	if (!ds_cred)
		goto out_err;

	vers = nfs4_ff_layout_ds_version(lseg, idx);

	dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
		vers);
	data->commit_done_cb = ff_layout_commit_done_cb;
	data->cred = ds_cred;
	refcount_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;

	ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
				  vers == 3 ? &ff_layout_commit_call_ops_v3 :
					      &ff_layout_commit_call_ops_v4,
				  how, RPC_TASK_SOFTCONN);
	put_cred(ds_cred);
	return ret;
out_err:
	pnfs_generic_prepare_to_resend_writes(data);
	pnfs_generic_commit_release(data);
	return -EAGAIN;
}

static int
ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			   int how, struct nfs_commit_info *cinfo)
{
	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
					    ff_layout_initiate_commit);
}

static struct pnfs_ds_commit_info *
ff_layout_get_ds_info(struct inode *inode)
{
	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;

	if (layout == NULL)
		return NULL;

	return &FF_LAYOUT_FROM_HDR(layout)->commit_info;
}

static void
ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
						  id_node));
}

static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
				  const struct nfs4_layoutreturn_args *args,
				  const struct nfs4_flexfile_layoutreturn_args *ff_args)
{
	__be32 *start;

	start = xdr_reserve_space(xdr, 4);
	if (unlikely(!start))
		return -E2BIG;

	*start = cpu_to_be32(ff_args->num_errors);
	/* This assumes we always return _ALL_ layouts */
	return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors);
}

static void
encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
{
	WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
}

static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
			    const nfs4_stateid *stateid,
			    const struct nfs42_layoutstat_devinfo *devinfo)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 8 + 8);
	p = xdr_encode_hyper(p, devinfo->offset);
	p = xdr_encode_hyper(p, devinfo->length);
	encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
	p = xdr_reserve_space(xdr, 4*8);
	p = xdr_encode_hyper(p, devinfo->read_count);
	p = xdr_encode_hyper(p, devinfo->read_bytes);
	p = xdr_encode_hyper(p, devinfo->write_count);
	p = xdr_encode_hyper(p, devinfo->write_bytes);
	encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE);
}

static void
ff_layout_encode_ff_iostat(struct xdr_stream *xdr,
			    const nfs4_stateid *stateid,
			    const struct nfs42_layoutstat_devinfo *devinfo)
{
	ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo);
	ff_layout_encode_ff_layoutupdate(xdr, devinfo,
			devinfo->ld_private.data);
}

static void ff_layout_encode_iostats_array(struct xdr_stream *xdr,
		const struct nfs4_layoutreturn_args *args,
		struct nfs4_flexfile_layoutreturn_args *ff_args)
{
	__be32 *p;
	int i;

	p = xdr_reserve_space(xdr, 4);
	*p = cpu_to_be32(ff_args->num_dev);
	for (i = 0; i < ff_args->num_dev; i++)
		ff_layout_encode_ff_iostat(xdr,
				&args->layout->plh_stateid,
				&ff_args->devinfo[i]);
}

static void
ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo,
		unsigned int num_entries)
{
	unsigned int i;

	for (i = 0; i < num_entries; i++) {
		if (!devinfo[i].ld_private.ops)
			continue;
		if (!devinfo[i].ld_private.ops->free)
			continue;
		devinfo[i].ld_private.ops->free(&devinfo[i].ld_private);
	}
}

static struct nfs4_deviceid_node *
ff_layout_alloc_deviceid_node(struct nfs_server *server,
			      struct pnfs_device *pdev, gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds *dsaddr;

	dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags);
	if (!dsaddr)
		return NULL;
	return &dsaddr->id_node;
}

static void
ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
		const void *voidargs,
		const struct nfs4_xdr_opaque_data *ff_opaque)
{
	const struct nfs4_layoutreturn_args *args = voidargs;
	struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data;
	struct xdr_buf tmp_buf = {
		.head = {
			[0] = {
				.iov_base = page_address(ff_args->pages[0]),
			},
		},
		.buflen = PAGE_SIZE,
	};
	struct xdr_stream tmp_xdr;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);

	xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);

	ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
	ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);

	start = xdr_reserve_space(xdr, 4);
	*start = cpu_to_be32(tmp_buf.len);
	xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len);

	dprintk("%s: Return\n", __func__);
}

static void
ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args)
{
	struct nfs4_flexfile_layoutreturn_args *ff_args;

	if (!args->data)
		return;
	ff_args = args->data;
	args->data = NULL;

	ff_layout_free_ds_ioerr(&ff_args->errors);
	ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev);

	put_page(ff_args->pages[0]);
	kfree(ff_args);
}

static const struct nfs4_xdr_opaque_ops layoutreturn_ops = {
	.encode = ff_layout_encode_layoutreturn,
	.free = ff_layout_free_layoutreturn,
};

static int
ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
{
	struct nfs4_flexfile_layoutreturn_args *ff_args;
	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout);

	ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL);
	if (!ff_args)
		goto out_nomem;
	ff_args->pages[0] = alloc_page(GFP_KERNEL);
	if (!ff_args->pages[0])
		goto out_nomem_free;

	INIT_LIST_HEAD(&ff_args->errors);
	ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout,
			&args->range, &ff_args->errors,
			FF_LAYOUTRETURN_MAXERR);

	spin_lock(&args->inode->i_lock);
	ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
			&ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo));
	spin_unlock(&args->inode->i_lock);

	args->ld_private->ops = &layoutreturn_ops;
	args->ld_private->data = ff_args;
	return 0;
out_nomem_free:
	kfree(ff_args);
out_nomem:
	return -ENOMEM;
}
2104
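/*
 * Format an IPv4 address as a presentation string. Returns the length of
 * the formatted string; the caller treats 0 as failure.
 */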
static int
ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
{
	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;

	return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
}

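/*
 * Format an IPv6 address as a presentation string, omitting any scope id.
 * The shorthand forms below follow RFC 4291.
 */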
static size_t
ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
			  const int buflen)
{
	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
	const struct in6_addr *addr = &sin6->sin6_addr;

	/*
	 * RFC 4291, Section 2.2.2
	 *
	 * Shorthanded ANY address
	 */
	if (ipv6_addr_any(addr))
		return snprintf(buf, buflen, "::");

	/*
	 * RFC 4291, Section 2.2.2
	 *
	 * Shorthanded loopback address
	 */
	if (ipv6_addr_loopback(addr))
		return snprintf(buf, buflen, "::1");

	/*
	 * RFC 4291, Section 2.2.3
	 *
	 * Special presentation address format for mapped v4
	 * addresses.
	 */
	if (ipv6_addr_v4mapped(addr))
		return snprintf(buf, buflen, "::ffff:%pI4",
				&addr->s6_addr32[3]);

	/*
	 * RFC 4291, Section 2.2.1
	 */
	return snprintf(buf, buflen, "%pI6c", addr);
}

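/*
 * Encode a netaddr4: the opaque netid ("tcp" or "tcp6") followed by the
 * universal address string, i.e. the presentation address with the port
 * appended as two decimal octets.
 */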
static void
ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
{
	struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
	char portbuf[RPCBIND_MAXUADDRPLEN];
	char addrbuf[RPCBIND_MAXUADDRLEN];
	char *netid;
	unsigned short port;
	int len, netid_len;
	__be32 *p;

	switch (sap->sa_family) {
	case AF_INET:
		if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
			return;
		port = ntohs(((struct sockaddr_in *)sap)->sin_port);
		netid = "tcp";
		netid_len = 3;
		break;
	case AF_INET6:
		if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
			return;
		port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
		netid = "tcp6";
		netid_len = 4;
		break;
	default:
		/* we only support tcp and tcp6 */
		WARN_ON_ONCE(1);
		return;
	}

	snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
	len = strlcat(addrbuf, portbuf, sizeof(addrbuf));

	p = xdr_reserve_space(xdr, 4 + netid_len);
	xdr_encode_opaque(p, netid, netid_len);

	p = xdr_reserve_space(xdr, 4 + len);
	xdr_encode_opaque(p, addrbuf, len);
}

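/*
 * Encode an nfstime4: a 64-bit seconds field followed by a 32-bit
 * nanoseconds field (12 bytes on the wire).
 */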
static void
ff_layout_encode_nfstime(struct xdr_stream *xdr,
			 ktime_t t)
{
	struct timespec64 ts;
	__be32 *p;

	p = xdr_reserve_space(xdr, 12);
	ts = ktime_to_timespec64(t);
	p = xdr_encode_hyper(p, ts.tv_sec);
	*p++ = cpu_to_be32(ts.tv_nsec);
}

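/*
 * Encode one I/O latency record: five 64-bit counters (ops/bytes
 * requested, ops/bytes completed, bytes not delivered) followed by the
 * total busy time and aggregate completion time as nfstime4 values.
 */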
static void
ff_layout_encode_io_latency(struct xdr_stream *xdr,
			    struct nfs4_ff_io_stat *stat)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 5 * 8);
	p = xdr_encode_hyper(p, stat->ops_requested);
	p = xdr_encode_hyper(p, stat->bytes_requested);
	p = xdr_encode_hyper(p, stat->ops_completed);
	p = xdr_encode_hyper(p, stat->bytes_completed);
	p = xdr_encode_hyper(p, stat->bytes_not_delivered);
	ff_layout_encode_nfstime(xdr, stat->total_busy_time);
	ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
}

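/*
 * Encode the ff_layoutupdate body for one mirror: the data server's
 * netaddr and filehandle, the read and write latency stats (sampled under
 * mirror->lock), the duration since stat collection began, and a "local"
 * flag that this client always sets to false.
 */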
static void
ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
				 const struct nfs42_layoutstat_devinfo *devinfo,
				 struct nfs4_ff_layout_mirror *mirror)
{
	struct nfs4_pnfs_ds_addr *da;
	struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
	struct nfs_fh *fh = &mirror->fh_versions[0];
	__be32 *p;

	da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
	dprintk("%s: DS %s: encoding address %s\n",
		__func__, ds->ds_remotestr, da->da_remotestr);

	/* netaddr */
	ff_layout_encode_netaddr(xdr, da);

	/* nfs_fh */
	p = xdr_reserve_space(xdr, 4 + fh->size);
	xdr_encode_opaque(p, fh->data, fh->size);

	/* ff_io_latency4 read */
	spin_lock(&mirror->lock);
	ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
	/* ff_io_latency4 write */
	ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
	spin_unlock(&mirror->lock);
	/* nfstime4 */
	ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
	/* bool */
	p = xdr_reserve_space(xdr, 4);
	*p = cpu_to_be32(false);
}

static void
ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args,
			     const struct nfs4_xdr_opaque_data *opaque)
{
	struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque,
			struct nfs42_layoutstat_devinfo, ld_private);
	__be32 *start;

	/* layoutupdate length */
	start = xdr_reserve_space(xdr, 4);
	ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data);

	*start = cpu_to_be32((xdr->p - start - 1) * 4);
}

static void
ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque)
{
	struct nfs4_ff_layout_mirror *mirror = opaque->data;

	ff_layout_put_mirror(mirror);
}

static const struct nfs4_xdr_opaque_ops layoutstat_ops = {
	.encode = ff_layout_encode_layoutstats,
	.free = ff_layout_free_layoutstats,
};

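/*
 * Walk the layout's mirror list and fill in up to dev_limit layoutstats
 * devinfo entries, one per mirror with fresh statistics. Each selected
 * mirror gains a reference that is dropped by layoutstat_ops.free.
 * The caller must hold the inode's i_lock. Returns the number of entries
 * filled in.
 */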
static int
ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
			       struct nfs42_layoutstat_devinfo *devinfo,
			       int dev_limit)
{
	struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_deviceid_node *dev;
	int i = 0;

	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {
		if (i >= dev_limit)
			break;
		if (IS_ERR_OR_NULL(mirror->mirror_ds))
			continue;
		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
			continue;
		/* mirror refcount put in cleanup_layoutstats */
		if (!refcount_inc_not_zero(&mirror->ref))
			continue;
		dev = &mirror->mirror_ds->id_node;
		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
		devinfo->offset = 0;
		devinfo->length = NFS4_MAX_UINT64;
		spin_lock(&mirror->lock);
		devinfo->read_count = mirror->read_stat.io_stat.ops_completed;
		devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
		devinfo->write_count = mirror->write_stat.io_stat.ops_completed;
		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
		spin_unlock(&mirror->lock);
		devinfo->layout_type = LAYOUT_FLEX_FILES;
		devinfo->ld_private.ops = &layoutstat_ops;
		devinfo->ld_private.data = mirror;

		devinfo++;
		i++;
	}
	return i;
}

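/*
 * Prepare the devinfo array for a LAYOUTSTATS call. Returns -ENOENT if
 * no mirror currently has statistics to report.
 */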
static int
ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
{
	struct nfs4_flexfile_layout *ff_layout;
	const int dev_count = PNFS_LAYOUTSTATS_MAXDEV;

	/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
	args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);
	if (!args->devinfo)
		return -ENOMEM;

	spin_lock(&args->inode->i_lock);
	ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
	args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
			&args->devinfo[0], dev_count);
	spin_unlock(&args->inode->i_lock);
	if (!args->num_dev) {
		kfree(args->devinfo);
		args->devinfo = NULL;
		return -ENOENT;
	}

	return 0;
}

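/*
 * Advertise LAYOUTSTATS support when this layout driver is selected;
 * the operation itself is an NFSv4.2 extension.
 */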
static int
ff_layout_set_layoutdriver(struct nfs_server *server,
			   const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
	server->caps |= NFS_CAP_LAYOUTSTATS;
#endif
	return 0;
}

static struct pnfs_layoutdriver_type flexfilelayout_type = {
	.id			= LAYOUT_FLEX_FILES,
	.name			= "LAYOUT_FLEX_FILES",
	.owner			= THIS_MODULE,
	.flags			= PNFS_LAYOUTGET_ON_OPEN,
	.max_layoutget_response	= 4096,
	.set_layoutdriver	= ff_layout_set_layoutdriver,
	.alloc_layout_hdr	= ff_layout_alloc_layout_hdr,
	.free_layout_hdr	= ff_layout_free_layout_hdr,
	.alloc_lseg		= ff_layout_alloc_lseg,
	.free_lseg		= ff_layout_free_lseg,
	.add_lseg		= ff_layout_add_lseg,
	.pg_read_ops		= &ff_layout_pg_read_ops,
	.pg_write_ops		= &ff_layout_pg_write_ops,
	.get_ds_info		= ff_layout_get_ds_info,
	.free_deviceid_node	= ff_layout_free_deviceid_node,
	.mark_request_commit	= pnfs_layout_mark_request_commit,
	.clear_request_commit	= pnfs_generic_clear_request_commit,
	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
	.commit_pagelist	= ff_layout_commit_pagelist,
	.read_pagelist		= ff_layout_read_pagelist,
	.write_pagelist		= ff_layout_write_pagelist,
	.alloc_deviceid_node	= ff_layout_alloc_deviceid_node,
	.prepare_layoutreturn	= ff_layout_prepare_layoutreturn,
	.sync			= pnfs_nfs_generic_sync,
	.prepare_layoutstats	= ff_layout_prepare_layoutstats,
};

static int __init nfs4flexfilelayout_init(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",
	       __func__);
	return pnfs_register_layoutdriver(&flexfilelayout_type);
}

static void __exit nfs4flexfilelayout_exit(void)
{
	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",
	       __func__);
	pnfs_unregister_layoutdriver(&flexfilelayout_type);
}

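/* LAYOUT_FLEX_FILES is pNFS layout type 4 */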
MODULE_ALIAS("nfs-layouttype4-4");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");

module_init(nfs4flexfilelayout_init);
module_exit(nfs4flexfilelayout_exit);