/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache.
 */
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/module.h>

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>

#include <asm/uaccess.h>
#include <linux/atomic.h>

#include "internal.h"
#include "iostat.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_VFS

static struct kmem_cache *nfs_direct_cachep;

/*
 * This represents a set of asynchronous requests that we're waiting on
 */
struct nfs_direct_mirror {
	ssize_t count;
};

struct nfs_direct_req {
	struct kref		kref;		/* release manager */

	/* I/O parameters */
	struct nfs_open_context	*ctx;		/* file open context info */
	struct nfs_lock_context	*l_ctx;		/* Lock context info */
	struct kiocb *		iocb;		/* controlling i/o request */
	struct inode *		inode;		/* target file of i/o */

	/* completion state */
	atomic_t		io_count;	/* i/os we're waiting for */
	spinlock_t		lock;		/* protect completion state */

	struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
	int			mirror_count;

	ssize_t			count,		/* bytes actually processed */
				bytes_left,	/* bytes left to be sent */
				io_start,	/* start of IO */
				error;		/* any reported error */
	struct completion	completion;	/* wait for i/o completion */

	/* commit state */
	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */
	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */
	struct work_struct	work;
	int			flags;
#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
	struct nfs_writeverf	verf;		/* unstable write verifier */
};

static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
static void nfs_direct_write_schedule_work(struct work_struct *work);

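/*
 * io_count is the number of outstanding I/O operations on this
 * direct request.  The request is completed when io_count drops
 * to zero.
 */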
static inline void get_dreq(struct nfs_direct_req *dreq)
{
	atomic_inc(&dreq->io_count);
}

static inline int put_dreq(struct nfs_direct_req *dreq)
{
	return atomic_dec_and_test(&dreq->io_count);
}

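/*
 * Account the bytes completed by @hdr.  For mirrored writes,
 * dreq->count may only advance as far as every mirror agrees.
 */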
static void
nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
{
	int i;
	ssize_t count;

	if (dreq->mirror_count == 1) {
		dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
		dreq->count += hdr->good_bytes;
	} else {
		/* mirrored writes */
		count = dreq->mirrors[hdr->pgio_mirror_idx].count;
		if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
			count = hdr->io_start + hdr->good_bytes - dreq->io_start;
			dreq->mirrors[hdr->pgio_mirror_idx].count = count;
		}

		/* update the dreq->count by finding the minimum agreed count
		 * from all mirrors */
		count = dreq->mirrors[0].count;

		for (i = 1; i < dreq->mirror_count; i++)
			count = min(count, dreq->mirrors[i].count);

		dreq->count = count;
	}
}

/*
 * nfs_direct_select_verf - select the right verifier
 * @dreq - direct request possibly spanning multiple servers
 * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
 * @commit_idx - commit bucket index for the DS
 *
 * returns the correct verifier to use given the role of the server
 */
static struct nfs_writeverf *
nfs_direct_select_verf(struct nfs_direct_req *dreq,
		       struct nfs_client *ds_clp,
		       int commit_idx)
{
	struct nfs_writeverf *verfp = &dreq->verf;

#ifdef CONFIG_NFS_V4_1
	/*
	 * pNFS is in use, use the DS verf except commit_through_mds is set
	 * for layout segment where nbuckets is zero.
	 */
	if (ds_clp && dreq->ds_cinfo.nbuckets > 0) {
		if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) {
			/* no speculative access past the bounds check above */
			gmb();
			verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf;
		} else
			WARN_ON_ONCE(1);
	}
#endif
	return verfp;
}

/*
 * nfs_direct_set_hdr_verf - set the write/commit verifier
 * @dreq - direct request possibly spanning multiple servers
 * @hdr - pageio header to validate against previously seen verfs
 *
 * Set the server's (MDS or DS) "seen" verifier
 */
static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
				    struct nfs_pgio_header *hdr)
{
	struct nfs_writeverf *verfp;

	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx);
	WARN_ON_ONCE(verfp->committed >= 0);
	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
	WARN_ON_ONCE(verfp->committed < 0);
}

static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1,
			       const struct nfs_writeverf *v2)
{
	return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier);
}

/*
 * nfs_direct_set_or_cmp_hdr_verf - set or compare the write/commit verifier
 * @dreq - direct request possibly spanning multiple servers
 * @hdr - pageio header to validate against previously seen verf
 *
 * set the server's "seen" verifier if not initialized.
 * returns result of comparison between @hdr->verf and the "seen"
 * verifier of the server used by @hdr (DS or MDS)
 */
static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
					  struct nfs_pgio_header *hdr)
{
	struct nfs_writeverf *verfp;

	verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx);
	if (verfp->committed < 0) {
		nfs_direct_set_hdr_verf(dreq, hdr);
		return 0;
	}
	return nfs_direct_cmp_verf(verfp, &hdr->verf);
}

/*
 * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
 * @dreq - direct request possibly spanning multiple servers
 * @data - commit data to validate against previously seen verifier
 *
 * returns result of comparison between @data->verf and the verifier
 * of the server used by @data (DS or MDS)
 */
static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
					   struct nfs_commit_data *data)
{
	struct nfs_writeverf *verfp;

	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
					 data->ds_commit_index);

	/* verifier not set so always fail */
	if (verfp->committed < 0)
		return 1;

	return nfs_direct_cmp_verf(verfp, &data->verf);
}

/**
 * nfs_direct_IO - NFS address space operation for direct I/O
 * @rw: direction (read or write)
 * @iocb: target I/O control block
 * @iov: array of vectors that define I/O buffer
 * @pos: offset in file to begin the operation
 * @nr_segs: size of iovec array
 *
 * The presence of this routine in the address space ops vector means
 * the NFS client supports direct I/O. However, for most direct IO, we
 * shunt off direct read and write requests before the VFS gets them,
 * so this method is only ever called for swap.
 */
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	/* we only support swap file calling nfs_direct_IO */
	if (!IS_SWAPFILE(inode))
		return 0;

	VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);

	if (rw == READ || rw == KERNEL_READ)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos,
				rw == READ ? true : false);
	return nfs_file_direct_write(iocb, iov, nr_segs, pos,
				rw == WRITE ? true : false);
}

static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
	unsigned int i;
	for (i = 0; i < npages; i++)
		page_cache_release(pages[i]);
}

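/*
 * Set up a commit_info so that commit state is tracked in the dreq
 * itself rather than in the inode.
 */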
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
			      struct nfs_direct_req *dreq)
{
	cinfo->inode = dreq->inode;
	cinfo->mds = &dreq->mds_cinfo;
	cinfo->ds = &dreq->ds_cinfo;
	cinfo->dreq = dreq;
	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
}

static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
					      struct nfs_pageio_descriptor *pgio,
					      struct nfs_page *req)
{
	int mirror_count = 1;

	if (pgio->pg_ops->pg_get_mirror_count)
		mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);

	dreq->mirror_count = mirror_count;
}

static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
	struct nfs_direct_req *dreq;

	dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
	if (!dreq)
		return NULL;

	kref_init(&dreq->kref);
	kref_get(&dreq->kref);	/* second ref dropped by nfs_direct_complete() */
	init_completion(&dreq->completion);
	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
	dreq->mirror_count = 1;
	spin_lock_init(&dreq->lock);

	return dreq;
}

static void nfs_direct_req_free(struct kref *kref)
{
	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);

	nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
	if (dreq->l_ctx != NULL)
		nfs_put_lock_context(dreq->l_ctx);
	if (dreq->ctx != NULL)
		put_nfs_open_context(dreq->ctx);
	kmem_cache_free(nfs_direct_cachep, dreq);
}

static void nfs_direct_req_release(struct nfs_direct_req *dreq)
{
	kref_put(&dreq->kref, nfs_direct_req_free);
}

ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
{
	return dreq->bytes_left;
}
EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);

/*
 * Collects and returns the final error value/byte-count.
 */
static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
{
	ssize_t result = -EIOCBQUEUED;

	/* Async requests don't wait here */
	if (dreq->iocb)
		goto out;

	result = wait_for_completion_killable(&dreq->completion);

	if (!result)
		result = dreq->error;
	if (!result)
		result = dreq->count;

out:
	return (ssize_t) result;
}

/*
 * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
 * the iocb is still valid here if this is a synchronous request.
 */
static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
{
	struct inode *inode = dreq->inode;

	if (dreq->iocb && write) {
		loff_t pos = dreq->iocb->ki_pos + dreq->count;

		spin_lock(&inode->i_lock);
		if (i_size_read(inode) < pos)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	if (write)
		nfs_zap_mapping(inode, inode->i_mapping);

	inode_dio_end(inode);

	if (dreq->iocb) {
		long res = (long) dreq->error;
		if (!res)
			res = (long) dreq->count;
		aio_complete(dreq->iocb, res, 0);
	}

	complete_all(&dreq->completion);

	nfs_direct_req_release(dreq);
}

static void nfs_direct_readpage_release(struct nfs_page *req)
{
	dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
		req->wb_context->dentry->d_inode->i_sb->s_id,
		(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
		req->wb_bytes,
		(long long)req_offset(req));
	nfs_release_request(req);
}

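/*
 * Per-header read completion: record the bytes that arrived, dirty
 * the target pages, and release the page requests.
 */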
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;
	struct nfs_direct_req *dreq = hdr->dreq;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out_put;

	spin_lock(&dreq->lock);
	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
		dreq->error = hdr->error;
	else
		nfs_direct_good_bytes(dreq, hdr);

	spin_unlock(&dreq->lock);

	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;

		if (!PageCompound(page) && bytes < hdr->good_bytes)
			set_page_dirty(page);
		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		nfs_direct_readpage_release(req);
	}
out_put:
	if (put_dreq(dreq))
		nfs_direct_complete(dreq, false);
	hdr->release(hdr);
}

static void nfs_read_sync_pgio_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_release_request(req);
	}
}

static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
	get_dreq(hdr->dreq);
}

static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
	.error_cleanup = nfs_read_sync_pgio_error,
	.init_hdr = nfs_direct_pgio_init,
	.completion = nfs_direct_read_completion,
};

/*
 * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
 * operation.  If get_user_pages() fails, bail and stop sending more
 * reads.  Otherwise, if no requests have been sent, just return an
 * error.
 */
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
						const struct iovec *iov,
						loff_t pos, bool uio)
{
	struct nfs_direct_req *dreq = desc->pg_dreq;
	struct nfs_open_context *ctx = dreq->ctx;
	struct inode *inode = ctx->dentry->d_inode;
	unsigned long user_addr = (unsigned long)iov->iov_base;
	size_t count = iov->iov_len;
	size_t rsize = NFS_SERVER(inode)->rsize;
	unsigned int pgbase;
	int result;
	ssize_t started = 0;
	struct page **pagevec = NULL;
	unsigned int npages;

	do {
		size_t bytes;
		int i;

		pgbase = user_addr & ~PAGE_MASK;
		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);

		result = -ENOMEM;
		npages = nfs_page_array_len(pgbase, bytes);
		if (!pagevec)
			pagevec = kmalloc(npages * sizeof(struct page *),
					  GFP_KERNEL);
		if (!pagevec)
			break;
		if (uio) {
			result = get_user_pages_unlocked(current, current->mm,
						user_addr,
						npages, 1, 0,
						pagevec);
			if (result < 0)
				break;
		} else {
			WARN_ON(npages != 1);
			result = get_kernel_page(user_addr, 1, pagevec);
			if (WARN_ON(result != 1))
				break;
		}

		if ((unsigned)result < npages) {
			bytes = result * PAGE_SIZE;
			if (bytes <= pgbase) {
				nfs_direct_release_pages(pagevec, result);
				break;
			}
			bytes -= pgbase;
			npages = result;
		}

		for (i = 0; i < npages; i++) {
			struct nfs_page *req;
			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);

			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
						 pgbase, req_len);
			if (IS_ERR(req)) {
				result = PTR_ERR(req);
				break;
			}
			req->wb_index = pos >> PAGE_SHIFT;
			req->wb_offset = pos & ~PAGE_MASK;
			if (!nfs_pageio_add_request(desc, req)) {
				result = desc->pg_error;
				nfs_release_request(req);
				break;
			}
			pgbase = 0;
			bytes -= req_len;
			started += req_len;
			user_addr += req_len;
			pos += req_len;
			count -= req_len;
			dreq->bytes_left -= req_len;
		}
		/* The nfs_page now hold references to these pages */
		nfs_direct_release_pages(pagevec, npages);
	} while (count != 0 && result >= 0);

	kfree(pagevec);

	if (started)
		return started;
	return result < 0 ? (ssize_t) result : -EFAULT;
}

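/*
 * Dispatch NFS READs for each segment of the user's iovec, then drop
 * our reference to the dreq; the final put_dreq() completes the
 * request once all I/O has finished.
 */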
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
					      const struct iovec *iov,
					      unsigned long nr_segs,
					      loff_t pos, bool uio)
{
	struct nfs_pageio_descriptor desc;
	struct inode *inode = dreq->inode;
	ssize_t result = -EINVAL;
	size_t requested_bytes = 0;
	unsigned long seg;

	nfs_pageio_init_read(&desc, dreq->inode, false,
			     &nfs_direct_read_completion_ops);
	get_dreq(dreq);
	desc.pg_dreq = dreq;
	inode_dio_begin(inode);

	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *vec = &iov[seg];
		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
		if (result < 0)
			break;
		requested_bytes += result;
		if ((size_t)result < vec->iov_len)
			break;
		pos += vec->iov_len;
	}

	nfs_pageio_complete(&desc);

	/*
	 * If no bytes were started, return the error, and let the
	 * generic layer handle the completion.
	 */
	if (requested_bytes == 0) {
		inode_dio_end(inode);
		nfs_direct_req_release(dreq);
		return result < 0 ? result : -EIO;
	}

	if (put_dreq(dreq))
		nfs_direct_complete(dreq, false);
	return 0;
}

/**
 * nfs_file_direct_read - file direct read operation for NFS files
 * @iocb: target I/O control block
 * @iov: vector of user buffers into which to read data
 * @nr_segs: size of iov vector
 * @pos: byte offset in file where reading starts
 * @uio: whether the buffers are in user space
 *
 * We use this function for direct reads instead of calling
 * generic_file_aio_read() in order to avoid gfar's check to see if
 * the request starts before the end of the file.  For that check
 * to work, we must generate a GETATTR before each direct read, and
 * even then there is a window between the GETATTR and the subsequent
 * READ where the file size could change.  Our preference is simply
 * to do all reads the application wants, and the server will take
 * care of managing the end of file boundary.
 */
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos, bool uio)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct nfs_direct_req *dreq;
	struct nfs_lock_context *l_ctx;
	ssize_t result = -EINVAL;
	size_t count;

	count = iov_length(iov, nr_segs);
	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);

	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
		file, count, (long long) pos);

	result = 0;
	if (!count)
		goto out;

	mutex_lock(&inode->i_mutex);
	result = nfs_sync_mapping(mapping);
	if (result)
		goto out_unlock;

	task_io_account_read(count);

	result = -ENOMEM;
	dreq = nfs_direct_req_alloc();
	if (dreq == NULL)
		goto out_unlock;

	dreq->inode = inode;
	dreq->bytes_left = count;
	dreq->io_start = pos;
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	l_ctx = nfs_get_lock_context(dreq->ctx);
	if (IS_ERR(l_ctx)) {
		result = PTR_ERR(l_ctx);
		goto out_release;
	}
	dreq->l_ctx = l_ctx;
	if (!is_sync_kiocb(iocb))
		dreq->iocb = iocb;

	NFS_I(inode)->read_io += count;
	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);

	mutex_unlock(&inode->i_mutex);

	if (!result) {
		result = nfs_direct_wait(dreq);
		if (result > 0)
			iocb->ki_pos = pos + result;
	}

	nfs_direct_req_release(dreq);
	return result;

out_release:
	nfs_direct_req_release(dreq);
out_unlock:
	mutex_unlock(&inode->i_mutex);
out:
	return result;
}

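/*
 * Gather every request still awaiting a commit (from the pNFS data
 * server buckets as well as the MDS list) onto @list so that they
 * can be rescheduled.
 */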
static void
nfs_direct_write_scan_commit_list(struct inode *inode,
				  struct list_head *list,
				  struct nfs_commit_info *cinfo)
{
	spin_lock(&cinfo->inode->i_lock);
#ifdef CONFIG_NFS_V4_1
	if (cinfo->ds != NULL && cinfo->ds->nwritten != 0)
		NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
#endif
	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
	spin_unlock(&cinfo->inode->i_lock);
}

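/*
 * A failed commit, or a write verifier mismatch, means the server may
 * have lost the unstable writes: reset the dreq accounting and resend
 * all of the affected requests as stable writes.
 */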
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
	struct nfs_pageio_descriptor desc;
	struct nfs_page *req, *tmp;
	LIST_HEAD(reqs);
	struct nfs_commit_info cinfo;
	LIST_HEAD(failed);
	int i;

	nfs_init_cinfo_from_dreq(&cinfo, dreq);
	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);

	dreq->count = 0;
	dreq->verf.committed = NFS_INVALID_STABLE_HOW;
	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
	for (i = 0; i < dreq->mirror_count; i++)
		dreq->mirrors[i].count = 0;
	get_dreq(dreq);

	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
			      &nfs_direct_write_completion_ops);
	desc.pg_dreq = dreq;

	req = nfs_list_entry(reqs.next);
	nfs_direct_setup_mirroring(dreq, &desc, req);
	if (desc.pg_error < 0) {
		list_splice_init(&reqs, &failed);
		goto out_failed;
	}

	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
		if (!nfs_pageio_add_request(&desc, req)) {
			nfs_list_remove_request(req);
			nfs_list_add_request(req, &failed);
			spin_lock(&cinfo.inode->i_lock);
			dreq->flags = 0;
			if (desc.pg_error < 0)
				dreq->error = desc.pg_error;
			else
				dreq->error = -EIO;
			spin_unlock(&cinfo.inode->i_lock);
		}
		nfs_release_request(req);
	}
	nfs_pageio_complete(&desc);

out_failed:
	while (!list_empty(&failed)) {
		req = nfs_list_entry(failed.next);
		nfs_list_remove_request(req);
		nfs_unlock_and_release_request(req);
	}

	if (put_dreq(dreq))
		nfs_direct_write_complete(dreq, dreq->inode);
}

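/*
 * COMMIT completion: if the commit failed or the verifier changed,
 * flag the dreq so the writes are rescheduled through the MDS.
 */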
static void nfs_direct_commit_complete(struct nfs_commit_data *data)
{
	struct nfs_direct_req *dreq = data->dreq;
	struct nfs_commit_info cinfo;
	struct nfs_page *req;
	int status = data->task.tk_status;

	nfs_init_cinfo_from_dreq(&cinfo, dreq);
	if (status < 0) {
		dprintk("NFS: %5u commit failed with error %d.\n",
			data->task.tk_pid, status);
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
	}

	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
			/* Note the rewrite will go through mds */
			nfs_mark_request_commit(req, NULL, &cinfo, 0);
		} else
			nfs_release_request(req);
		nfs_unlock_and_release_request(req);
	}

	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
		nfs_direct_write_complete(dreq, data->inode);
}

static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
				     struct nfs_page *req)
{
	struct nfs_direct_req *dreq = cinfo->dreq;

	spin_lock(&dreq->lock);
	dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
	spin_unlock(&dreq->lock);
	nfs_mark_request_commit(req, NULL, cinfo, 0);
}

static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
	.completion = nfs_direct_commit_complete,
	.resched_write = nfs_direct_resched_write,
};

static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
	int res;
	struct nfs_commit_info cinfo;
	LIST_HEAD(mds_list);

	nfs_init_cinfo_from_dreq(&cinfo, dreq);
	nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
	res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
	if (res < 0) /* res == -ENOMEM */
		nfs_direct_write_reschedule(dreq);
}

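/*
 * Deferred completion work: depending on dreq->flags, either schedule
 * a commit, reschedule the writes, or complete the request.
 */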
static void nfs_direct_write_schedule_work(struct work_struct *work)
{
	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
	int flags = dreq->flags;

	dreq->flags = 0;
	switch (flags) {
		case NFS_ODIRECT_DO_COMMIT:
			nfs_direct_commit_schedule(dreq);
			break;
		case NFS_ODIRECT_RESCHED_WRITES:
			nfs_direct_write_reschedule(dreq);
			break;
		default:
			nfs_direct_complete(dreq, true);
	}
}

static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
	schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
}

/*
 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
 * operation.  If get_user_pages() fails, bail and stop sending more
 * writes.  Otherwise, if no requests have been sent, just return an
 * error.
 */
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
						 const struct iovec *iov,
						 loff_t pos, bool uio)
{
	struct nfs_direct_req *dreq = desc->pg_dreq;
	struct nfs_open_context *ctx = dreq->ctx;
	struct inode *inode = ctx->dentry->d_inode;
	unsigned long user_addr = (unsigned long)iov->iov_base;
	size_t count = iov->iov_len;
	size_t wsize = NFS_SERVER(inode)->wsize;
	unsigned int pgbase;
	int result;
	ssize_t started = 0;
	struct page **pagevec = NULL;
	unsigned int npages;

	do {
		size_t bytes;
		int i;

		pgbase = user_addr & ~PAGE_MASK;
		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);

		result = -ENOMEM;
		npages = nfs_page_array_len(pgbase, bytes);
		if (!pagevec)
			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
		if (!pagevec)
			break;

		if (uio) {
			result = get_user_pages_unlocked(current, current->mm,
						user_addr,
						npages, 0, 0,
						pagevec);
			if (result < 0)
				break;
		} else {
			WARN_ON(npages != 1);
			result = get_kernel_page(user_addr, 0, pagevec);
			if (WARN_ON(result != 1))
				break;
		}

		if ((unsigned)result < npages) {
			bytes = result * PAGE_SIZE;
			if (bytes <= pgbase) {
				nfs_direct_release_pages(pagevec, result);
				break;
			}
			bytes -= pgbase;
			npages = result;
		}

		for (i = 0; i < npages; i++) {
			struct nfs_page *req;
			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);

			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
						 pgbase, req_len);
			if (IS_ERR(req)) {
				result = PTR_ERR(req);
				break;
			}

			nfs_direct_setup_mirroring(dreq, desc, req);

			nfs_lock_request(req);
			req->wb_index = pos >> PAGE_SHIFT;
			req->wb_offset = pos & ~PAGE_MASK;
			if (!nfs_pageio_add_request(desc, req)) {
				result = desc->pg_error;
				nfs_unlock_and_release_request(req);
				break;
			}
			pgbase = 0;
			bytes -= req_len;
			started += req_len;
			user_addr += req_len;
			pos += req_len;
			count -= req_len;
			dreq->bytes_left -= req_len;
		}
		/* The nfs_page now hold references to these pages */
		nfs_direct_release_pages(pagevec, npages);
	} while (count != 0 && result >= 0);

	kfree(pagevec);

	if (started)
		return started;
	return result < 0 ? (ssize_t) result : -EFAULT;
}

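/*
 * Per-header write completion: account the good bytes and use the
 * write verifiers to decide whether the requests need a commit, need
 * to be resent, or can simply be released.
 */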
static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
	struct nfs_direct_req *dreq = hdr->dreq;
	struct nfs_commit_info cinfo;
	bool request_commit = false;
	struct nfs_page *req = nfs_list_entry(hdr->pages.next);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out_put;

	nfs_init_cinfo_from_dreq(&cinfo, dreq);

	spin_lock(&dreq->lock);

	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
		dreq->error = hdr->error;
	if (dreq->error == 0) {
		nfs_direct_good_bytes(dreq, hdr);
		if (nfs_write_need_commit(hdr)) {
			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
				request_commit = true;
			else if (dreq->flags == 0) {
				nfs_direct_set_hdr_verf(dreq, hdr);
				request_commit = true;
				dreq->flags = NFS_ODIRECT_DO_COMMIT;
			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
				request_commit = true;
				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
					dreq->flags =
						NFS_ODIRECT_RESCHED_WRITES;
			}
		}
	}
	spin_unlock(&dreq->lock);

	while (!list_empty(&hdr->pages)) {

		req = nfs_list_entry(hdr->pages.next);
		nfs_list_remove_request(req);
		if (request_commit) {
			kref_get(&req->wb_kref);
			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
				hdr->ds_commit_idx);
		}
		nfs_unlock_and_release_request(req);
	}

out_put:
	if (put_dreq(dreq))
		nfs_direct_write_complete(dreq, hdr->inode);
	hdr->release(hdr);
}

static void nfs_write_sync_pgio_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_unlock_and_release_request(req);
	}
}

static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	struct nfs_direct_req *dreq = hdr->dreq;

	spin_lock(&dreq->lock);
	if (dreq->error == 0) {
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
		/* fake unstable write to let common nfs resend pages */
		hdr->verf.committed = NFS_UNSTABLE;
		hdr->good_bytes = hdr->args.count;
	}
	spin_unlock(&dreq->lock);
}

static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
	.error_cleanup = nfs_write_sync_pgio_error,
	.init_hdr = nfs_direct_pgio_init,
	.completion = nfs_direct_write_completion,
	.reschedule_io = nfs_direct_write_reschedule_io,
};

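/*
 * Dispatch NFS WRITEs for each segment of the user's iovec, then drop
 * our reference to the dreq; the final put_dreq() kicks off completion
 * processing (commit or resend as needed).
 */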
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
					       const struct iovec *iov,
					       unsigned long nr_segs,
					       loff_t pos, bool uio)
{
	struct nfs_pageio_descriptor desc;
	struct inode *inode = dreq->inode;
	ssize_t result = 0;
	size_t requested_bytes = 0;
	unsigned long seg;

	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
			      &nfs_direct_write_completion_ops);
	desc.pg_dreq = dreq;
	get_dreq(dreq);
	inode_dio_begin(inode);

	NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *vec = &iov[seg];
		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
		if (result < 0)
			break;
		requested_bytes += result;
		if ((size_t)result < vec->iov_len)
			break;
		pos += vec->iov_len;
	}
	nfs_pageio_complete(&desc);

	/*
	 * If no bytes were started, return the error, and let the
	 * generic layer handle the completion.
	 */
	if (requested_bytes == 0) {
		inode_dio_end(inode);
		nfs_direct_req_release(dreq);
		return result < 0 ? result : -EIO;
	}

	if (put_dreq(dreq))
		nfs_direct_write_complete(dreq, dreq->inode);
	return 0;
}

/**
 * nfs_file_direct_write - file direct write operation for NFS files
 * @iocb: target I/O control block
 * @iov: vector of user buffers from which to write data
 * @nr_segs: size of iov vector
 * @pos: byte offset in file where writing starts
 * @uio: whether the buffers are in user space
 *
 * The request is sent directly to the server; dirty pages that
 * overlap the request are flushed and invalidated first, and the
 * page cache is invalidated again afterwards so that cached readers
 * see the new data.
 *
 * Note that O_APPEND is not supported for NFS direct writes, as there
 * is no atomic O_APPEND write facility in the NFS protocol.
 */
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos, bool uio)
{
	ssize_t result = -EINVAL;
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct nfs_direct_req *dreq;
	struct nfs_lock_context *l_ctx;
	loff_t end;
	size_t count;

	count = iov_length(iov, nr_segs);
	end = (pos + count - 1) >> PAGE_CACHE_SHIFT;

	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);

	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
		file, count, (long long) pos);

	result = generic_write_checks(file, &pos, &count, 0);
	if (result)
		goto out;

	result = -EINVAL;
	if ((ssize_t) count < 0)
		goto out;
	result = 0;
	if (!count)
		goto out;

	mutex_lock(&inode->i_mutex);

	result = nfs_sync_mapping(mapping);
	if (result)
		goto out_unlock;

	if (mapping->nrpages) {
		result = invalidate_inode_pages2_range(mapping,
					pos >> PAGE_CACHE_SHIFT, end);
		if (result)
			goto out_unlock;
	}

	task_io_account_write(count);

	result = -ENOMEM;
	dreq = nfs_direct_req_alloc();
	if (!dreq)
		goto out_unlock;

	dreq->inode = inode;
	dreq->bytes_left = count;
	dreq->io_start = pos;
	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
	l_ctx = nfs_get_lock_context(dreq->ctx);
	if (IS_ERR(l_ctx)) {
		result = PTR_ERR(l_ctx);
		goto out_release;
	}
	dreq->l_ctx = l_ctx;
	if (!is_sync_kiocb(iocb))
		dreq->iocb = iocb;

	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);

	if (mapping->nrpages) {
		invalidate_inode_pages2_range(mapping,
					      pos >> PAGE_CACHE_SHIFT, end);
	}

	mutex_unlock(&inode->i_mutex);

	if (!result) {
		result = nfs_direct_wait(dreq);
		if (result > 0) {
			struct inode *inode = mapping->host;

			iocb->ki_pos = pos + result;
			spin_lock(&inode->i_lock);
			if (i_size_read(inode) < iocb->ki_pos)
				i_size_write(inode, iocb->ki_pos);
			spin_unlock(&inode->i_lock);
			generic_write_sync(file, pos, result);
		}
	}
	nfs_direct_req_release(dreq);
	return result;

out_release:
	nfs_direct_req_release(dreq);
out_unlock:
	mutex_unlock(&inode->i_mutex);
out:
	return result;
}

/**
 * nfs_init_directcache - create a slab cache for nfs_direct_req structures
 *
 */
int __init nfs_init_directcache(void)
{
	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
						sizeof(struct nfs_direct_req),
						0, (SLAB_RECLAIM_ACCOUNT|
							SLAB_MEM_SPREAD),
						NULL);
	if (nfs_direct_cachep == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
 *
 */
void nfs_destroy_directcache(void)
{
	kmem_cache_destroy(nfs_direct_cachep);
}