// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/dax.c - Direct Access filesystem code
 * Copyright (c) 2013-2014 Intel Corporation
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 */
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <asm/pgalloc.h>

#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

static inline unsigned int pe_order(enum page_entry_size pe_size)
{
	if (pe_size == PE_SIZE_PTE)
		return PAGE_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PUD)
		return PUD_SHIFT - PAGE_SHIFT;
	return ~0;
}

/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (ie low bits) within a PMD of a page offset.  */
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)

static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);

/*
 * DAX pagecache entries use XArray value entries so they can't be mistaken
 * for pages.  We use one bit for locking, one bit for the entry size (PMD)
 * and two more to tell us if the entry is a zero page or an empty entry
 * that is just used for locking.  In total four special bits.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the
 * ZERO_PAGE and EMPTY bits aren't set the entry is a normal DAX entry with
 * a filesystem block allocation.
 */
#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)

static unsigned long dax_to_pfn(void *entry)
{
	return xa_to_value(entry) >> DAX_SHIFT;
}

static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}

static bool dax_is_locked(void *entry)
{
	return xa_to_value(entry) & DAX_LOCKED;
}

static unsigned int dax_entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_ORDER;
	return 0;
}

static unsigned long dax_is_pmd_entry(void *entry)
{
	return xa_to_value(entry) & DAX_PMD;
}

static bool dax_is_pte_entry(void *entry)
{
	return !(xa_to_value(entry) & DAX_PMD);
}

static int dax_is_zero_entry(void *entry)
{
	return xa_to_value(entry) & DAX_ZERO_PAGE;
}

static int dax_is_empty_entry(void *entry)
{
	return xa_to_value(entry) & DAX_EMPTY;
}

/*
 * true if the entry that was found is of a smaller order than the entry
 * we were looking for
 */
static bool dax_is_conflict(void *entry)
{
	return entry == XA_RETRY_ENTRY;
}

/*
 * DAX page cache entry locking
 */
struct exceptional_entry_key {
	struct xarray *xa;
	pgoff_t entry_start;
};

struct wait_exceptional_entry_queue {
	wait_queue_entry_t wait;
	struct exceptional_entry_key key;
};

static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
		void *entry, struct exceptional_entry_key *key)
{
	unsigned long hash;
	unsigned long index = xas->xa_index;

	/*
	 * If 'entry' is a PMD, align the 'index' that we use for the wait
	 * queue to the start of that PMD.  This ensures that all offsets in
	 * the range covered by the PMD map to the same bit lock.
	 */
	if (dax_is_pmd_entry(entry))
		index &= ~PG_PMD_COLOUR;
	key->xa = xas->xa;
	key->entry_start = index;

	hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
	return wait_table + hash;
}

static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
		unsigned int mode, int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->xa != ewait->key.xa ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */
static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(xas, entry, &key);

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * are in the waitqueue and the wake function will detect them.
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}

/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it.  The caller must hold the i_pages lock and
 * not call this function with a locked entry.
 *
 * If @order is larger than the order of the entry found, a conflict entry
 * (XA_RETRY_ENTRY) is returned instead; see dax_is_conflict().
 */
static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_find_conflict(xas);
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			return entry;
		if (dax_entry_order(entry) < order)
			return XA_RETRY_ENTRY;
		if (!dax_is_locked(entry))
			return entry;

		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}

/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages).
 * After we call xas_unlock_irq(), we cannot touch xas->xa.
 */
static void wait_entry_unlocked(struct xa_state *xas, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xas_unlock_irq(xas);
	schedule();
	finish_wait(wq, &ewait.wait);
}

static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
	/* If we were the only waiter woken, wake the next one */
	if (entry && !dax_is_conflict(entry))
		dax_wake_entry(xas, entry, false);
}

/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the i_pages lock, so we know the xa_state is stale and must be
 * reset before use.
 */
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	BUG_ON(dax_is_locked(entry));
	xas_reset(xas);
	xas_lock_irq(xas);
	old = xas_store(xas, entry);
	xas_unlock_irq(xas);
	BUG_ON(!dax_is_locked(old));
	dax_wake_entry(xas, entry, false);
}

/*
 * Return: The entry stored at this location before it was locked.
 */
static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
	unsigned long v = xa_to_value(entry);

	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}

static unsigned long dax_entry_size(void *entry)
{
	if (dax_is_zero_entry(entry))
		return 0;
	else if (dax_is_empty_entry(entry))
		return 0;
	else if (dax_is_pmd_entry(entry))
		return PMD_SIZE;
	else
		return PAGE_SIZE;
}

static unsigned long dax_end_pfn(void *entry)
{
	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
	for (pfn = dax_to_pfn(entry); \
			pfn < dax_end_pfn(entry); pfn++)

/*
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index()
 * offsets.
 */
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);
		page->mapping = mapping;
		page->index = index + i++;
	}
}

static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}

static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}

/**
 * dax_lock_page - Lock the DAX entry corresponding to a page
 * @page: The page whose entry we want to lock
 *
 * Context: Process context.
 * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
 * not be locked.
 */
dax_entry_t dax_lock_page(struct page *page)
{
	XA_STATE(xas, NULL, 0);
	void *entry;

	/* Ensure page->mapping isn't freed while we look at it */
	rcu_read_lock();
	for (;;) {
		struct address_space *mapping = READ_ONCE(page->mapping);

		entry = NULL;
		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		entry = (void *)~0UL;
		if (S_ISCHR(mapping->host->i_mode))
			break;

		xas.xa = &mapping->i_pages;
		xas_lock_irq(&xas);
		if (mapping != page->mapping) {
			xas_unlock_irq(&xas);
			continue;
		}
		xas_set(&xas, page->index);
		entry = xas_load(&xas);
		if (dax_is_locked(entry)) {
			rcu_read_unlock();
			wait_entry_unlocked(&xas, entry);
			rcu_read_lock();
			continue;
		}
		dax_lock_entry(&xas, entry);
		xas_unlock_irq(&xas);
		break;
	}
	rcu_read_unlock();
	return (dax_entry_t)entry;
}

void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
	struct address_space *mapping = page->mapping;
	XA_STATE(xas, &mapping->i_pages, page->index);

	if (S_ISCHR(mapping->host->i_mode))
		return;

	dax_unlock_entry(&xas, (void *)cookie);
}

/*
 * Find page cache entry at given index. If it is a DAX entry, return it
 * with the entry locked. If the page cache doesn't contain an entry at
 * that index, add a locked empty entry.
 *
 * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
 * either return that locked entry or will return VM_FAULT_FALLBACK.
 * This will happen if there are any PTE entries within the PMD range
 * that we are requesting.
 *
 * We always favor PTE entries over PMD entries. There isn't a flow where we
 * evict PTE entries in order to 'upgrade' them to a PMD entry.  A PMD
 * insertion will fail if it finds any PTE entries already in the tree, and
 * a PTE insertion will cause an existing PMD entry to be unmapped and
 * downgraded to a PTE entry.  This happens for both PMD zero pages as
 * well as PMD empty entries.
 *
 * The exception to this downgrade path is for PMD entries that have
 * real storage backing them.  We will leave these real PMD entries in
 * the tree, and PTE writes will simply dirty the entire PMD entry.
 *
 * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For
 * persistent memory the benefit is doubtful. We can add that later if we
 * can show it helps.
 *
 * On error, return a VM_FAULT code, encoded as an xarray internal entry.
 * The VM_FAULT code is encoded in the low bits of the returned value.
 */
476static void *grab_mapping_entry(struct xa_state *xas,
477 struct address_space *mapping, unsigned int order)
478{
479 unsigned long index = xas->xa_index;
480 bool pmd_downgrade = false;
481 void *entry;
482
483retry:
484 xas_lock_irq(xas);
485 entry = get_unlocked_entry(xas, order);
486
487 if (entry) {
488 if (dax_is_conflict(entry))
489 goto fallback;
490 if (!xa_is_value(entry)) {
491 xas_set_err(xas, EIO);
492 goto out_unlock;
493 }
494
495 if (order == 0) {
496 if (dax_is_pmd_entry(entry) &&
497 (dax_is_zero_entry(entry) ||
498 dax_is_empty_entry(entry))) {
499 pmd_downgrade = true;
500 }
501 }
502 }
503
504 if (pmd_downgrade) {
		/*
		 * Make sure 'entry' remains valid while we drop
		 * the i_pages lock.
		 */
509 dax_lock_entry(xas, entry);

		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
516 if (dax_is_zero_entry(entry)) {
517 xas_unlock_irq(xas);
518 unmap_mapping_pages(mapping,
519 xas->xa_index & ~PG_PMD_COLOUR,
520 PG_PMD_NR, false);
521 xas_reset(xas);
522 xas_lock_irq(xas);
523 }
524
525 dax_disassociate_entry(entry, mapping, false);
526 xas_store(xas, NULL);
527 dax_wake_entry(xas, entry, true);
528 mapping->nrexceptional--;
529 entry = NULL;
530 xas_set(xas, index);
531 }
532
533 if (entry) {
534 dax_lock_entry(xas, entry);
535 } else {
536 unsigned long flags = DAX_EMPTY;
537
538 if (order > 0)
539 flags |= DAX_PMD;
540 entry = dax_make_entry(pfn_to_pfn_t(0), flags);
541 dax_lock_entry(xas, entry);
542 if (xas_error(xas))
543 goto out_unlock;
544 mapping->nrexceptional++;
545 }
546
547out_unlock:
548 xas_unlock_irq(xas);
549 if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
550 goto retry;
551 if (xas->xa_node == XA_ERROR(-ENOMEM))
552 return xa_mk_internal(VM_FAULT_OOM);
553 if (xas_error(xas))
554 return xa_mk_internal(VM_FAULT_SIGBUS);
555 return entry;
556fallback:
557 xas_unlock_irq(xas);
558 return xa_mk_internal(VM_FAULT_FALLBACK);
559}
560
/**
 * dax_layout_busy_page - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
576struct page *dax_layout_busy_page(struct address_space *mapping)
577{
578 XA_STATE(xas, &mapping->i_pages, 0);
579 void *entry;
580 unsigned int scanned = 0;
581 struct page *page = NULL;
582
	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
586 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
587 return NULL;
588
589 if (!dax_mapping(mapping) || !mapping_mapped(mapping))
590 return NULL;
591
	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the iteration and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_range() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
604 unmap_mapping_range(mapping, 0, 0, 0);
605
606 xas_lock_irq(&xas);
607 xas_for_each(&xas, entry, ULONG_MAX) {
608 if (WARN_ON_ONCE(!xa_is_value(entry)))
609 continue;
610 if (unlikely(dax_is_locked(entry)))
611 entry = get_unlocked_entry(&xas, 0);
612 if (entry)
613 page = dax_busy_page(entry);
614 put_unlocked_entry(&xas, entry);
615 if (page)
616 break;
617 if (++scanned % XA_CHECK_SCHED)
618 continue;
619
620 xas_pause(&xas);
621 xas_unlock_irq(&xas);
622 cond_resched();
623 xas_lock_irq(&xas);
624 }
625 xas_unlock_irq(&xas);
626 return page;
627}
628EXPORT_SYMBOL_GPL(dax_layout_busy_page);
629
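/*
 * Remove the DAX entry at @index from @mapping.  If @trunc is false, a
 * dirty or towrite-tagged entry is left in place so a racing fsync can
 * still flush it; truncation (@trunc == true) removes it unconditionally.
 * Returns 1 if an entry was removed, 0 otherwise.
 */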
630static int __dax_invalidate_entry(struct address_space *mapping,
631 pgoff_t index, bool trunc)
632{
633 XA_STATE(xas, &mapping->i_pages, index);
634 int ret = 0;
635 void *entry;
636
637 xas_lock_irq(&xas);
638 entry = get_unlocked_entry(&xas, 0);
639 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
640 goto out;
641 if (!trunc &&
642 (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
643 xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
644 goto out;
645 dax_disassociate_entry(entry, mapping, trunc);
646 xas_store(&xas, NULL);
647 mapping->nrexceptional--;
648 ret = 1;
649out:
650 put_unlocked_entry(&xas, entry);
651 xas_unlock_irq(&xas);
652 return ret;
653}

/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
 */
659int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
660{
661 int ret = __dax_invalidate_entry(mapping, index, true);
662
	/*
	 * This gets called from truncate / punch_hole path. As such, the
	 * caller must hold locks protecting against concurrent modifications
	 * of the page cache (usually fs-private i_mmap_sem held for writing).
	 * Since the caller has seen a DAX entry for this index, the entry
	 * had better still have been present, hence the WARN_ON_ONCE(!ret)
	 * below.
	 */
670 WARN_ON_ONCE(!ret);
671 return ret;
672}
673
/*
 * Invalidate DAX entry if it is clean.
 */
677int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
678 pgoff_t index)
679{
680 return __dax_invalidate_entry(mapping, index, false);
681}
682
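/*
 * Copy one page worth of data for @sector from the DAX device into the
 * page cache page @to.  Used by the copy-on-write fault path for
 * MAP_PRIVATE mappings; the source address comes from dax_direct_access()
 * under dax_read_lock().
 */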
683static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
684 sector_t sector, size_t size, struct page *to,
685 unsigned long vaddr)
686{
687 void *vto, *kaddr;
688 pgoff_t pgoff;
689 long rc;
690 int id;
691
692 rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
693 if (rc)
694 return rc;
695
696 id = dax_read_lock();
697 rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
698 if (rc < 0) {
699 dax_read_unlock(id);
700 return rc;
701 }
702 vto = kmap_atomic(to);
703 copy_user_page(vto, (void __force *)kaddr, vaddr, to);
704 kunmap_atomic(vto);
705 dax_read_unlock(id);
706 return 0;
707}
708
/*
 * By this point grab_mapping_entry() has ensured that we have a locked entry
 * of the appropriate size so we don't have to worry about downgrading PMDs
 * to PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * entry already in the tree, we will skip the insertion and just dirty the
 * PMD as appropriate.
 */
716static void *dax_insert_entry(struct xa_state *xas,
717 struct address_space *mapping, struct vm_fault *vmf,
718 void *entry, pfn_t pfn, unsigned long flags, bool dirty)
719{
720 void *new_entry = dax_make_entry(pfn, flags);
721
722 if (dirty)
723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
724
725 if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
726 unsigned long index = xas->xa_index;
727
728 if (dax_is_pmd_entry(entry))
729 unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
730 PG_PMD_NR, false);
731 else
732 unmap_mapping_pages(mapping, index, 1, false);
733 }
734
735 xas_reset(xas);
736 xas_lock_irq(xas);
737 if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
738 void *old;
739
740 dax_disassociate_entry(entry, mapping, false);
741 dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
742
		/*
		 * Only swap our new entry into the page cache if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the cache, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the existing
		 * entry is a PMD, we will just leave the PMD in the tree and
		 * dirty it if necessary.
		 */
750 old = dax_lock_entry(xas, new_entry);
751 WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
752 DAX_LOCKED));
753 entry = new_entry;
754 } else {
755 xas_load(xas);
756 }
757
758 if (dirty)
759 xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
760
761 xas_unlock_irq(xas);
762 return entry;
763}
764
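/* Convert a page offset within @vma's file into a user virtual address. */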
765static inline
766unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
767{
768 unsigned long address;
769
770 address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
771 VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
772 return address;
773}

/* Walk all mappings of a given index of a file and writeprotect them */
776static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
777 unsigned long pfn)
778{
779 struct vm_area_struct *vma;
780 pte_t pte, *ptep = NULL;
781 pmd_t *pmdp = NULL;
782 spinlock_t *ptl;
783
784 i_mmap_lock_read(mapping);
785 vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
786 struct mmu_notifier_range range;
787 unsigned long address;
788
789 cond_resched();
790
791 if (!(vma->vm_flags & VM_SHARED))
792 continue;
793
794 address = pgoff_address(index, vma);

		/*
		 * Note because we provide range to follow_pte_pmd it will
		 * call mmu_notifier_invalidate_range_start() on our behalf
		 * before taking any lock.
		 */
801 if (follow_pte_pmd(vma->vm_mm, address, &range,
802 &ptep, &pmdp, &ptl))
803 continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
812 if (pmdp) {
813#ifdef CONFIG_FS_DAX_PMD
814 pmd_t pmd;
815
816 if (pfn != pmd_pfn(*pmdp))
817 goto unlock_pmd;
818 if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
819 goto unlock_pmd;
820
821 flush_cache_page(vma, address, pfn);
822 pmd = pmdp_invalidate(vma, address, pmdp);
823 pmd = pmd_wrprotect(pmd);
824 pmd = pmd_mkclean(pmd);
825 set_pmd_at(vma->vm_mm, address, pmdp, pmd);
826unlock_pmd:
827#endif
828 spin_unlock(ptl);
829 } else {
830 if (pfn != pte_pfn(*ptep))
831 goto unlock_pte;
832 if (!pte_dirty(*ptep) && !pte_write(*ptep))
833 goto unlock_pte;
834
835 flush_cache_page(vma, address, pfn);
836 pte = ptep_clear_flush(vma, address, ptep);
837 pte = pte_wrprotect(pte);
838 pte = pte_mkclean(pte);
839 set_pte_at(vma->vm_mm, address, ptep, pte);
840unlock_pte:
841 pte_unmap_unlock(ptep, ptl);
842 }
843
844 mmu_notifier_invalidate_range_end(&range);
845 }
846 i_mmap_unlock_read(mapping);
847}
848
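/*
 * Flush a single dirty DAX entry: write-protect all user mappings of its
 * pfn(s), flush the CPU caches over the whole entry, then clear the dirty
 * tag.  Called with the i_pages lock held; the lock is dropped and
 * re-taken around the actual flush.
 */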
static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
		struct address_space *mapping, void *entry)
{
	unsigned long pfn, index, count;
	long ret = 0;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong if we try to write back an entry that isn't a DAX value.
	 */
	if (WARN_ON(!xa_is_value(entry)))
		return -EIO;

	if (unlikely(dax_is_locked(entry))) {
		void *old_entry = entry;

		entry = get_unlocked_entry(xas, 0);

		/* Entry got punched out / reallocated? */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			goto put_unlocked;
		/*
		 * Entry got reallocated elsewhere? No need to writeback.
		 * We have to compare pfns as we must not bail out due to
		 * difference in lockbit or entry type.
		 */
		if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
			goto put_unlocked;
		if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
					dax_is_zero_entry(entry))) {
			ret = -EIO;
			goto put_unlocked;
		}

		/* Another fsync thread may have already done this entry */
		if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;
	}

	/* Lock the entry to serialize with page faults */
	dax_lock_entry(xas, entry);

	/*
	 * We can clear the tag now but we have to be careful so that
	 * concurrent dax_writeback_one() calls for the same index cannot
	 * finish before we actually flush the caches. This is achieved as
	 * the calls will look at the entry only under the i_pages lock and
	 * once they do that they will see the entry locked and wait for it
	 * to unlock.
	 */
	xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
	xas_unlock_irq(xas);

	/*
	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
	 * in the middle of a PMD, the 'index' we use needs to be aligned to
	 * the start of the PMD.  This allows us to flush for PMD_SIZE and
	 * not have to worry about partial PMD writebacks.
	 */
	pfn = dax_to_pfn(entry);
	count = 1UL << dax_entry_order(entry);
	index = xas->xa_index & ~(count - 1);

	dax_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
	/*
	 * After we have flushed the cache, we can clear the dirty tag. There
	 * cannot be new dirty data in the pfn after the flush has completed
	 * as the pfn mappings are writeprotected and fault waits for the
	 * mapping entry lock.
	 */
	xas_reset(xas);
	xas_lock_irq(xas);
	xas_store(xas, entry);
	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
	dax_wake_entry(xas, entry, false);

	trace_dax_writeback_one(mapping->host, index, count);
	return ret;

 put_unlocked:
	put_unlocked_entry(xas, entry);
	return ret;
}
933
/*
 * Flush the mapping to the persistent domain within the byte range of
 * [start, end]. This is required by data integrity operations to ensure
 * file data is on persistent storage prior to completion of the operation.
 */
939int dax_writeback_mapping_range(struct address_space *mapping,
940 struct block_device *bdev, struct writeback_control *wbc)
941{
942 XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
943 struct inode *inode = mapping->host;
944 pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
945 struct dax_device *dax_dev;
946 void *entry;
947 int ret = 0;
948 unsigned int scanned = 0;
949
950 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
951 return -EIO;
952
953 if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
954 return 0;
955
956 dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
957 if (!dax_dev)
958 return -EIO;
959
960 trace_dax_writeback_range(inode, xas.xa_index, end_index);
961
962 tag_pages_for_writeback(mapping, xas.xa_index, end_index);
963
964 xas_lock_irq(&xas);
965 xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
966 ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
967 if (ret < 0) {
968 mapping_set_error(mapping, ret);
969 break;
970 }
971 if (++scanned % XA_CHECK_SCHED)
972 continue;
973
974 xas_pause(&xas);
975 xas_unlock_irq(&xas);
976 cond_resched();
977 xas_lock_irq(&xas);
978 }
979 xas_unlock_irq(&xas);
980 put_dax(dax_dev);
981 trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
982 return ret;
983}
984EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
985
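/*
 * Sector (512-byte unit) on the backing block device for the page
 * containing @pos within this iomap extent.
 */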
986static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
987{
988 return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
989}
990
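/*
 * Translate a file position within a mapped iomap extent into a pfn,
 * checking that the direct mapping is at least @size bytes long, suitably
 * aligned, and (when larger than one page) backed by a devmap.
 */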
991static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
992 pfn_t *pfnp)
993{
994 const sector_t sector = dax_iomap_sector(iomap, pos);
995 pgoff_t pgoff;
996 int id, rc;
997 long length;
998
999 rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
1000 if (rc)
1001 return rc;
1002 id = dax_read_lock();
1003 length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
1004 NULL, pfnp);
1005 if (length < 0) {
1006 rc = length;
1007 goto out;
1008 }
1009 rc = -EINVAL;
1010 if (PFN_PHYS(length) < size)
1011 goto out;
1012 if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
1013 goto out;
1014
1015 if (length > 1 && !pfn_t_devmap(*pfnp))
1016 goto out;
1017 rc = 0;
1018out:
1019 dax_read_unlock(id);
1020 return rc;
1021}
1022
/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the 4k zero page.
 * If this page is ever written to we will re-fault and change the mapping
 * to point to real DAX storage instead.
 */
1030static vm_fault_t dax_load_hole(struct xa_state *xas,
1031 struct address_space *mapping, void **entry,
1032 struct vm_fault *vmf)
1033{
1034 struct inode *inode = mapping->host;
1035 unsigned long vaddr = vmf->address;
1036 pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
1037 vm_fault_t ret;
1038
1039 *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
1040 DAX_ZERO_PAGE, false);
1041
1042 ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
1043 trace_dax_load_hole(inode, vmf, ret);
1044 return ret;
1045}
1046
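/*
 * True if @offset and @length are aligned to the logical block size of
 * @bdev, in which case zeroing can be pushed down to the block layer.
 */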
1047static bool dax_range_is_aligned(struct block_device *bdev,
1048 unsigned int offset, unsigned int length)
1049{
1050 unsigned short sector_size = bdev_logical_block_size(bdev);
1051
1052 if (!IS_ALIGNED(offset, sector_size))
1053 return false;
1054 if (!IS_ALIGNED(length, sector_size))
1055 return false;
1056
1057 return true;
1058}
1059
1060int __dax_zero_page_range(struct block_device *bdev,
1061 struct dax_device *dax_dev, sector_t sector,
1062 unsigned int offset, unsigned int size)
1063{
1064 if (dax_range_is_aligned(bdev, offset, size)) {
1065 sector_t start_sector = sector + (offset >> 9);
1066
1067 return blkdev_issue_zeroout(bdev, start_sector,
1068 size >> 9, GFP_NOFS, 0);
1069 } else {
1070 pgoff_t pgoff;
1071 long rc, id;
1072 void *kaddr;
1073
1074 rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
1075 if (rc)
1076 return rc;
1077
1078 id = dax_read_lock();
1079 rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
1080 if (rc < 0) {
1081 dax_read_unlock(id);
1082 return rc;
1083 }
1084 memset(kaddr + offset, 0, size);
1085 dax_flush(dax_dev, kaddr + offset, size);
1086 dax_read_unlock(id);
1087 }
1088 return 0;
1089}
1090EXPORT_SYMBOL_GPL(__dax_zero_page_range);
1091
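/*
 * The read/write "actor" called back from iomap_apply() for each mapped
 * extent: copies data between the user iov_iter and the direct-access
 * kernel mapping returned by dax_direct_access(), in chunks bounded by
 * what the DAX device maps, honouring fatal signals.
 */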
1092static loff_t
1093dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
1094 struct iomap *iomap)
1095{
1096 struct block_device *bdev = iomap->bdev;
1097 struct dax_device *dax_dev = iomap->dax_dev;
1098 struct iov_iter *iter = data;
1099 loff_t end = pos + length, done = 0;
1100 ssize_t ret = 0;
1101 size_t xfer;
1102 int id;
1103
1104 if (iov_iter_rw(iter) == READ) {
1105 end = min(end, i_size_read(inode));
1106 if (pos >= end)
1107 return 0;
1108
1109 if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
1110 return iov_iter_zero(min(length, end - pos), iter);
1111 }
1112
1113 if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
1114 return -EIO;
1115
	/*
	 * Write can allocate block for an area which has a hole page mapped
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in mmap.
	 */
1121 if (iomap->flags & IOMAP_F_NEW) {
1122 invalidate_inode_pages2_range(inode->i_mapping,
1123 pos >> PAGE_SHIFT,
1124 (end - 1) >> PAGE_SHIFT);
1125 }
1126
1127 id = dax_read_lock();
1128 while (pos < end) {
1129 unsigned offset = pos & (PAGE_SIZE - 1);
1130 const size_t size = ALIGN(length + offset, PAGE_SIZE);
1131 const sector_t sector = dax_iomap_sector(iomap, pos);
1132 ssize_t map_len;
1133 pgoff_t pgoff;
1134 void *kaddr;
1135
1136 if (fatal_signal_pending(current)) {
1137 ret = -EINTR;
1138 break;
1139 }
1140
1141 ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
1142 if (ret)
1143 break;
1144
1145 map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
1146 &kaddr, NULL);
1147 if (map_len < 0) {
1148 ret = map_len;
1149 break;
1150 }
1151
1152 map_len = PFN_PHYS(map_len);
1153 kaddr += offset;
1154 map_len -= offset;
1155 if (map_len > end - pos)
1156 map_len = end - pos;

		/*
		 * The userspace address for the memory copy has already been
		 * validated via access_ok() in either vfs_read() or
		 * vfs_write(), depending on which operation we are doing.
		 */
1163 if (iov_iter_rw(iter) == WRITE)
1164 xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
1165 map_len, iter);
1166 else
1167 xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
1168 map_len, iter);
1169
1170 pos += xfer;
1171 length -= xfer;
1172 done += xfer;
1173
1174 if (xfer == 0)
1175 ret = -EFAULT;
1176 if (xfer < map_len)
1177 break;
1178 }
1179 dax_read_unlock(id);
1180
1181 return done ? done : ret;
1182}
1183
/**
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory.  The caller needs to take care of read/write exclusion
 * and evicting any page cache pages in the region under I/O.
 */
1194ssize_t
1195dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
1196 const struct iomap_ops *ops)
1197{
1198 struct address_space *mapping = iocb->ki_filp->f_mapping;
1199 struct inode *inode = mapping->host;
1200 loff_t pos = iocb->ki_pos, ret = 0, done = 0;
1201 unsigned flags = 0;
1202
1203 if (iov_iter_rw(iter) == WRITE) {
1204 lockdep_assert_held_write(&inode->i_rwsem);
1205 flags |= IOMAP_WRITE;
1206 } else {
1207 lockdep_assert_held(&inode->i_rwsem);
1208 }
1209
1210 while (iov_iter_count(iter)) {
1211 ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
1212 iter, dax_iomap_actor);
1213 if (ret <= 0)
1214 break;
1215 pos += ret;
1216 done += ret;
1217 }
1218
1219 iocb->ki_pos += done;
1220 return done ? done : ret;
1221}
1222EXPORT_SYMBOL_GPL(dax_iomap_rw);
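
/*
 * Illustrative sketch (not part of this file): a filesystem typically wires
 * dax_iomap_rw() up from its ->read_iter()/->write_iter() methods, roughly:
 *
 *	static ssize_t foo_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		struct inode *inode = file_inode(iocb->ki_filp);
 *		ssize_t ret;
 *
 *		inode_lock_shared(inode);
 *		ret = dax_iomap_rw(iocb, to, &foo_iomap_ops);
 *		inode_unlock_shared(inode);
 *		return ret;
 *	}
 *
 * "foo_dax_read_iter" and "foo_iomap_ops" are placeholder names; the exact
 * locking scheme is up to the filesystem, as the kernel-doc above notes.
 */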
1223
1224static vm_fault_t dax_fault_return(int error)
1225{
1226 if (error == 0)
1227 return VM_FAULT_NOPAGE;
1228 return vmf_error(error);
1229}

/*
 * MAP_SYNC on a dax mapping guarantees dirty metadata is
 * flushed on write-faults (non-cow), but not read-faults.
 */
1235static bool dax_fault_is_synchronous(unsigned long flags,
1236 struct vm_area_struct *vma, struct iomap *iomap)
1237{
1238 return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
1239 && (iomap->flags & IOMAP_F_DIRTY);
1240}
1241
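/*
 * Handle a PTE-sized fault on a DAX file: look up (or create) the locked
 * page cache entry, map the block via ->iomap_begin(), and install either
 * a device pfn, a copied page (for COW), or the zero page for holes.  For
 * synchronous (MAP_SYNC) write faults the pfn is returned via @pfnp and
 * insertion is left to dax_finish_sync_fault() after fsync.
 */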
1242static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
1243 int *iomap_errp, const struct iomap_ops *ops)
1244{
1245 struct vm_area_struct *vma = vmf->vma;
1246 struct address_space *mapping = vma->vm_file->f_mapping;
1247 XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
1248 struct inode *inode = mapping->host;
1249 unsigned long vaddr = vmf->address;
1250 loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
1251 struct iomap iomap = { 0 };
1252 unsigned flags = IOMAP_FAULT;
1253 int error, major = 0;
1254 bool write = vmf->flags & FAULT_FLAG_WRITE;
1255 bool sync;
1256 vm_fault_t ret = 0;
1257 void *entry;
1258 pfn_t pfn;
1259
1260 trace_dax_pte_fault(inode, vmf, ret);
1261
	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole
	 * so this is a reliable test.
	 */
1266 if (pos >= i_size_read(inode)) {
1267 ret = VM_FAULT_SIGBUS;
1268 goto out;
1269 }
1270
1271 if (write && !vmf->cow_page)
1272 flags |= IOMAP_WRITE;
1273
1274 entry = grab_mapping_entry(&xas, mapping, 0);
1275 if (xa_is_internal(entry)) {
1276 ret = xa_to_internal(entry);
1277 goto out;
1278 }
1279
	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PMD fault that overlaps with
	 * the PTE we need to set up.  If so just return and the fault will
	 * be retried.
	 */
1286 if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
1287 ret = VM_FAULT_NOPAGE;
1288 goto unlock_entry;
1289 }

	/*
	 * Note that we don't bother to use iomap_apply here: DAX requires
	 * the file system block size to be equal to the page size, which
	 * means that we never have to deal with more than a single extent
	 * here.
	 */
1296 error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
1297 if (iomap_errp)
1298 *iomap_errp = error;
1299 if (error) {
1300 ret = dax_fault_return(error);
1301 goto unlock_entry;
1302 }
1303 if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
1304 error = -EIO;
1305 goto error_finish_iomap;
1306 }
1307
1308 if (vmf->cow_page) {
1309 sector_t sector = dax_iomap_sector(&iomap, pos);
1310
1311 switch (iomap.type) {
1312 case IOMAP_HOLE:
1313 case IOMAP_UNWRITTEN:
1314 clear_user_highpage(vmf->cow_page, vaddr);
1315 break;
1316 case IOMAP_MAPPED:
1317 error = copy_user_dax(iomap.bdev, iomap.dax_dev,
1318 sector, PAGE_SIZE, vmf->cow_page, vaddr);
1319 break;
1320 default:
1321 WARN_ON_ONCE(1);
1322 error = -EIO;
1323 break;
1324 }
1325
1326 if (error)
1327 goto error_finish_iomap;
1328
1329 __SetPageUptodate(vmf->cow_page);
1330 ret = finish_fault(vmf);
1331 if (!ret)
1332 ret = VM_FAULT_DONE_COW;
1333 goto finish_iomap;
1334 }
1335
1336 sync = dax_fault_is_synchronous(flags, vma, &iomap);
1337
1338 switch (iomap.type) {
1339 case IOMAP_MAPPED:
1340 if (iomap.flags & IOMAP_F_NEW) {
1341 count_vm_event(PGMAJFAULT);
1342 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
1343 major = VM_FAULT_MAJOR;
1344 }
1345 error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
1346 if (error < 0)
1347 goto error_finish_iomap;
1348
1349 entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
1350 0, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs
		 * fsync, we can insert PTE into page tables only after that
		 * happens. Skip insertion for now and return the pfn so that
		 * caller can insert it after fsync is done.
		 */
1358 if (sync) {
1359 if (WARN_ON_ONCE(!pfnp)) {
1360 error = -EIO;
1361 goto error_finish_iomap;
1362 }
1363 *pfnp = pfn;
1364 ret = VM_FAULT_NEEDDSYNC | major;
1365 goto finish_iomap;
1366 }
1367 trace_dax_insert_mapping(inode, vmf, entry);
1368 if (write)
1369 ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
1370 else
1371 ret = vmf_insert_mixed(vma, vaddr, pfn);
1372
1373 goto finish_iomap;
1374 case IOMAP_UNWRITTEN:
1375 case IOMAP_HOLE:
1376 if (!write) {
1377 ret = dax_load_hole(&xas, mapping, &entry, vmf);
1378 goto finish_iomap;
1379 }
		/*FALLTHRU*/
1381 default:
1382 WARN_ON_ONCE(1);
1383 error = -EIO;
1384 break;
1385 }
1386
1387 error_finish_iomap:
1388 ret = dax_fault_return(error);
1389 finish_iomap:
1390 if (ops->iomap_end) {
1391 int copied = PAGE_SIZE;
1392
1393 if (ret & VM_FAULT_ERROR)
1394 copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PTE we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
1401 ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
1402 }
1403 unlock_entry:
1404 dax_unlock_entry(&xas, entry);
1405 out:
1406 trace_dax_pte_fault_done(inode, vmf, ret);
1407 return ret | major;
1408}
1409
1410#ifdef CONFIG_FS_DAX_PMD
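/*
 * Handle a read fault from a hole at PMD granularity: map the shared huge
 * zero page read-only and record a DAX_PMD | DAX_ZERO_PAGE entry in the
 * page cache, falling back to PTEs if the huge zero page is unavailable or
 * the PMD is no longer empty.
 */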
1411static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
1412 struct iomap *iomap, void **entry)
1413{
1414 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
1415 unsigned long pmd_addr = vmf->address & PMD_MASK;
1416 struct vm_area_struct *vma = vmf->vma;
1417 struct inode *inode = mapping->host;
1418 pgtable_t pgtable = NULL;
1419 struct page *zero_page;
1420 spinlock_t *ptl;
1421 pmd_t pmd_entry;
1422 pfn_t pfn;
1423
1424 zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
1425
1426 if (unlikely(!zero_page))
1427 goto fallback;
1428
1429 pfn = page_to_pfn_t(zero_page);
1430 *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
1431 DAX_PMD | DAX_ZERO_PAGE, false);
1432
1433 if (arch_needs_pgtable_deposit()) {
1434 pgtable = pte_alloc_one(vma->vm_mm);
1435 if (!pgtable)
1436 return VM_FAULT_OOM;
1437 }
1438
1439 ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
1440 if (!pmd_none(*(vmf->pmd))) {
1441 spin_unlock(ptl);
1442 goto fallback;
1443 }
1444
1445 if (pgtable) {
1446 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
1447 mm_inc_nr_ptes(vma->vm_mm);
1448 }
1449 pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
1450 pmd_entry = pmd_mkhuge(pmd_entry);
1451 set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
1452 spin_unlock(ptl);
1453 trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
1454 return VM_FAULT_NOPAGE;
1455
1456fallback:
1457 if (pgtable)
1458 pte_free(vma->vm_mm, pgtable);
1459 trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
1460 return VM_FAULT_FALLBACK;
1461}
1462
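/*
 * PMD-sized variant of the DAX fault handler.  Falls back to PTEs whenever
 * the fault address, the VMA or the file extent cannot accommodate an
 * aligned PMD-sized mapping.
 */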
1463static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1464 const struct iomap_ops *ops)
1465{
1466 struct vm_area_struct *vma = vmf->vma;
1467 struct address_space *mapping = vma->vm_file->f_mapping;
1468 XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
1469 unsigned long pmd_addr = vmf->address & PMD_MASK;
1470 bool write = vmf->flags & FAULT_FLAG_WRITE;
1471 bool sync;
1472 unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
1473 struct inode *inode = mapping->host;
1474 vm_fault_t result = VM_FAULT_FALLBACK;
1475 struct iomap iomap = { 0 };
1476 pgoff_t max_pgoff;
1477 void *entry;
1478 loff_t pos;
1479 int error;
1480 pfn_t pfn;

	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole
	 * so this is a reliable test.
	 */
1487 max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1488
1489 trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);

	/*
	 * Make sure that the faulting address's PMD offset (colour) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
1497 if ((vmf->pgoff & PG_PMD_COLOUR) !=
1498 ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
1499 goto fallback;

	/* Fall back to PTEs if we're going to COW */
1502 if (write && !(vma->vm_flags & VM_SHARED))
1503 goto fallback;

	/* If the PMD would extend outside the VMA */
1506 if (pmd_addr < vma->vm_start)
1507 goto fallback;
1508 if ((pmd_addr + PMD_SIZE) > vma->vm_end)
1509 goto fallback;
1510
1511 if (xas.xa_index >= max_pgoff) {
1512 result = VM_FAULT_SIGBUS;
1513 goto out;
1514 }

	/* If the PMD would extend beyond the file size */
1517 if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
1518 goto fallback;

	/*
	 * grab_mapping_entry() will make sure we get an empty PMD entry,
	 * a zero PMD entry or a DAX PMD.  If it can't (because a PTE
	 * entry is already in the array, for instance), it will return
	 * VM_FAULT_FALLBACK.
	 */
1526 entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
1527 if (xa_is_internal(entry)) {
1528 result = xa_to_internal(entry);
1529 goto fallback;
1530 }

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PTE fault that overlaps with
	 * the PMD we need to set up.  If so just return and the fault will
	 * be retried.
	 */
1538 if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
1539 !pmd_devmap(*vmf->pmd)) {
1540 result = 0;
1541 goto unlock_entry;
1542 }

	/*
	 * Note that we don't use iomap_apply here.  We aren't doing I/O,
	 * only setting up a mapping, so really we're using iomap_begin() as
	 * a way to look up our filesystem block.
	 */
1549 pos = (loff_t)xas.xa_index << PAGE_SHIFT;
1550 error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
1551 if (error)
1552 goto unlock_entry;
1553
1554 if (iomap.offset + iomap.length < pos + PMD_SIZE)
1555 goto finish_iomap;
1556
1557 sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);
1558
1559 switch (iomap.type) {
1560 case IOMAP_MAPPED:
1561 error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
1562 if (error < 0)
1563 goto finish_iomap;
1564
1565 entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
1566 DAX_PMD, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs
		 * fsync, we can insert PMD into page tables only after that
		 * happens. Skip insertion for now and return the pfn so that
		 * caller can insert it after fsync is done.
		 */
1574 if (sync) {
1575 if (WARN_ON_ONCE(!pfnp))
1576 goto finish_iomap;
1577 *pfnp = pfn;
1578 result = VM_FAULT_NEEDDSYNC;
1579 goto finish_iomap;
1580 }
1581
1582 trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
1583 result = vmf_insert_pfn_pmd(vmf, pfn, write);
1584 break;
1585 case IOMAP_UNWRITTEN:
1586 case IOMAP_HOLE:
1587 if (WARN_ON_ONCE(write))
1588 break;
1589 result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
1590 break;
1591 default:
1592 WARN_ON_ONCE(1);
1593 break;
1594 }
1595
1596 finish_iomap:
1597 if (ops->iomap_end) {
1598 int copied = PMD_SIZE;
1599
1600 if (result == VM_FAULT_FALLBACK)
1601 copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PMD we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
1608 ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
1609 &iomap);
1610 }
1611 unlock_entry:
1612 dax_unlock_entry(&xas, entry);
1613 fallback:
1614 if (result == VM_FAULT_FALLBACK) {
1615 split_huge_pmd(vma, vmf->pmd, vmf->address);
1616 count_vm_event(THP_FAULT_FALLBACK);
1617 }
1618out:
1619 trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
1620 return result;
1621}
1622#else
1623static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1624 const struct iomap_ops *ops)
1625{
1626 return VM_FAULT_FALLBACK;
1627}
1628#endif
1629
/**
 * dax_iomap_fault - handle a page fault on a DAX file
 * @vmf: The description of the fault
 * @pe_size: Size of the page to fault in
 * @pfnp: PFN to insert for synchronous faults if fsync is required
 * @iomap_errp: Storage for detailed error code in case of error
 * @ops: Iomap ops passed from the file system
 *
 * When a page fault occurs, filesystems may call this helper in
 * their fault handler for DAX files. dax_iomap_fault() assumes the caller
 * has done all the necessary locking for the page fault to proceed
 * successfully.
 */
1643vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
1644 pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
1645{
1646 switch (pe_size) {
1647 case PE_SIZE_PTE:
1648 return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
1649 case PE_SIZE_PMD:
1650 return dax_iomap_pmd_fault(vmf, pfnp, ops);
1651 default:
1652 return VM_FAULT_FALLBACK;
1653 }
1654}
1655EXPORT_SYMBOL_GPL(dax_iomap_fault);

/*
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
 * @pfn: PFN to insert
 * @order: Order of entry to insert.
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file.  It also marks the page cache entry as dirty.
 */
1666static vm_fault_t
1667dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
1668{
1669 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
1670 XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
1671 void *entry;
1672 vm_fault_t ret;
1673
1674 xas_lock_irq(&xas);
1675 entry = get_unlocked_entry(&xas, order);
1676
1677 if (!entry || dax_is_conflict(entry) ||
1678 (order == 0 && !dax_is_pte_entry(entry))) {
1679 put_unlocked_entry(&xas, entry);
1680 xas_unlock_irq(&xas);
1681 trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
1682 VM_FAULT_NOPAGE);
1683 return VM_FAULT_NOPAGE;
1684 }
1685 xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
1686 dax_lock_entry(&xas, entry);
1687 xas_unlock_irq(&xas);
1688 if (order == 0)
1689 ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
1690#ifdef CONFIG_FS_DAX_PMD
1691 else if (order == PMD_ORDER)
1692 ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
1693#endif
1694 else
1695 ret = VM_FAULT_FALLBACK;
1696 dax_unlock_entry(&xas, entry);
1697 trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
1698 return ret;
1699}

/**
 * dax_finish_sync_fault - finish synchronous page fault
 * @vmf: The description of the fault
 * @pe_size: Size of entry to be inserted
 * @pfn: PFN to insert
 *
 * This function ensures that the file range touched by the page fault is
 * stored persistently on the media and handles inserting of appropriate
 * page table entry.
 */
1711vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
1712 enum page_entry_size pe_size, pfn_t pfn)
1713{
1714 int err;
1715 loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
1716 unsigned int order = pe_order(pe_size);
1717 size_t len = PAGE_SIZE << order;
1718
1719 err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
1720 if (err)
1721 return VM_FAULT_SIGBUS;
1722 return dax_insert_pfn_mkwrite(vmf, pfn, order);
1723}
1724EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
1725