// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/dax.c - Direct Access filesystem code
 * Copyright (c) 2013-2014 Intel Corporation
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 */
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <asm/pgalloc.h>

#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

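/* Convert an enum page_entry_size to the order of its page table entry. */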
static inline unsigned int pe_order(enum page_entry_size pe_size)
{
	if (pe_size == PE_SIZE_PTE)
		return PAGE_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PUD)
		return PUD_SHIFT - PAGE_SHIFT;
	return ~0;
}

/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (ie low bits) within a PMD of a page offset.  */
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)

static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);

/*
 * DAX pagecache entries use XArray value entries so they can't be mistaken
 * for pages.  We use one bit for locking, one bit for the entry size (PMD)
 * and two more to tell us if the entry is a zero page or an empty entry that
 * is just used for locking.  In total four special bits.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
 * block allocation.
 */
#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)

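/* Decode the pfn stored in the upper bits of a DAX XArray value entry. */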
static unsigned long dax_to_pfn(void *entry)
{
	return xa_to_value(entry) >> DAX_SHIFT;
}

static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}

static bool dax_is_locked(void *entry)
{
	return xa_to_value(entry) & DAX_LOCKED;
}

static unsigned int dax_entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_ORDER;
	return 0;
}

static unsigned long dax_is_pmd_entry(void *entry)
{
	return xa_to_value(entry) & DAX_PMD;
}

static bool dax_is_pte_entry(void *entry)
{
	return !(xa_to_value(entry) & DAX_PMD);
}

static int dax_is_zero_entry(void *entry)
{
	return xa_to_value(entry) & DAX_ZERO_PAGE;
}

static int dax_is_empty_entry(void *entry)
{
	return xa_to_value(entry) & DAX_EMPTY;
}

/*
 * true if the entry that was found is of a smaller order than the entry
 * we were looking for
 */
static bool dax_is_conflict(void *entry)
{
	return entry == XA_RETRY_ENTRY;
}

/*
 * DAX page cache entry locking
 */
struct exceptional_entry_key {
	struct xarray *xa;
	pgoff_t entry_start;
};

struct wait_exceptional_entry_queue {
	wait_queue_entry_t wait;
	struct exceptional_entry_key key;
};

static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
		void *entry, struct exceptional_entry_key *key)
{
	unsigned long hash;
	unsigned long index = xas->xa_index;

	/*
	 * If 'entry' is a PMD, align the 'index' that we use for the wait
	 * queue to the start of that PMD.  This ensures that all offsets in
	 * the range covered by the PMD map to the same bit lock.
	 */
	if (dax_is_pmd_entry(entry))
		index &= ~PG_PMD_COLOUR;
	key->xa = xas->xa;
	key->entry_start = index;

	hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
	return wait_table + hash;
}

static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
		unsigned int mode, int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->xa != ewait->key.xa ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */
static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(xas, entry, &key);

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point the entry isn't locked and we know exactly one
	 * waker needs to be woken.
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}

/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it.  The caller must subsequently call
 * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
 * if it did.  The entry returned may have a larger order than @order.
 * If @order is larger than the order of the entry found in i_pages, this
 * function returns a dax_is_conflict entry.
 *
 * Must be called with the i_pages lock held.
 */
static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_find_conflict(xas);
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			return entry;
		if (dax_entry_order(entry) < order)
			return XA_RETRY_ENTRY;
		if (!dax_is_locked(entry))
			return entry;

		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}

/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages)
 * After we call xas_unlock_irq(), we cannot touch xas->xa.
 */
static void wait_entry_unlocked(struct xa_state *xas, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xas_unlock_irq(xas);
	schedule();
	finish_wait(wq, &ewait.wait);
}

static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
	/* If we were the only waiter woken, wake the next one */
	if (entry && !dax_is_conflict(entry))
		dax_wake_entry(xas, entry, false);
}

/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the xa_lock, so we know the xa_state is stale and must be reset
 * before use.
 */
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	BUG_ON(dax_is_locked(entry));
	xas_reset(xas);
	xas_lock_irq(xas);
	old = xas_store(xas, entry);
	xas_unlock_irq(xas);
	BUG_ON(!dax_is_locked(old));
	dax_wake_entry(xas, entry, false);
}

/*
 * Return: The entry stored at this location before it was locked.
 */
static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
	unsigned long v = xa_to_value(entry);
	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}

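/* Size of the range mapped by @entry; zero and empty entries count as 0. */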
static unsigned long dax_entry_size(void *entry)
{
	if (dax_is_zero_entry(entry))
		return 0;
	else if (dax_is_empty_entry(entry))
		return 0;
	else if (dax_is_pmd_entry(entry))
		return PMD_SIZE;
	else
		return PAGE_SIZE;
}

static unsigned long dax_end_pfn(void *entry)
{
	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
	for (pfn = dax_to_pfn(entry); \
			pfn < dax_end_pfn(entry); pfn++)

/*
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index()
 * offsets.
 */
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);
		page->mapping = mapping;
		page->index = index + i++;
	}
}

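/* Undo the page->mapping association established by dax_associate_entry(). */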
static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}

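/* Return the first page mapped by @entry with an elevated refcount, if any. */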
static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}

/**
 * dax_lock_page - Lock the DAX entry corresponding to a page
 * @page: The page whose entry we want to lock
 *
 * Context: Process context.
 * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
 * not be locked.
 */
dax_entry_t dax_lock_page(struct page *page)
{
	XA_STATE(xas, NULL, 0);
	void *entry;

	/* Ensure page->mapping isn't freed while we look at it */
	rcu_read_lock();
	for (;;) {
		struct address_space *mapping = READ_ONCE(page->mapping);

		entry = NULL;
		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		entry = (void *)~0UL;
		if (S_ISCHR(mapping->host->i_mode))
			break;

		xas.xa = &mapping->i_pages;
		xas_lock_irq(&xas);
		if (mapping != page->mapping) {
			xas_unlock_irq(&xas);
			continue;
		}
		xas_set(&xas, page->index);
		entry = xas_load(&xas);
		if (dax_is_locked(entry)) {
			rcu_read_unlock();
			wait_entry_unlocked(&xas, entry);
			rcu_read_lock();
			continue;
		}
		dax_lock_entry(&xas, entry);
		xas_unlock_irq(&xas);
		break;
	}
	rcu_read_unlock();
	return (dax_entry_t)entry;
}

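/* Release the entry lock taken by dax_lock_page(); @cookie is its return value. */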
void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
	struct address_space *mapping = page->mapping;
	XA_STATE(xas, &mapping->i_pages, page->index);

	if (S_ISCHR(mapping->host->i_mode))
		return;

	dax_unlock_entry(&xas, (void *)cookie);
}

/*
 * Find page cache entry at given index. If it is a DAX entry, return it
 * with the entry locked. If the page cache doesn't contain an entry at
 * that index, add a locked empty entry.
 *
 * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
 * either return that locked entry or will return VM_FAULT_FALLBACK.
 * This will happen if there are any PTE entries within the PMD range
 * that we are requesting.
 *
 * We always favor PTE entries over PMD entries. There isn't a flow where we
 * evict PTE entries in order to 'upgrade' them to a PMD entry.  A PMD
 * insertion will fail if it finds any PTE entries already in the tree, and
 * a PTE insertion will cause an existing PMD entry to be unmapped and
 * downgraded to a PTE.
 */
static void *grab_mapping_entry(struct xa_state *xas,
		struct address_space *mapping, unsigned int order)
{
	unsigned long index = xas->xa_index;
	bool pmd_downgrade = false;	/* splitting PMD entry into PTE entries? */
	void *entry;

retry:
	xas_lock_irq(xas);
	entry = get_unlocked_entry(xas, order);

	if (entry) {
		if (dax_is_conflict(entry))
			goto fallback;
		if (!xa_is_value(entry)) {
			xas_set_err(xas, EIO);
			goto out_unlock;
		}

		if (order == 0) {
			if (dax_is_pmd_entry(entry) &&
			    (dax_is_zero_entry(entry) ||
			     dax_is_empty_entry(entry))) {
				pmd_downgrade = true;
			}
		}
	}

	if (pmd_downgrade) {
		/*
		 * Make sure 'entry' remains valid while we drop
		 * the i_pages lock.
		 */
		dax_lock_entry(xas, entry);

		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
		if (dax_is_zero_entry(entry)) {
			xas_unlock_irq(xas);
			unmap_mapping_pages(mapping,
					xas->xa_index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
			xas_reset(xas);
			xas_lock_irq(xas);
		}

		dax_disassociate_entry(entry, mapping, false);
		xas_store(xas, NULL);	/* undo the PMD join */
		dax_wake_entry(xas, entry, true);
		mapping->nrexceptional--;
		entry = NULL;
		xas_set(xas, index);
	}

	if (entry) {
		dax_lock_entry(xas, entry);
	} else {
		unsigned long flags = DAX_EMPTY;

		if (order > 0)
			flags |= DAX_PMD;
		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
		dax_lock_entry(xas, entry);
		if (xas_error(xas))
			goto out_unlock;
		mapping->nrexceptional++;
	}

out_unlock:
	xas_unlock_irq(xas);
	if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
		goto retry;
	if (xas->xa_node == XA_ERROR(-ENOMEM))
		return xa_mk_internal(VM_FAULT_OOM);
	if (xas_error(xas))
		return xa_mk_internal(VM_FAULT_SIGBUS);
	return entry;
fallback:
	xas_unlock_irq(xas);
	return xa_mk_internal(VM_FAULT_FALLBACK);
}

/**
 * dax_layout_busy_page - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
struct page *dax_layout_busy_page(struct address_space *mapping)
{
	XA_STATE(xas, &mapping->i_pages, 0);
	void *entry;
	unsigned int scanned = 0;
	struct page *page = NULL;

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the iteration and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_range() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
	unmap_mapping_range(mapping, 0, 0, 0);

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, ULONG_MAX) {
		if (WARN_ON_ONCE(!xa_is_value(entry)))
			continue;
		if (unlikely(dax_is_locked(entry)))
			entry = get_unlocked_entry(&xas, 0);
		if (entry)
			page = dax_busy_page(entry);
		put_unlocked_entry(&xas, entry);
		if (page)
			break;
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);

static int __dax_invalidate_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	XA_STATE(xas, &mapping->i_pages, index);
	int ret = 0;
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, 0);
	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
		goto out;
	if (!trunc &&
	    (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
	     xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
		goto out;
	dax_disassociate_entry(entry, mapping, trunc);
	xas_store(&xas, NULL);
	mapping->nrexceptional--;
	ret = 1;
out:
	put_unlocked_entry(&xas, entry);
	xas_unlock_irq(&xas);
	return ret;
}

/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
 */
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	int ret = __dax_invalidate_entry(mapping, index, true);

	/*
	 * This gets called from truncate / punch_hole path. As such, the caller
	 * must hold locks protecting against concurrent modifications of the
	 * page cache (usually fs-private i_mmap_sem for writing). Since the
	 * caller has seen a DAX entry for this index, we better find it
	 * in that radix tree.
	 */
	WARN_ON_ONCE(!ret);
	return ret;
}

/*
 * Invalidate DAX entry if it is clean.
 */
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index)
{
	return __dax_invalidate_entry(mapping, index, false);
}

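/*
 * Copy a page of data from the DAX device backing @sector into the page
 * cache page @to, used to populate vmf->cow_page on a write fault over a
 * private mapping.
 */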
static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
		sector_t sector, size_t size, struct page *to,
		unsigned long vaddr)
{
	void *vto, *kaddr;
	pgoff_t pgoff;
	long rc;
	int id;

	rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();
	rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
	kunmap_atomic(vto);
	dax_read_unlock(id);
	return 0;
}

/*
 * By this point grab_mapping_entry() will have ensured that we have a locked
 * entry of the appropriate size so we don't have to worry about downgrading
 * PMDs to PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * already in the tree, we will skip the insertion and just dirty the PMD as
 * appropriate.
 */
static void *dax_insert_entry(struct xa_state *xas,
		struct address_space *mapping, struct vm_fault *vmf,
		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
{
	void *new_entry = dax_make_entry(pfn, flags);

	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
		unsigned long index = xas->xa_index;
		/* we are replacing a zero page with block mapping */
		if (dax_is_pmd_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
		else /* pte entry */
			unmap_mapping_pages(mapping, index, 1, false);
	}

	xas_reset(xas);
	xas_lock_irq(xas);
	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
		void *old;

		dax_disassociate_entry(entry, mapping, false);
		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
		/*
		 * Only swap our new entry into the page cache if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the cache, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the
		 * existing entry is a PMD, we will just leave the PMD in the
		 * tree and dirty it if necessary.
		 */
		old = dax_lock_entry(xas, new_entry);
		WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
					DAX_LOCKED));
		entry = new_entry;
	} else {
		xas_load(xas);	/* Walk the xa_state */
	}

	if (dirty)
		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

	xas_unlock_irq(xas);
	return entry;
}

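/* Convert a file offset in @vma to the user virtual address that maps it. */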
static inline
unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	return address;
}

/* Walk all mappings of a given index of a file and writeprotect them */
static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
		unsigned long pfn)
{
	struct vm_area_struct *vma;
	pte_t pte, *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
		struct mmu_notifier_range range;
		unsigned long address;

		cond_resched();

		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = pgoff_address(index, vma);

		/*
		 * Note because we provide range to follow_pte_pmd it will
		 * call mmu_notifier_invalidate_range_start() on our behalf
		 * before taking any lock.
		 */
		if (follow_pte_pmd(vma->vm_mm, address, &range,
				   &ptep, &pmdp, &ptl))
			continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
			pmd_t pmd;

			if (pfn != pmd_pfn(*pmdp))
				goto unlock_pmd;
			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
				goto unlock_pmd;

			flush_cache_page(vma, address, pfn);
			pmd = pmdp_invalidate(vma, address, pmdp);
			pmd = pmd_wrprotect(pmd);
			pmd = pmd_mkclean(pmd);
			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
unlock_pmd:
#endif
			spin_unlock(ptl);
		} else {
			if (pfn != pte_pfn(*ptep))
				goto unlock_pte;
			if (!pte_dirty(*ptep) && !pte_write(*ptep))
				goto unlock_pte;

			flush_cache_page(vma, address, pfn);
			pte = ptep_clear_flush(vma, address, ptep);
			pte = pte_wrprotect(pte);
			pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, address, ptep, pte);
unlock_pte:
			pte_unmap_unlock(ptep, ptl);
		}

		mmu_notifier_invalidate_range_end(&range);
	}
	i_mmap_unlock_read(mapping);
}

static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
		struct address_space *mapping, void *entry)
{
	unsigned long pfn, index, count;
	long ret = 0;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong.
	 */
	if (WARN_ON(!xa_is_value(entry)))
		return -EIO;

	if (unlikely(dax_is_locked(entry))) {
		void *old_entry = entry;

		entry = get_unlocked_entry(xas, 0);

		/* Entry got punched out / reallocated? */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			goto put_unlocked;
		/*
		 * Entry got reallocated elsewhere? No need to writeback.
		 * We have to compare pfns as we must not bail out due to
		 * difference in lockbit or entry type.
		 */
		if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
			goto put_unlocked;
		if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
					dax_is_zero_entry(entry))) {
			ret = -EIO;
			goto put_unlocked;
		}

		/* Another fsync thread may have already done this entry */
		if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;
	}

	/* Lock the entry to serialize with page faults */
	dax_lock_entry(xas, entry);

	/*
	 * We can clear the tag now but we have to be careful so that concurrent
	 * dax_writeback_one() calls for the same index cannot finish before we
	 * actually flush the caches. This is achieved as the calls will look
	 * at the entry only under the i_pages lock and once they do that
	 * they will see the entry locked and wait for it to unlock.
	 */
	xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
	xas_unlock_irq(xas);

	/*
	 * If dax_writeback_mapping_range() was given a wbc->range_start
	 * in the middle of a PMD, the 'index' we use needs to be
	 * aligned to the start of the PMD.
	 * This allows us to flush for PMD_SIZE and not have to worry about
	 * partial PMD writebacks.
	 */
	pfn = dax_to_pfn(entry);
	count = 1UL << dax_entry_order(entry);
	index = xas->xa_index & ~(count - 1);

	dax_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);

	/*
	 * After we have flushed the cache, we can clear the dirty tag. There
	 * cannot be new dirty data in the pfn after the flush has completed as
	 * the pfn mappings are writeprotected and fault waits for mapping
	 * entry lock.
	 */
	xas_reset(xas);
	xas_lock_irq(xas);
	xas_store(xas, entry);
	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
	dax_wake_entry(xas, entry, false);

	trace_dax_writeback_one(mapping->host, index, count);
	return ret;

 put_unlocked:
	put_unlocked_entry(xas, entry);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct block_device *bdev, struct writeback_control *wbc)
{
	XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
	struct inode *inode = mapping->host;
	pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
	struct dax_device *dax_dev;
	void *entry;
	int ret = 0;
	unsigned int scanned = 0;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev)
		return -EIO;

	trace_dax_writeback_range(inode, xas.xa_index, end_index);

	tag_pages_for_writeback(mapping, xas.xa_index, end_index);

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
		ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
		if (ret < 0) {
			mapping_set_error(mapping, ret);
			break;
		}
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	put_dax(dax_dev);
	trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

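/* Translate a file position within @iomap to a 512-byte sector number. */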
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}

static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
			 pfn_t *pfnp)
{
	const sector_t sector = dax_iomap_sector(iomap, pos);
	pgoff_t pgoff;
	int id, rc;
	long length;

	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
	if (rc)
		return rc;
	id = dax_read_lock();
	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
				   NULL, pfnp);
	if (length < 0) {
		rc = length;
		goto out;
	}
	rc = -EINVAL;
	if (PFN_PHYS(length) < size)
		goto out;
	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
		goto out;
	/* For larger pages we need devmap */
	if (length > 1 && !pfn_t_devmap(*pfnp))
		goto out;
	rc = 0;
out:
	dax_read_unlock(id);
	return rc;
}

/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the zero page and
 * mark the page cache entry as a zero-page hole so truncate knows to zap it.
 */
static vm_fault_t dax_load_hole(struct xa_state *xas,
		struct address_space *mapping, void **entry,
		struct vm_fault *vmf)
{
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
	vm_fault_t ret;

	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_ZERO_PAGE, false);

	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
	trace_dax_load_hole(inode, vmf, ret);
	return ret;
}

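/* True if @offset and @length are both aligned to the device logical block size. */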
static bool dax_range_is_aligned(struct block_device *bdev,
				 unsigned int offset, unsigned int length)
{
	unsigned short sector_size = bdev_logical_block_size(bdev);

	if (!IS_ALIGNED(offset, sector_size))
		return false;
	if (!IS_ALIGNED(length, sector_size))
		return false;

	return true;
}

int __dax_zero_page_range(struct block_device *bdev,
		struct dax_device *dax_dev, sector_t sector,
		unsigned int offset, unsigned int size)
{
	if (dax_range_is_aligned(bdev, offset, size)) {
		sector_t start_sector = sector + (offset >> 9);

		return blkdev_issue_zeroout(bdev, start_sector,
				size >> 9, GFP_NOFS, 0);
	} else {
		pgoff_t pgoff;
		long rc, id;
		void *kaddr;

		rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
		if (rc)
			return rc;

		id = dax_read_lock();
		rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
		if (rc < 0) {
			dax_read_unlock(id);
			return rc;
		}
		memset(kaddr + offset, 0, size);
		dax_flush(dax_dev, kaddr + offset, size);
		dax_read_unlock(id);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);

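/*
 * The iomap actor for dax_iomap_rw(): copy data between the iov_iter and
 * the DAX device for a single mapped extent.
 */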
static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct block_device *bdev = iomap->bdev;
	struct dax_device *dax_dev = iomap->dax_dev;
	struct iov_iter *iter = data;
	loff_t end = pos + length, done = 0;
	ssize_t ret = 0;
	size_t xfer;
	int id;

	if (iov_iter_rw(iter) == READ) {
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	/*
	 * Write can allocate block for an area which has a hole page mapped
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in page cache.
	 */
	if (iomap->flags & IOMAP_F_NEW) {
		invalidate_inode_pages2_range(inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
	}

	id = dax_read_lock();
	while (pos < end) {
		unsigned offset = pos & (PAGE_SIZE - 1);
		const size_t size = ALIGN(length + offset, PAGE_SIZE);
		const sector_t sector = dax_iomap_sector(iomap, pos);
		ssize_t map_len;
		pgoff_t pgoff;
		void *kaddr;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
		if (ret)
			break;

		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
				&kaddr, NULL);
		if (map_len < 0) {
			ret = map_len;
			break;
		}

		map_len = PFN_PHYS(map_len);
		kaddr += offset;
		map_len -= offset;
		if (map_len > end - pos)
			map_len = end - pos;

		/*
		 * The userspace address for the memory copy has already been
		 * validated via access_ok() in either vfs_read() or
		 * vfs_write(), depending on which operation we are doing.
		 */
		if (iov_iter_rw(iter) == WRITE)
			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
					map_len, iter);
		else
			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
					map_len, iter);

		pos += xfer;
		length -= xfer;
		done += xfer;

		if (xfer == 0)
			ret = -EFAULT;
		if (xfer < map_len)
			break;
	}
	dax_read_unlock(id);

	return done ? done : ret;
}

/**
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory.  The callers needs to take care of read/write exclusion
 * and evicting any page cache pages in the region under I/O.
 */
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;

	if (iov_iter_rw(iter) == WRITE) {
		lockdep_assert_held_write(&inode->i_rwsem);
		flags |= IOMAP_WRITE;
	} else {
		lockdep_assert_held(&inode->i_rwsem);
	}

	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
				iter, dax_iomap_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);

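/* Map an errno from the fault path onto the vm_fault_t the handler must return. */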
static vm_fault_t dax_fault_return(int error)
{
	if (error == 0)
		return VM_FAULT_NOPAGE;
	return vmf_error(error);
}

/*
 * MAP_SYNC on a dax mapping guarantees dirty metadata is
 * flushed on write-faults (non-cow), but not read-faults.
 */
static bool dax_fault_is_synchronous(unsigned long flags,
		struct vm_area_struct *vma, struct iomap *iomap)
{
	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
		&& (iomap->flags & IOMAP_F_DIRTY);
}

static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       int *iomap_errp, const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
	struct iomap iomap = { 0 };
	unsigned flags = IOMAP_FAULT;
	int error, major = 0;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	vm_fault_t ret = 0;
	void *entry;
	pfn_t pfn;

	trace_dax_pte_fault(inode, vmf, ret);
	/*
	 * Check whether offset isn't beyond end of file now. Caller is supposed
	 * to hold locks serializing us with truncate / punch hole so this is
	 * a reliable test.
	 */
	if (pos >= i_size_read(inode)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	if (write && !vmf->cow_page)
		flags |= IOMAP_WRITE;

	entry = grab_mapping_entry(&xas, mapping, 0);
	if (xa_is_internal(entry)) {
		ret = xa_to_internal(entry);
		goto out;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PMD fault that overlaps with
	 * the PTE we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
		ret = VM_FAULT_NOPAGE;
		goto unlock_entry;
	}

	/*
	 * Note that we don't bother to use iomap_apply here: DAX required
	 * the file system block size to be equal the page size, which means
	 * that we never have to deal with more than a single extent here.
	 */
	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
	if (iomap_errp)
		*iomap_errp = error;
	if (error) {
		ret = dax_fault_return(error);
		goto unlock_entry;
	}
	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
		error = -EIO;	/* fs corruption? */
		goto error_finish_iomap;
	}

	if (vmf->cow_page) {
		sector_t sector = dax_iomap_sector(&iomap, pos);

		switch (iomap.type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			clear_user_highpage(vmf->cow_page, vaddr);
			break;
		case IOMAP_MAPPED:
			error = copy_user_dax(iomap.bdev, iomap.dax_dev,
					sector, PAGE_SIZE, vmf->cow_page, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EIO;
			break;
		}

		if (error)
			goto error_finish_iomap;

		__SetPageUptodate(vmf->cow_page);
		ret = finish_fault(vmf);
		if (!ret)
			ret = VM_FAULT_DONE_COW;
		goto finish_iomap;
	}

	sync = dax_fault_is_synchronous(flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		if (iomap.flags & IOMAP_F_NEW) {
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
		}
		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
		if (error < 0)
			goto error_finish_iomap;

		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						 0, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PTE into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp)) {
				error = -EIO;
				goto error_finish_iomap;
			}
			*pfnp = pfn;
			ret = VM_FAULT_NEEDDSYNC | major;
			goto finish_iomap;
		}
		trace_dax_insert_mapping(inode, vmf, entry);
		if (write)
			ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
		else
			ret = vmf_insert_mixed(vma, vaddr, pfn);

		goto finish_iomap;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (!write) {
			ret = dax_load_hole(&xas, mapping, &entry, vmf);
			goto finish_iomap;
		}
		/*FALLTHRU*/
	default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
	}

 error_finish_iomap:
	ret = dax_fault_return(error);
 finish_iomap:
	if (ops->iomap_end) {
		int copied = PAGE_SIZE;

		if (ret & VM_FAULT_ERROR)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PTE we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
	}
 unlock_entry:
	dax_unlock_entry(&xas, entry);
 out:
	trace_dax_pte_fault_done(inode, vmf, ret);
	return ret | major;
}

#ifdef CONFIG_FS_DAX_PMD
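/*
 * Install a PMD-sized mapping of the huge zero page for a read fault over
 * a hole, recording a PMD zero-page entry in the page cache.
 */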
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
		struct iomap *iomap, void **entry)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = mapping->host;
	pgtable_t pgtable = NULL;
	struct page *zero_page;
	spinlock_t *ptl;
	pmd_t pmd_entry;
	pfn_t pfn;

	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);

	if (unlikely(!zero_page))
		goto fallback;

	pfn = page_to_pfn_t(zero_page);
	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_PMD | DAX_ZERO_PAGE, false);

	if (arch_needs_pgtable_deposit()) {
		pgtable = pte_alloc_one(vma->vm_mm);
		if (!pgtable)
			return VM_FAULT_OOM;
	}

	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
	if (!pmd_none(*(vmf->pmd))) {
		spin_unlock(ptl);
		goto fallback;
	}

	if (pgtable) {
		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
		mm_inc_nr_ptes(vma->vm_mm);
	}
	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
	pmd_entry = pmd_mkhuge(pmd_entry);
	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
	spin_unlock(ptl);
	trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
	return VM_FAULT_NOPAGE;

fallback:
	if (pgtable)
		pte_free(vma->vm_mm, pgtable);
	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
	return VM_FAULT_FALLBACK;
}

static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
	struct inode *inode = mapping->host;
	vm_fault_t result = VM_FAULT_FALLBACK;
	struct iomap iomap = { 0 };
	pgoff_t max_pgoff;
	void *entry;
	loff_t pos;
	int error;
	pfn_t pfn;

	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole so
	 * this is a reliable test.
	 */
	max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);

	/*
	 * Make sure that the faulting address's PMD offset (color) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		goto fallback;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vma->vm_flags & VM_SHARED))
		goto fallback;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vma->vm_start)
		goto fallback;
	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
		goto fallback;

	if (xas.xa_index >= max_pgoff) {
		result = VM_FAULT_SIGBUS;
		goto out;
	}

	/* If the PMD would extend beyond the file size */
	if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
		goto fallback;

	/*
	 * grab_mapping_entry() will make sure we get an empty PMD entry,
	 * a zero PMD entry or a DAX PMD.  If it can't (because a PTE
	 * entry is already in the array, for instance), it will return
	 * VM_FAULT_FALLBACK.
	 */
	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
	if (xa_is_internal(entry)) {
		result = xa_to_internal(entry);
		goto fallback;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PTE fault that overlaps with
	 * the PMD we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
			!pmd_devmap(*vmf->pmd)) {
		result = 0;
		goto unlock_entry;
	}

	/*
	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
	 * setting up a mapping, so really we're using iomap_begin() as a way
	 * to look up our filesystem block.
	 */
	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
	if (error)
		goto unlock_entry;

	if (iomap.offset + iomap.length < pos + PMD_SIZE)
		goto finish_iomap;

	sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
		if (error < 0)
			goto finish_iomap;

		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						DAX_PMD, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PMD into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp))
				goto finish_iomap;
			*pfnp = pfn;
			result = VM_FAULT_NEEDDSYNC;
			goto finish_iomap;
		}

		trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
		result = vmf_insert_pfn_pmd(vmf, pfn, write);
		break;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (WARN_ON_ONCE(write))
			break;
		result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

 finish_iomap:
	if (ops->iomap_end) {
		int copied = PMD_SIZE;

		if (result == VM_FAULT_FALLBACK)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PMD we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
				&iomap);
	}
 unlock_entry:
	dax_unlock_entry(&xas, entry);
 fallback:
	if (result == VM_FAULT_FALLBACK) {
		split_huge_pmd(vma, vmf->pmd, vmf->address);
		count_vm_event(THP_FAULT_FALLBACK);
	}
out:
	trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
	return result;
}
#else
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	return VM_FAULT_FALLBACK;
}
#endif /* CONFIG_FS_DAX_PMD */

/**
 * dax_iomap_fault - handle a page fault on a DAX file
 * @vmf: The description of the fault
 * @pe_size: Size of the page to fault in
 * @pfnp: PFN to insert for synchronous faults if fsync is required
 * @iomap_errp: Storage for detailed error code in case of error
 * @ops: Iomap ops passed from the file system
 *
 * When a page fault occurs, filesystems may call this helper in
 * their fault handler for DAX files. dax_iomap_fault() assumes the caller
 * has done all the necessary locking for page fault to proceed
 * successfully.
 */
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
		    pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
{
	switch (pe_size) {
	case PE_SIZE_PTE:
		return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
	case PE_SIZE_PMD:
		return dax_iomap_pmd_fault(vmf, pfnp, ops);
	default:
		return VM_FAULT_FALLBACK;
	}
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);

/*
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
 * @pfn: PFN to insert
 * @order: Order of entry to insert.
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file.  It also marks the page cache entry as dirty.
 */
static vm_fault_t
dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
	void *entry;
	vm_fault_t ret;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, order);
	/* Did we race with someone splitting entry or so? */
	if (!entry || dax_is_conflict(entry) ||
	    (order == 0 && !dax_is_pte_entry(entry))) {
		put_unlocked_entry(&xas, entry);
		xas_unlock_irq(&xas);
		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
						      VM_FAULT_NOPAGE);
		return VM_FAULT_NOPAGE;
	}
	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
	dax_lock_entry(&xas, entry);
	xas_unlock_irq(&xas);
	if (order == 0)
		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
	else if (order == PMD_ORDER)
		ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
#endif
	else
		ret = VM_FAULT_FALLBACK;
	dax_unlock_entry(&xas, entry);
	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
	return ret;
}

/**
 * dax_finish_sync_fault - finish synchronous page fault
 * @vmf: The description of the fault
 * @pe_size: Size of entry to be inserted
 * @pfn: PFN to insert
 *
 * This function ensures that the file range touched by the page fault is
 * stored persistently on the medium and handles inserting of appropriate page
 * table entry.
 */
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, pfn_t pfn)
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	unsigned int order = pe_order(pe_size);
	size_t len = PAGE_SIZE << order;

	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);