// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/dax.c - Direct Access filesystem code
 * Copyright (c) 2013-2014 Intel Corporation
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 */

#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <asm/pgalloc.h>

#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

/* Translate a pte/pmd/pud fault size into the order of its mapping entry. */
static inline unsigned int pe_order(enum page_entry_size pe_size)
{
	if (pe_size == PE_SIZE_PTE)
		return PAGE_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PUD)
		return PUD_SHIFT - PAGE_SHIFT;
	return ~0;
}

/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (ie low bits) within a PMD of a page offset */
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)

static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);

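/*
 * DAX pagecache entries use XArray value entries so they can't be mistaken
 * for pages.  We use one bit for locking, one bit for the entry size (PMD)
 * and two more to tell us if the entry is a zero page or an empty entry that
 * is just used for locking.  In total four special bits, which is why the
 * pfn is stored shifted up by DAX_SHIFT in the encoded value.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
 * block allocation.
 */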
#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)

static unsigned long dax_to_pfn(void *entry)
{
	return xa_to_value(entry) >> DAX_SHIFT;
}

static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}

static bool dax_is_locked(void *entry)
{
	return xa_to_value(entry) & DAX_LOCKED;
}

static unsigned int dax_entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_ORDER;
	return 0;
}

static unsigned long dax_is_pmd_entry(void *entry)
{
	return xa_to_value(entry) & DAX_PMD;
}

static bool dax_is_pte_entry(void *entry)
{
	return !(xa_to_value(entry) & DAX_PMD);
}

static int dax_is_zero_entry(void *entry)
{
	return xa_to_value(entry) & DAX_ZERO_PAGE;
}

static int dax_is_empty_entry(void *entry)
{
	return xa_to_value(entry) & DAX_EMPTY;
}

/*
 * true if the entry that was found is of a smaller order than the entry
 * we were looking for
 */
static bool dax_is_conflict(void *entry)
{
	return entry == XA_RETRY_ENTRY;
}

/*
 * DAX page cache entry locking
 */
struct exceptional_entry_key {
	struct xarray *xa;
	pgoff_t entry_start;
};

struct wait_exceptional_entry_queue {
	wait_queue_entry_t wait;
	struct exceptional_entry_key key;
};

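/**
 * enum dax_wake_mode: waitqueue wakeup behaviour
 * @WAKE_ALL: wake all waiters in the waitqueue
 * @WAKE_NEXT: wake only the first waiter in the waitqueue
 */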
enum dax_wake_mode {
	WAKE_ALL,
	WAKE_NEXT,
};

static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
		void *entry, struct exceptional_entry_key *key)
{
	unsigned long hash;
	unsigned long index = xas->xa_index;

	/*
	 * If 'entry' is a PMD, align the 'index' that we use for the wait
	 * table to the start of a PMD-sized range covering index.  That way
	 * PTE and PMD lookups for the same range hash to the same waitqueue.
	 */
	if (dax_is_pmd_entry(entry))
		index &= ~PG_PMD_COLOUR;
	key->xa = xas->xa;
	key->entry_start = index;

	hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
	return wait_table + hash;
}

static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
		unsigned int mode, int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->xa != ewait->key.xa ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

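/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */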
static void dax_wake_entry(struct xa_state *xas, void *entry,
			   enum dax_wake_mode mode)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(xas, entry, &key);

	/*
	 * Checking for a locked entry and prepare_to_wait_exclusive() happen
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * are in the waitqueue and the only relevant users are from
	 * get_unlocked_entry().
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
}

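/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it.  The caller must subsequently call
 * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
 * if it did.  The entry returned may have a larger order than @order.
 * If @order is larger than the order of the entry found in i_pages, this
 * function returns a dax_is_conflict entry.
 *
 * Must be called with the i_pages lock held.
 */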
static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_find_conflict(xas);
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			return entry;
		if (dax_entry_order(entry) < order)
			return XA_RETRY_ENTRY;
		if (!dax_is_locked(entry))
			return entry;

		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}

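/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages).
 * After we call xas_unlock_irq(), we cannot touch xas->xa.
 */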
static void wait_entry_unlocked(struct xa_state *xas, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xas_unlock_irq(xas);
	schedule();
	finish_wait(wq, &ewait.wait);
}

static void put_unlocked_entry(struct xa_state *xas, void *entry,
			       enum dax_wake_mode mode)
{
	if (entry && !dax_is_conflict(entry))
		dax_wake_entry(xas, entry, mode);
}

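/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the xa_lock, so we know the xa_state is stale and must be reset
 * before use.
 */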
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	BUG_ON(dax_is_locked(entry));
	xas_reset(xas);
	xas_lock_irq(xas);
	old = xas_store(xas, entry);
	xas_unlock_irq(xas);
	BUG_ON(!dax_is_locked(old));
	dax_wake_entry(xas, entry, WAKE_NEXT);
}

/*
 * Return: The entry stored at this location before it was locked.
 */
static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
	unsigned long v = xa_to_value(entry);
	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}

static unsigned long dax_entry_size(void *entry)
{
	if (dax_is_zero_entry(entry))
		return 0;
	else if (dax_is_empty_entry(entry))
		return 0;
	else if (dax_is_pmd_entry(entry))
		return PMD_SIZE;
	else
		return PAGE_SIZE;
}

static unsigned long dax_end_pfn(void *entry)
{
	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
	for (pfn = dax_to_pfn(entry); \
			pfn < dax_end_pfn(entry); pfn++)

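/*
 * Associate the pfns backing a DAX entry with the address_space: each
 * struct page gets ->mapping and ->index set so the page can be traced
 * back to its file offset.
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index offsets.
 */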
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);
		page->mapping = mapping;
		page->index = index + i++;
	}
}

static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}

static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}

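/*
 * dax_lock_page - Lock the DAX entry corresponding to a page
 * @page: The page whose entry we want to lock
 *
 * Context: Process context.
 * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
 * not be locked.
 */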
dax_entry_t dax_lock_page(struct page *page)
{
	XA_STATE(xas, NULL, 0);
	void *entry;

	/* Ensure page->mapping isn't freed while we look at it */
	rcu_read_lock();
	for (;;) {
		struct address_space *mapping = READ_ONCE(page->mapping);

		entry = NULL;
		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		entry = (void *)~0UL;
		if (S_ISCHR(mapping->host->i_mode))
			break;

		xas.xa = &mapping->i_pages;
		xas_lock_irq(&xas);
		if (mapping != page->mapping) {
			xas_unlock_irq(&xas);
			continue;
		}
		xas_set(&xas, page->index);
		entry = xas_load(&xas);
		if (dax_is_locked(entry)) {
			rcu_read_unlock();
			wait_entry_unlocked(&xas, entry);
			rcu_read_lock();
			continue;
		}
		dax_lock_entry(&xas, entry);
		xas_unlock_irq(&xas);
		break;
	}
	rcu_read_unlock();
	return (dax_entry_t)entry;
}

void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
	struct address_space *mapping = page->mapping;
	XA_STATE(xas, &mapping->i_pages, page->index);

	if (S_ISCHR(mapping->host->i_mode))
		return;

	dax_unlock_entry(&xas, (void *)cookie);
}

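/*
 * Look up, or create, the DAX entry for the given index and lock it.
 *
 * On success the locked entry is returned.  PTE-sized and PMD-sized entries
 * are handled: if @order is zero and a huge zero page or empty PMD entry
 * covers the range, that entry is downgraded to PTE granularity (the range
 * is unmapped and the PMD entry removed) before a fresh PTE entry is
 * created.  If @order is PMD_ORDER and a conflicting smaller entry exists,
 * an internal VM_FAULT_FALLBACK entry is returned so the caller can fall
 * back to PTE faults.
 *
 * Errors are returned as xa_mk_internal(VM_FAULT_*) values, which callers
 * must check for with xa_is_internal().
 */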
static void *grab_mapping_entry(struct xa_state *xas,
		struct address_space *mapping, unsigned int order)
{
	unsigned long index = xas->xa_index;
	bool pmd_downgrade;	/* splitting PMD entry into PTE entries? */
	void *entry;

retry:
	pmd_downgrade = false;
	xas_lock_irq(xas);
	entry = get_unlocked_entry(xas, order);

	if (entry) {
		if (dax_is_conflict(entry))
			goto fallback;
		if (!xa_is_value(entry)) {
			xas_set_err(xas, -EIO);
			goto out_unlock;
		}

		if (order == 0) {
			if (dax_is_pmd_entry(entry) &&
			    (dax_is_zero_entry(entry) ||
			     dax_is_empty_entry(entry))) {
				pmd_downgrade = true;
			}
		}
	}

	if (pmd_downgrade) {
		/*
		 * Make sure 'entry' remains valid while we drop
		 * the i_pages lock.
		 */
		dax_lock_entry(xas, entry);

		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
		if (dax_is_zero_entry(entry)) {
			xas_unlock_irq(xas);
			unmap_mapping_pages(mapping,
					xas->xa_index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
			xas_reset(xas);
			xas_lock_irq(xas);
		}

		dax_disassociate_entry(entry, mapping, false);
		xas_store(xas, NULL);	/* undo the PMD join */
		dax_wake_entry(xas, entry, WAKE_ALL);
		mapping->nrpages -= PG_PMD_NR;
		entry = NULL;
		xas_set(xas, index);
	}

	if (entry) {
		dax_lock_entry(xas, entry);
	} else {
		unsigned long flags = DAX_EMPTY;

		if (order > 0)
			flags |= DAX_PMD;
		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
		dax_lock_entry(xas, entry);
		if (xas_error(xas))
			goto out_unlock;
		mapping->nrpages += 1UL << order;
	}

out_unlock:
	xas_unlock_irq(xas);
	if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
		goto retry;
	if (xas->xa_node == XA_ERROR(-ENOMEM))
		return xa_mk_internal(VM_FAULT_OOM);
	if (xas_error(xas))
		return xa_mk_internal(VM_FAULT_SIGBUS);
	return entry;
fallback:
	xas_unlock_irq(xas);
	return xa_mk_internal(VM_FAULT_FALLBACK);
}

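/**
 * dax_layout_busy_page_range - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 * @start: Starting offset. Page containing 'start' is included.
 * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
 *       pages from 'start' till the end of file are included.
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */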
struct page *dax_layout_busy_page_range(struct address_space *mapping,
					loff_t start, loff_t end)
{
	void *entry;
	unsigned int scanned = 0;
	struct page *page = NULL;
	pgoff_t start_idx = start >> PAGE_SHIFT;
	pgoff_t end_idx;
	XA_STATE(xas, &mapping->i_pages, start_idx);

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	/* If end == LLONG_MAX, all pages from start to till end of file */
	if (end == LLONG_MAX)
		end_idx = ULONG_MAX;
	else
		end_idx = end >> PAGE_SHIFT;

	/*
	 * Unmap the range first so that any new user mapping of these pages
	 * has to go through a fault and re-establish the DAX entry; the
	 * busy-page scan below then catches pages that still hold extra
	 * references (e.g. from get_user_pages()).
	 */
	unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, end_idx) {
		if (WARN_ON_ONCE(!xa_is_value(entry)))
			continue;
		if (unlikely(dax_is_locked(entry)))
			entry = get_unlocked_entry(&xas, 0);
		if (entry)
			page = dax_busy_page(entry);
		put_unlocked_entry(&xas, entry, WAKE_NEXT);
		if (page)
			break;
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);

struct page *dax_layout_busy_page(struct address_space *mapping)
{
	return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);

static int __dax_invalidate_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	XA_STATE(xas, &mapping->i_pages, index);
	int ret = 0;
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, 0);
	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
		goto out;
	if (!trunc &&
	    (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
	     xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
		goto out;
	dax_disassociate_entry(entry, mapping, trunc);
	xas_store(&xas, NULL);
	mapping->nrpages -= 1UL << dax_entry_order(entry);
	ret = 1;
out:
	put_unlocked_entry(&xas, entry, WAKE_ALL);
	xas_unlock_irq(&xas);
	return ret;
}

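/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
 */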
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	int ret = __dax_invalidate_entry(mapping, index, true);

	/*
	 * This gets called from truncate / punch_hole path. As such, the caller
	 * must hold locks protecting against concurrent modifications of the
	 * page cache (usually fs-private i_mmap_sem for writing). Since the
	 * caller has seen a DAX entry for this index, we better find it
	 * in the page cache.
	 */
	WARN_ON_ONCE(!ret);
	return ret;
}

/*
 * Invalidate DAX entry if it is clean.
 */
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index)
{
	return __dax_invalidate_entry(mapping, index, false);
}

static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
			     sector_t sector, struct page *to, unsigned long vaddr)
{
	void *vto, *kaddr;
	pgoff_t pgoff;
	long rc;
	int id;

	rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();
	rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
	kunmap_atomic(vto);
	dax_read_unlock(id);
	return 0;
}

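/*
 * By this point grab_mapping_entry() has ensured that we have a locked entry
 * of the appropriate size so we don't have to worry about downgrading PMDs to
 * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * already in the tree, we will skip the insertion and just dirty the PMD as
 * appropriate.
 */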
static void *dax_insert_entry(struct xa_state *xas,
		struct address_space *mapping, struct vm_fault *vmf,
		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
{
	void *new_entry = dax_make_entry(pfn, flags);

	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
		unsigned long index = xas->xa_index;
		/* we are replacing a zero page with block mapping */
		if (dax_is_pmd_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
		else /* pte entry */
			unmap_mapping_pages(mapping, index, 1, false);
	}

	xas_reset(xas);
	xas_lock_irq(xas);
	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
		void *old;

		dax_disassociate_entry(entry, mapping, false);
		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
		/*
		 * Only swap our new entry into the page cache if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the cache, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the
		 * existing entry is a PMD, we will just leave the PMD in the
		 * tree and dirty it if necessary.
		 */
		old = dax_lock_entry(xas, new_entry);
		WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
					DAX_LOCKED));
		entry = new_entry;
	} else {
		xas_load(xas);	/* Walk the xa_state */
	}

	if (dirty)
		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

	xas_unlock_irq(xas);
	return entry;
}

static inline
unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	return address;
}

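/* Walk all mappings of a given index of a file and writeprotect them */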
static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
		unsigned long pfn)
{
	struct vm_area_struct *vma;
	pte_t pte, *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
		struct mmu_notifier_range range;
		unsigned long address;

		cond_resched();

		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = pgoff_address(index, vma);

		/*
		 * follow_invalidate_pte() will use the range to call
		 * mmu_notifier_invalidate_range_start() on our behalf before
		 * taking any lock.
		 */
		if (follow_invalidate_pte(vma->vm_mm, address, &range, &ptep,
					  &pmdp, &ptl))
			continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
			pmd_t pmd;

			if (pfn != pmd_pfn(*pmdp))
				goto unlock_pmd;
			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
				goto unlock_pmd;

			flush_cache_page(vma, address, pfn);
			pmd = pmdp_invalidate(vma, address, pmdp);
			pmd = pmd_wrprotect(pmd);
			pmd = pmd_mkclean(pmd);
			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
unlock_pmd:
#endif
			spin_unlock(ptl);
		} else {
			if (pfn != pte_pfn(*ptep))
				goto unlock_pte;
			if (!pte_dirty(*ptep) && !pte_write(*ptep))
				goto unlock_pte;

			flush_cache_page(vma, address, pfn);
			pte = ptep_clear_flush(vma, address, ptep);
			pte = pte_wrprotect(pte);
			pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, address, ptep, pte);
unlock_pte:
			pte_unmap_unlock(ptep, ptl);
		}

		mmu_notifier_invalidate_range_end(&range);
	}
	i_mmap_unlock_read(mapping);
}

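/*
 * Flush one DAX entry to persistent memory: writeprotect all user mappings
 * of the entry's pfns, flush the CPU cache for the whole entry, and then
 * clear the dirty tag.  The entry is expected to have been tagged TOWRITE
 * by dax_writeback_mapping_range() before this is called.
 */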
static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
		struct address_space *mapping, void *entry)
{
	unsigned long pfn, index, count;
	long ret = 0;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong if we're modifying an entry that is not a DAX entry.
	 */
	if (WARN_ON(!xa_is_value(entry)))
		return -EIO;

	if (unlikely(dax_is_locked(entry))) {
		void *old_entry = entry;

		entry = get_unlocked_entry(xas, 0);

		/* Entry got punched out / reallocated? */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			goto put_unlocked;
		/*
		 * Entry got reallocated elsewhere? No need to writeback.
		 * We have to compare pfns as we must not bail out due to
		 * difference in lockbit or entry type.
		 */
		if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
			goto put_unlocked;
		if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
					dax_is_zero_entry(entry))) {
			ret = -EIO;
			goto put_unlocked;
		}

		/* Another fsync thread may have already done this entry */
		if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;
	}

	/* Lock the entry to serialize with page faults */
	dax_lock_entry(xas, entry);

	/*
	 * We can clear the tag now but we have to be careful so that concurrent
	 * dax_writeback_one() calls for the same index cannot finish before we
	 * actually flush the caches. This is achieved because any such call
	 * looks at the entry under the i_pages lock, sees it locked, and waits
	 * for the lock to be released.
	 */
	xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
	xas_unlock_irq(xas);

	/*
	 * If dax_writeback_mapping_range() was given a wbc->range_start
	 * in the middle of a PMD, the 'index' we use needs to be aligned to
	 * the start of the PMD.  This allows us to flush for PMD_SIZE and not
	 * have to worry about partial PMD writebacks.
	 */
	pfn = dax_to_pfn(entry);
	count = 1UL << dax_entry_order(entry);
	index = xas->xa_index & ~(count - 1);

	dax_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
	/*
	 * After we have flushed the cache, we can clear the dirty tag.  There
	 * cannot be new dirty data in the pfn after this point until the
	 * entry is write-faulted again, which will redirty it.
	 */
	xas_reset(xas);
	xas_lock_irq(xas);
	xas_store(xas, entry);
	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
	dax_wake_entry(xas, entry, WAKE_NEXT);

	trace_dax_writeback_one(mapping->host, index, count);
	return ret;

 put_unlocked:
	put_unlocked_entry(xas, entry, WAKE_NEXT);
	return ret;
}

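/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */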
int dax_writeback_mapping_range(struct address_space *mapping,
		struct dax_device *dax_dev, struct writeback_control *wbc)
{
	XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
	struct inode *inode = mapping->host;
	pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
	void *entry;
	int ret = 0;
	unsigned int scanned = 0;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (mapping_empty(mapping) || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	trace_dax_writeback_range(inode, xas.xa_index, end_index);

	tag_pages_for_writeback(mapping, xas.xa_index, end_index);

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
		ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
		if (ret < 0) {
			mapping_set_error(mapping, ret);
			break;
		}
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}

static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
			 pfn_t *pfnp)
{
	const sector_t sector = dax_iomap_sector(iomap, pos);
	pgoff_t pgoff;
	int id, rc;
	long length;

	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
	if (rc)
		return rc;
	id = dax_read_lock();
	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
				   NULL, pfnp);
	if (length < 0) {
		rc = length;
		goto out;
	}
	rc = -EINVAL;
	if (PFN_PHYS(length) < size)
		goto out;
	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
		goto out;
	/* For larger pages we need devmap */
	if (length > 1 && !pfn_t_devmap(*pfnp))
		goto out;
	rc = 0;
out:
	dax_read_unlock(id);
	return rc;
}

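/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the 4k zero page.
 * If this page is ever written to we will re-fault and change the mapping to
 * point to real DAX storage instead.
 */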
static vm_fault_t dax_load_hole(struct xa_state *xas,
		struct address_space *mapping, void **entry,
		struct vm_fault *vmf)
{
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
	vm_fault_t ret;

	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_ZERO_PAGE, false);

	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
	trace_dax_load_hole(inode, vmf, ret);
	return ret;
}

s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
{
	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
	pgoff_t pgoff;
	long rc, id;
	void *kaddr;
	bool page_aligned = false;
	unsigned offset = offset_in_page(pos);
	unsigned size = min_t(u64, PAGE_SIZE - offset, length);

	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
	    (size == PAGE_SIZE))
		page_aligned = true;

	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();

	if (page_aligned)
		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
	else
		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}

	if (!page_aligned) {
		memset(kaddr + offset, 0, size);
		dax_flush(iomap->dax_dev, kaddr + offset, size);
	}
	dax_read_unlock(id);
	return size;
}

static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap, struct iomap *srcmap)
{
	struct block_device *bdev = iomap->bdev;
	struct dax_device *dax_dev = iomap->dax_dev;
	struct iov_iter *iter = data;
	loff_t end = pos + length, done = 0;
	ssize_t ret = 0;
	size_t xfer;
	int id;

	if (iov_iter_rw(iter) == READ) {
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	/*
	 * Write can allocate block for an area which has a hole page mapped
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in mmap.
	 */
	if (iomap->flags & IOMAP_F_NEW) {
		invalidate_inode_pages2_range(inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
	}

	id = dax_read_lock();
	while (pos < end) {
		unsigned offset = pos & (PAGE_SIZE - 1);
		const size_t size = ALIGN(length + offset, PAGE_SIZE);
		const sector_t sector = dax_iomap_sector(iomap, pos);
		ssize_t map_len;
		pgoff_t pgoff;
		void *kaddr;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
		if (ret)
			break;

		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
				&kaddr, NULL);
		if (map_len < 0) {
			ret = map_len;
			break;
		}

		map_len = PFN_PHYS(map_len);
		kaddr += offset;
		map_len -= offset;
		if (map_len > end - pos)
			map_len = end - pos;

		/*
		 * The userspace address for the memory copy has already been
		 * validated via access_ok() in either vfs_read() or
		 * vfs_write(), depending on which operation we are doing.
		 */
		if (iov_iter_rw(iter) == WRITE)
			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
					map_len, iter);
		else
			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
					map_len, iter);

		pos += xfer;
		length -= xfer;
		done += xfer;

		if (xfer == 0)
			ret = -EFAULT;
		if (xfer < map_len)
			break;
	}
	dax_read_unlock(id);

	return done ? done : ret;
}

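/**
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory.  The callers needs to take care of read/write exclusion
 * and evicting any page cache pages in the region under I/O.
 */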
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
	unsigned flags = 0;

	if (iov_iter_rw(iter) == WRITE) {
		lockdep_assert_held_write(&inode->i_rwsem);
		flags |= IOMAP_WRITE;
	} else {
		lockdep_assert_held(&inode->i_rwsem);
	}

	if (iocb->ki_flags & IOCB_NOWAIT)
		flags |= IOMAP_NOWAIT;

	while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
				iter, dax_iomap_actor);
		if (ret <= 0)
			break;
		pos += ret;
		done += ret;
	}

	iocb->ki_pos += done;
	return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);

static vm_fault_t dax_fault_return(int error)
{
	if (error == 0)
		return VM_FAULT_NOPAGE;
	return vmf_error(error);
}

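/*
 * MAP_SYNC on a dax mapping guarantees dirty metadata is
 * flushed on write-faults (non-cow), but not read-faults.
 */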
static bool dax_fault_is_synchronous(unsigned long flags,
		struct vm_area_struct *vma, struct iomap *iomap)
{
	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
		&& (iomap->flags & IOMAP_F_DIRTY);
}

static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       int *iomap_errp, const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
	struct iomap iomap = { .type = IOMAP_HOLE };
	struct iomap srcmap = { .type = IOMAP_HOLE };
	unsigned flags = IOMAP_FAULT;
	int error, major = 0;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	vm_fault_t ret = 0;
	void *entry;
	pfn_t pfn;

	trace_dax_pte_fault(inode, vmf, ret);
	/*
	 * Check whether offset isn't beyond end of file now. Caller is supposed
	 * to hold locks serializing us with truncate / punch hole so this is
	 * a reliable test.
	 */
	if (pos >= i_size_read(inode)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	if (write && !vmf->cow_page)
		flags |= IOMAP_WRITE;

	entry = grab_mapping_entry(&xas, mapping, 0);
	if (xa_is_internal(entry)) {
		ret = xa_to_internal(entry);
		goto out;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PMD fault that overlaps with
	 * the PTE we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
		ret = VM_FAULT_NOPAGE;
		goto unlock_entry;
	}

	/*
	 * Note that we don't bother to use iomap_apply here: DAX required
	 * the file system block size to be equal the page size, which means
	 * that we never have to deal with more than a single extent here.
	 */
	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
	if (iomap_errp)
		*iomap_errp = error;
	if (error) {
		ret = dax_fault_return(error);
		goto unlock_entry;
	}
	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
		error = -EIO;	/* fs corruption? */
		goto error_finish_iomap;
	}

	if (vmf->cow_page) {
		sector_t sector = dax_iomap_sector(&iomap, pos);

		switch (iomap.type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			clear_user_highpage(vmf->cow_page, vaddr);
			break;
		case IOMAP_MAPPED:
			error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev,
						  sector, vmf->cow_page, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EIO;
			break;
		}

		if (error)
			goto error_finish_iomap;

		__SetPageUptodate(vmf->cow_page);
		ret = finish_fault(vmf);
		if (!ret)
			ret = VM_FAULT_DONE_COW;
		goto finish_iomap;
	}

	sync = dax_fault_is_synchronous(flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		if (iomap.flags & IOMAP_F_NEW) {
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
			major = VM_FAULT_MAJOR;
		}
		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
		if (error < 0)
			goto error_finish_iomap;

		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						 0, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PTE into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp)) {
				error = -EIO;
				goto error_finish_iomap;
			}
			*pfnp = pfn;
			ret = VM_FAULT_NEEDDSYNC | major;
			goto finish_iomap;
		}
		trace_dax_insert_mapping(inode, vmf, entry);
		if (write)
			ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
		else
			ret = vmf_insert_mixed(vma, vaddr, pfn);

		goto finish_iomap;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (!write) {
			ret = dax_load_hole(&xas, mapping, &entry, vmf);
			goto finish_iomap;
		}
		fallthrough;
	default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
	}

 error_finish_iomap:
	ret = dax_fault_return(error);
 finish_iomap:
	if (ops->iomap_end) {
		int copied = PAGE_SIZE;

		if (ret & VM_FAULT_ERROR)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PTE we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
	}
 unlock_entry:
	dax_unlock_entry(&xas, entry);
 out:
	trace_dax_pte_fault_done(inode, vmf, ret);
	return ret | major;
}

#ifdef CONFIG_FS_DAX_PMD
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
		struct iomap *iomap, void **entry)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = mapping->host;
	pgtable_t pgtable = NULL;
	struct page *zero_page;
	spinlock_t *ptl;
	pmd_t pmd_entry;
	pfn_t pfn;

	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);

	if (unlikely(!zero_page))
		goto fallback;

	pfn = page_to_pfn_t(zero_page);
	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_PMD | DAX_ZERO_PAGE, false);

	if (arch_needs_pgtable_deposit()) {
		pgtable = pte_alloc_one(vma->vm_mm);
		if (!pgtable)
			return VM_FAULT_OOM;
	}

	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
	if (!pmd_none(*(vmf->pmd))) {
		spin_unlock(ptl);
		goto fallback;
	}

	if (pgtable) {
		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
		mm_inc_nr_ptes(vma->vm_mm);
	}
	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
	pmd_entry = pmd_mkhuge(pmd_entry);
	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
	spin_unlock(ptl);
	trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
	return VM_FAULT_NOPAGE;

fallback:
	if (pgtable)
		pte_free(vma->vm_mm, pgtable);
	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
	return VM_FAULT_FALLBACK;
}

static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
	struct inode *inode = mapping->host;
	vm_fault_t result = VM_FAULT_FALLBACK;
	struct iomap iomap = { .type = IOMAP_HOLE };
	struct iomap srcmap = { .type = IOMAP_HOLE };
	pgoff_t max_pgoff;
	void *entry;
	loff_t pos;
	int error;
	pfn_t pfn;

	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole so
	 * this is a reliable test.
	 */
	max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);

	/*
	 * Make sure that the faulting address's PMD offset (colour) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		goto fallback;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vma->vm_flags & VM_SHARED))
		goto fallback;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vma->vm_start)
		goto fallback;
	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
		goto fallback;

	if (xas.xa_index >= max_pgoff) {
		result = VM_FAULT_SIGBUS;
		goto out;
	}

	/* If the PMD would extend beyond the file size */
	if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
		goto fallback;

	/*
	 * grab_mapping_entry() will make sure we get an empty PMD entry,
	 * a zero PMD entry or a DAX PMD.  If it can't (because a smaller
	 * entry is already in the page cache, for instance), it will
	 * return VM_FAULT_FALLBACK.
	 */
	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
	if (xa_is_internal(entry)) {
		result = xa_to_internal(entry);
		goto fallback;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PTE fault that overlaps with
	 * the PMD we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
			!pmd_devmap(*vmf->pmd)) {
		result = 0;
		goto unlock_entry;
	}

	/*
	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
	 * setting up a mapping, so really we're using iomap_begin() as a way
	 * to look up our filesystem block.
	 */
	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
			&srcmap);
	if (error)
		goto unlock_entry;

	if (iomap.offset + iomap.length < pos + PMD_SIZE)
		goto finish_iomap;

	sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);

	switch (iomap.type) {
	case IOMAP_MAPPED:
		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
		if (error < 0)
			goto finish_iomap;

		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						DAX_PMD, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PMD into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp))
				goto finish_iomap;
			*pfnp = pfn;
			result = VM_FAULT_NEEDDSYNC;
			goto finish_iomap;
		}

		trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
		result = vmf_insert_pfn_pmd(vmf, pfn, write);
		break;
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (WARN_ON_ONCE(write))
			break;
		result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

 finish_iomap:
	if (ops->iomap_end) {
		int copied = PMD_SIZE;

		if (result == VM_FAULT_FALLBACK)
			copied = 0;
		/*
		 * The fault is done by now and there's no way back (other
		 * thread may be already happily using PMD we have installed).
		 * Just ignore error from ->iomap_end since we cannot do much
		 * with it.
		 */
		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
				&iomap);
	}
 unlock_entry:
	dax_unlock_entry(&xas, entry);
 fallback:
	if (result == VM_FAULT_FALLBACK) {
		split_huge_pmd(vma, vmf->pmd, vmf->address);
		count_vm_event(THP_FAULT_FALLBACK);
	}
out:
	trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
	return result;
}
#else
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	return VM_FAULT_FALLBACK;
}
#endif /* CONFIG_FS_DAX_PMD */

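/**
 * dax_iomap_fault - handle a page fault on a DAX file
 * @vmf: The description of the fault
 * @pe_size: Size of the page to fault in
 * @pfnp: PFN to insert for synchronous faults if fsync is required
 * @iomap_errp: Storage for detailed error code in case of error
 * @ops: Iomap ops passed from the file system
 *
 * When a page fault occurs, filesystems may call this helper in
 * their fault handler for DAX files. dax_iomap_fault() assumes the caller
 * has done all the necessary locking for page fault to proceed
 * successfully.
 */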
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
		    pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
{
	switch (pe_size) {
	case PE_SIZE_PTE:
		return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
	case PE_SIZE_PMD:
		return dax_iomap_pmd_fault(vmf, pfnp, ops);
	default:
		return VM_FAULT_FALLBACK;
	}
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);

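/*
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
 * @pfn: PFN to insert
 * @order: Order of entry to insert.
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file.  It also marks the page cache entry as dirty.
 */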
static vm_fault_t
dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
	void *entry;
	vm_fault_t ret;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, order);
	/* Did we race with someone splitting entry or so? */
	if (!entry || dax_is_conflict(entry) ||
	    (order == 0 && !dax_is_pte_entry(entry))) {
		put_unlocked_entry(&xas, entry, WAKE_NEXT);
		xas_unlock_irq(&xas);
		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
						      VM_FAULT_NOPAGE);
		return VM_FAULT_NOPAGE;
	}
	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
	dax_lock_entry(&xas, entry);
	xas_unlock_irq(&xas);
	if (order == 0)
		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
	else if (order == PMD_ORDER)
		ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
#endif
	else
		ret = VM_FAULT_FALLBACK;
	dax_unlock_entry(&xas, entry);
	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
	return ret;
}

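/**
 * dax_finish_sync_fault - finish synchronous page fault
 * @vmf: The description of the fault
 * @pe_size: Size of entry to be inserted
 * @pfn: PFN to insert
 *
 * This function ensures that the file range touched by the page fault is
 * stored persistently on the media and handles inserting of appropriate page
 * table entry.
 */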
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, pfn_t pfn)
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	unsigned int order = pe_order(pe_size);
	size_t len = PAGE_SIZE << order;

	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);