/*
 * fs/dax.c - Direct Access filesystem code.
 *
 * Core support for filesystem DAX: mapping file offsets directly to
 * persistent-memory pages, bypassing the page cache.
 */
#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <asm/pgalloc.h>

#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

static inline unsigned int pe_order(enum page_entry_size pe_size)
{
	if (pe_size == PE_SIZE_PTE)
		return PAGE_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	if (pe_size == PE_SIZE_PUD)
		return PUD_SHIFT - PAGE_SHIFT;
	return ~0;
}

/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

/* The 'colour' (low bits) within a PMD of a page offset */
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR	(PMD_SIZE >> PAGE_SHIFT)

/* The order of a PMD entry */
#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)

static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
{
	int i;

	for (i = 0; i < DAX_WAIT_TABLE_ENTRIES; i++)
		init_waitqueue_head(wait_table + i);
	return 0;
}
fs_initcall(init_dax_wait_table);

/*
 * DAX pagecache entries use XArray value entries so they can't be mistaken
 * for pages.  We use one bit for locking, one bit for the entry size (PMD)
 * and two more to tell us if the entry is a zero page or an empty entry
 * that is just used for locking.  In total four special bits.
 *
 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the
 * ZERO_PAGE and EMPTY bits aren't set the entry is a normal DAX entry with
 * a filesystem block allocation.
 */
#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)

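/*
 * The helpers below pack and unpack the pfn and the flag bits above into
 * an XArray value entry.
 */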
static unsigned long dax_to_pfn(void *entry)
{
	return xa_to_value(entry) >> DAX_SHIFT;
}

static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
}

static bool dax_is_locked(void *entry)
{
	return xa_to_value(entry) & DAX_LOCKED;
}

static unsigned int dax_entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_ORDER;
	return 0;
}

static unsigned long dax_is_pmd_entry(void *entry)
{
	return xa_to_value(entry) & DAX_PMD;
}

static bool dax_is_pte_entry(void *entry)
{
	return !(xa_to_value(entry) & DAX_PMD);
}

static int dax_is_zero_entry(void *entry)
{
	return xa_to_value(entry) & DAX_ZERO_PAGE;
}

static int dax_is_empty_entry(void *entry)
{
	return xa_to_value(entry) & DAX_EMPTY;
}

/*
 * true if the entry that was found is of a smaller order than the entry
 * we were looking for
 */
static bool dax_is_conflict(void *entry)
{
	return entry == XA_RETRY_ENTRY;
}

/*
 * DAX page cache entry locking
 */
struct exceptional_entry_key {
	struct xarray *xa;
	pgoff_t entry_start;
};

struct wait_exceptional_entry_queue {
	wait_queue_entry_t wait;
	struct exceptional_entry_key key;
};

/**
 * enum dax_wake_mode: waitqueue wakeup behaviour
 * @WAKE_ALL: wake all waiters in the waitqueue
 * @WAKE_NEXT: wake only the first waiter in the waitqueue
 */
enum dax_wake_mode {
	WAKE_ALL,
	WAKE_NEXT,
};

static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
		void *entry, struct exceptional_entry_key *key)
{
	unsigned long hash;
	unsigned long index = xas->xa_index;

	/*
	 * If 'entry' is a PMD, align the 'index' that we use for the wait
	 * queue to the start of that PMD.  This ensures that all offsets in
	 * the range covered by the PMD map to the same bit lock.
	 */
	if (dax_is_pmd_entry(entry))
		index &= ~PG_PMD_COLOUR;
	key->xa = xas->xa;
	key->entry_start = index;

	hash = hash_long((unsigned long)xas->xa ^ index, DAX_WAIT_TABLE_BITS);
	return wait_table + hash;
}

static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
		unsigned int mode, int sync, void *keyp)
{
	struct exceptional_entry_key *key = keyp;
	struct wait_exceptional_entry_queue *ewait =
		container_of(wait, struct wait_exceptional_entry_queue, wait);

	if (key->xa != ewait->key.xa ||
	    key->entry_start != ewait->key.entry_start)
		return 0;
	return autoremove_wake_function(wait, mode, sync, NULL);
}

/*
 * @entry may no longer be the entry at the index in the mapping.
 * The important information it's conveying is whether the entry at
 * this index used to be a PMD entry.
 */
static void dax_wake_entry(struct xa_state *xas, void *entry,
			   enum dax_wake_mode mode)
{
	struct exceptional_entry_key key;
	wait_queue_head_t *wq;

	wq = dax_entry_waitqueue(xas, entry, &key);

	/*
	 * Checking for locked entry and prepare_to_wait_exclusive() happens
	 * under the i_pages lock, ditto for entry handling in our callers.
	 * So at this point all tasks that could have seen our entry locked
	 * and unlocked are waiting on the waitqueue.  This guarantees they
	 * will receive the wakeup.
	 */
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
}

/*
 * Look up entry in page cache, wait for it to become unlocked if it
 * is a DAX entry and return it.  The caller must subsequently call
 * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
 * if it did.  The entry returned may have a larger order than @order.
 * If @order is larger than the order of the entry found in i_pages, this
 * function returns a dax_is_conflict entry.
 *
 * Must be called with the i_pages lock held.
 */
static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
{
	void *entry;
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	for (;;) {
		entry = xas_find_conflict(xas);
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			return entry;
		if (dax_entry_order(entry) < order)
			return XA_RETRY_ENTRY;
		if (!dax_is_locked(entry))
			return entry;

		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
		prepare_to_wait_exclusive(wq, &ewait.wait,
					  TASK_UNINTERRUPTIBLE);
		xas_unlock_irq(xas);
		xas_reset(xas);
		schedule();
		finish_wait(wq, &ewait.wait);
		xas_lock_irq(xas);
	}
}

/*
 * The only thing keeping the address space around is the i_pages lock
 * (it's cycled in clear_inode() after removing the entries from i_pages),
 * so after we call xas_unlock_irq() we cannot touch xas->xa.
 */
static void wait_entry_unlocked(struct xa_state *xas, void *entry)
{
	struct wait_exceptional_entry_queue ewait;
	wait_queue_head_t *wq;

	init_wait(&ewait.wait);
	ewait.wait.func = wake_exceptional_entry_func;

	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
	/*
	 * Unlike get_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies.  Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
	 */
	prepare_to_wait(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
	xas_unlock_irq(xas);
	schedule();
	finish_wait(wq, &ewait.wait);
}

static void put_unlocked_entry(struct xa_state *xas, void *entry,
			       enum dax_wake_mode mode)
{
	if (entry && !dax_is_conflict(entry))
		dax_wake_entry(xas, entry, mode);
}

/*
 * We used the xa_state to get the entry, but then we locked the entry and
 * dropped the xa_lock, so we know the xa_state is stale and must be reset
 * before use.
 */
static void dax_unlock_entry(struct xa_state *xas, void *entry)
{
	void *old;

	BUG_ON(dax_is_locked(entry));
	xas_reset(xas);
	xas_lock_irq(xas);
	old = xas_store(xas, entry);
	xas_unlock_irq(xas);
	BUG_ON(!dax_is_locked(old));
	dax_wake_entry(xas, entry, WAKE_NEXT);
}

/*
 * Return: The entry stored at this location before it was locked.
 */
static void *dax_lock_entry(struct xa_state *xas, void *entry)
{
	unsigned long v = xa_to_value(entry);
	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
}

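/*
 * Size in bytes of the pmem range mapped by @entry: zero and empty entries
 * map no storage, PMD entries map PMD_SIZE, everything else one page.
 */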
static unsigned long dax_entry_size(void *entry)
{
	if (dax_is_zero_entry(entry))
		return 0;
	else if (dax_is_empty_entry(entry))
		return 0;
	else if (dax_is_pmd_entry(entry))
		return PMD_SIZE;
	else
		return PAGE_SIZE;
}

static unsigned long dax_end_pfn(void *entry)
{
	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
	for (pfn = dax_to_pfn(entry); \
			pfn < dax_end_pfn(entry); pfn++)

/*
 * TODO: for reflink+dax we need a way to associate a single page with
 * multiple address_space instances at different linear_page_index()
 * offsets.
 */
static void dax_associate_entry(void *entry, struct address_space *mapping,
		struct vm_area_struct *vma, unsigned long address)
{
	unsigned long size = dax_entry_size(entry), pfn, index;
	int i = 0;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	index = linear_page_index(vma, address & ~(size - 1));
	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(page->mapping);
		page->mapping = mapping;
		page->index = index + i++;
	}
}

static void dax_disassociate_entry(void *entry, struct address_space *mapping,
		bool trunc)
{
	unsigned long pfn;

	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
		page->mapping = NULL;
		page->index = 0;
	}
}

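/*
 * Return the first page mapped by @entry whose refcount is elevated
 * (e.g. pinned for DMA via get_user_pages()), or NULL if none is.
 */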
static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}

/*
 * dax_lock_page - Lock the DAX entry corresponding to a page
 * @page: The page whose entry we want to lock
 *
 * Context: Process context.
 * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
 * not be locked.
 */
dax_entry_t dax_lock_page(struct page *page)
{
	XA_STATE(xas, NULL, 0);
	void *entry;

	/* Ensure page->mapping isn't freed while we look at it */
	rcu_read_lock();
	for (;;) {
		struct address_space *mapping = READ_ONCE(page->mapping);

		entry = NULL;
		if (!mapping || !dax_mapping(mapping))
			break;

		/*
		 * In the device-dax case there's no need to lock, a
		 * struct dev_pagemap pin is sufficient to keep the
		 * inode alive, and we assume we have dev_pagemap pin
		 * otherwise we would not have a valid pfn_to_page()
		 * translation.
		 */
		entry = (void *)~0UL;
		if (S_ISCHR(mapping->host->i_mode))
			break;

		xas.xa = &mapping->i_pages;
		xas_lock_irq(&xas);
		if (mapping != page->mapping) {
			xas_unlock_irq(&xas);
			continue;
		}
		xas_set(&xas, page->index);
		entry = xas_load(&xas);
		if (dax_is_locked(entry)) {
			rcu_read_unlock();
			wait_entry_unlocked(&xas, entry);
			rcu_read_lock();
			continue;
		}
		dax_lock_entry(&xas, entry);
		xas_unlock_irq(&xas);
		break;
	}
	rcu_read_unlock();
	return (dax_entry_t)entry;
}

void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
	struct address_space *mapping = page->mapping;
	XA_STATE(xas, &mapping->i_pages, page->index);

	if (S_ISCHR(mapping->host->i_mode))
		return;

	dax_unlock_entry(&xas, (void *)cookie);
}

/*
 * Find page cache entry at given index. If it is a DAX entry, return it
 * with the entry locked. If the page cache doesn't contain an entry at
 * that index, add a locked empty entry.
 *
 * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
 * either return that locked entry or will return VM_FAULT_FALLBACK.
 * This will happen if there are any PTE entries within the PMD range
 * that we are requesting.
 *
 * We always favor PTE entries over PMD entries. There isn't a flow where
 * we evict PTE entries in order to 'upgrade' them to a PMD entry.  A PMD
 * insertion will fail if it finds any PTE entries already in the tree,
 * and a PTE insertion will cause an existing PMD entry to be unmapped and
 * downgraded to PTE entries.  This happens for both PMD zero pages as
 * well as PMD empty entries.
 *
 * The exception to this downgrade path is for PMD entries that have
 * real storage backing them.  We will leave these real PMD entries in
 * the tree, and PTE writes will simply dirty the entire PMD entry.
 *
 * On error, return a VM_FAULT code.
 */
static void *grab_mapping_entry(struct xa_state *xas,
		struct address_space *mapping, unsigned int order)
{
	unsigned long index = xas->xa_index;
	bool pmd_downgrade;	/* splitting PMD entry into PTE entries? */
	void *entry;

retry:
	pmd_downgrade = false;
	xas_lock_irq(xas);
	entry = get_unlocked_entry(xas, order);

	if (entry) {
		if (dax_is_conflict(entry))
			goto fallback;
		if (!xa_is_value(entry)) {
			xas_set_err(xas, -EIO);
			goto out_unlock;
		}

		if (order == 0) {
			if (dax_is_pmd_entry(entry) &&
			    (dax_is_zero_entry(entry) ||
			     dax_is_empty_entry(entry))) {
				pmd_downgrade = true;
			}
		}
	}

	if (pmd_downgrade) {
		/*
		 * Make sure 'entry' remains valid while we drop
		 * the i_pages lock.
		 */
		dax_lock_entry(xas, entry);

		/*
		 * Besides huge zero pages the only other thing that gets
		 * downgraded are empty entries which don't need to be
		 * unmapped.
		 */
		if (dax_is_zero_entry(entry)) {
			xas_unlock_irq(xas);
			unmap_mapping_pages(mapping,
					xas->xa_index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
			xas_reset(xas);
			xas_lock_irq(xas);
		}

		dax_disassociate_entry(entry, mapping, false);
		xas_store(xas, NULL);	/* undo the PMD join */
		dax_wake_entry(xas, entry, WAKE_ALL);
		mapping->nrpages -= PG_PMD_NR;
		entry = NULL;
		xas_set(xas, index);
	}

	if (entry) {
		dax_lock_entry(xas, entry);
	} else {
		unsigned long flags = DAX_EMPTY;

		if (order > 0)
			flags |= DAX_PMD;
		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
		dax_lock_entry(xas, entry);
		if (xas_error(xas))
			goto out_unlock;
		mapping->nrpages += 1UL << order;
	}

out_unlock:
	xas_unlock_irq(xas);
	if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
		goto retry;
	if (xas->xa_node == XA_ERROR(-ENOMEM))
		return xa_mk_internal(VM_FAULT_OOM);
	if (xas_error(xas))
		return xa_mk_internal(VM_FAULT_SIGBUS);
	return entry;
fallback:
	xas_unlock_irq(xas);
	return xa_mk_internal(VM_FAULT_FALLBACK);
}

/**
 * dax_layout_busy_page_range - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 * @start: Starting offset. Page containing 'start' is included.
 * @end: End offset. Page containing 'end' is included. If 'end' is
 *       LLONG_MAX, pages from 'start' till the end of file are included.
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
struct page *dax_layout_busy_page_range(struct address_space *mapping,
					loff_t start, loff_t end)
{
	void *entry;
	unsigned int scanned = 0;
	struct page *page = NULL;
	pgoff_t start_idx = start >> PAGE_SHIFT;
	pgoff_t end_idx;
	XA_STATE(xas, &mapping->i_pages, start_idx);

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	/* If end == LLONG_MAX, all pages from start to till end of file */
	if (end == LLONG_MAX)
		end_idx = ULONG_MAX;
	else
		end_idx = end >> PAGE_SHIFT;

	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the iteration and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_pages() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
	unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, end_idx) {
		if (WARN_ON_ONCE(!xa_is_value(entry)))
			continue;
		if (unlikely(dax_is_locked(entry)))
			entry = get_unlocked_entry(&xas, 0);
		if (entry)
			page = dax_busy_page(entry);
		put_unlocked_entry(&xas, entry, WAKE_NEXT);
		if (page)
			break;
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);

struct page *dax_layout_busy_page(struct address_space *mapping)
{
	return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);

static int __dax_invalidate_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
	XA_STATE(xas, &mapping->i_pages, index);
	int ret = 0;
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, 0);
	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
		goto out;
	if (!trunc &&
	    (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY) ||
	     xas_get_mark(&xas, PAGECACHE_TAG_TOWRITE)))
		goto out;
	dax_disassociate_entry(entry, mapping, trunc);
	xas_store(&xas, NULL);
	mapping->nrpages -= 1UL << dax_entry_order(entry);
	ret = 1;
out:
	put_unlocked_entry(&xas, entry, WAKE_ALL);
	xas_unlock_irq(&xas);
	return ret;
}

/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
 */
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
{
	int ret = __dax_invalidate_entry(mapping, index, true);

	/*
	 * This gets called from truncate / punch_hole path. As such, the
	 * caller must hold locks protecting against concurrent modifications
	 * of the page cache (usually fs-private i_mmap_sem for writing).
	 * Since the caller has seen a DAX entry for this index, we better
	 * find it here as well.
	 */
	WARN_ON_ONCE(!ret);
	return ret;
}

/*
 * Invalidate DAX entry if it is clean.
 */
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index)
{
	return __dax_invalidate_entry(mapping, index, false);
}

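/*
 * Copy one page out of DAX storage into @to, the freshly allocated page
 * cache page used to satisfy a write fault on a private (CoW) mapping.
 */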
static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
			     sector_t sector, struct page *to, unsigned long vaddr)
{
	void *vto, *kaddr;
	pgoff_t pgoff;
	long rc;
	int id;

	rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();
	rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}
	vto = kmap_atomic(to);
	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
	kunmap_atomic(vto);
	dax_read_unlock(id);
	return 0;
}

/*
 * By this point grab_mapping_entry() has ensured that we have a locked entry
 * of the appropriate size so we don't have to worry about downgrading PMDs to
 * PTEs.  If we happen to be trying to insert a PTE and there is a PMD
 * already in the tree, we will skip the insertion and just dirty the PMD as
 * appropriate.
 */
static void *dax_insert_entry(struct xa_state *xas,
		struct address_space *mapping, struct vm_fault *vmf,
		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
{
	void *new_entry = dax_make_entry(pfn, flags);

	if (dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
		unsigned long index = xas->xa_index;
		/* we are replacing a zero page with block mapping */
		if (dax_is_pmd_entry(entry))
			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
					PG_PMD_NR, false);
		else /* pte entry */
			unmap_mapping_pages(mapping, index, 1, false);
	}

	xas_reset(xas);
	xas_lock_irq(xas);
	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
		void *old;

		dax_disassociate_entry(entry, mapping, false);
		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
		/*
		 * Only swap our new entry into the page cache if the current
		 * entry is a zero page or an empty entry.  If a normal PTE or
		 * PMD entry is already in the cache, we leave it alone.  This
		 * means that if we are trying to insert a PTE and the
		 * existing entry is a PMD, we will just leave the PMD in the
		 * tree and dirty it if necessary.
		 */
		old = dax_lock_entry(xas, new_entry);
		WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
					DAX_LOCKED));
		entry = new_entry;
	} else {
		xas_load(xas);	/* Walk the xa_state */
	}

	if (dirty)
		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);

	xas_unlock_irq(xas);
	return entry;
}

static inline
unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
{
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	return address;
}

/* Walk all mappings of a given index of a file and writeprotect them */
static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
		unsigned long pfn)
{
	struct vm_area_struct *vma;
	pte_t pte, *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
		struct mmu_notifier_range range;
		unsigned long address;

		cond_resched();

		if (!(vma->vm_flags & VM_SHARED))
			continue;

		address = pgoff_address(index, vma);

		/*
		 * follow_invalidate_pte() will use the range to call
		 * mmu_notifier_invalidate_range_start() on our behalf before
		 * taking any lock.
		 */
		if (follow_invalidate_pte(vma->vm_mm, address, &range, &ptep,
					  &pmdp, &ptl))
			continue;

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (pmdp) {
#ifdef CONFIG_FS_DAX_PMD
			pmd_t pmd;

			if (pfn != pmd_pfn(*pmdp))
				goto unlock_pmd;
			if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
				goto unlock_pmd;

			flush_cache_page(vma, address, pfn);
			pmd = pmdp_invalidate(vma, address, pmdp);
			pmd = pmd_wrprotect(pmd);
			pmd = pmd_mkclean(pmd);
			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
unlock_pmd:
#endif
			spin_unlock(ptl);
		} else {
			if (pfn != pte_pfn(*ptep))
				goto unlock_pte;
			if (!pte_dirty(*ptep) && !pte_write(*ptep))
				goto unlock_pte;

			flush_cache_page(vma, address, pfn);
			pte = ptep_clear_flush(vma, address, ptep);
			pte = pte_wrprotect(pte);
			pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, address, ptep, pte);
unlock_pte:
			pte_unmap_unlock(ptep, ptl);
		}

		mmu_notifier_invalidate_range_end(&range);
	}
	i_mmap_unlock_read(mapping);
}
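/*
 * Flush a single dirty DAX entry: write-protect all userspace mappings of
 * its pfns, flush the CPU caches for the range, then clear the dirty tag.
 */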
static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
		struct address_space *mapping, void *entry)
{
	unsigned long pfn, index, count;
	long ret = 0;

	/*
	 * A page got tagged dirty in DAX mapping? Something is seriously
	 * wrong if we try to write it out.
	 */
	if (WARN_ON(!xa_is_value(entry)))
		return -EIO;

	if (unlikely(dax_is_locked(entry))) {
		void *old_entry = entry;

		entry = get_unlocked_entry(xas, 0);

		/* Entry got punched out / reallocated? */
		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
			goto put_unlocked;
		/*
		 * Entry got reallocated elsewhere? No need to writeback.
		 * We have to compare pfns as we must not bail out due to
		 * difference in lockbit or entry type.
		 */
		if (dax_to_pfn(old_entry) != dax_to_pfn(entry))
			goto put_unlocked;
		if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
					dax_is_zero_entry(entry))) {
			ret = -EIO;
			goto put_unlocked;
		}

		/* Another fsync thread may have already done this entry */
		if (!xas_get_mark(xas, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;
	}

	/* Lock the entry to serialize with page faults */
	dax_lock_entry(xas, entry);

	/*
	 * We can clear the tag now but we have to be careful so that
	 * concurrent dax_writeback_one() calls for the same index cannot
	 * finish before we actually flush the caches.  This is achieved as
	 * the calls will look at the entry only under the i_pages lock and
	 * once they do that they will see the entry locked and wait for it
	 * to unlock.
	 */
	xas_clear_mark(xas, PAGECACHE_TAG_TOWRITE);
	xas_unlock_irq(xas);

	/*
	 * If dax_writeback_mapping_range() was given a wbc->range_start in
	 * the middle of a PMD, the 'index' we use needs to be aligned to the
	 * start of the PMD, so that we flush for the whole entry and do not
	 * have to worry about partial PMD writebacks.
	 */
	pfn = dax_to_pfn(entry);
	count = 1UL << dax_entry_order(entry);
	index = xas->xa_index & ~(count - 1);

	dax_entry_mkclean(mapping, index, pfn);
	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
	/*
	 * After we have flushed the cache, we can clear the dirty tag. There
	 * cannot be new dirty data in the pfn after the flush has completed
	 * as the pfn mappings are writeprotected and fault waits for mapping
	 * entry lock.
	 */
	xas_reset(xas);
	xas_lock_irq(xas);
	xas_store(xas, entry);
	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
	dax_wake_entry(xas, entry, WAKE_NEXT);

	trace_dax_writeback_one(mapping->host, index, count);
	return ret;

 put_unlocked:
	put_unlocked_entry(xas, entry, WAKE_NEXT);
	return ret;
}

/*
 * Flush the mapping to the persistent domain within the byte range of [start,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
int dax_writeback_mapping_range(struct address_space *mapping,
		struct dax_device *dax_dev, struct writeback_control *wbc)
{
	XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
	struct inode *inode = mapping->host;
	pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
	void *entry;
	int ret = 0;
	unsigned int scanned = 0;

	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
		return -EIO;

	if (mapping_empty(mapping) || wbc->sync_mode != WB_SYNC_ALL)
		return 0;

	trace_dax_writeback_range(inode, xas.xa_index, end_index);

	tag_pages_for_writeback(mapping, xas.xa_index, end_index);

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, entry, end_index, PAGECACHE_TAG_TOWRITE) {
		ret = dax_writeback_one(&xas, dax_dev, mapping, entry);
		if (ret < 0) {
			mapping_set_error(mapping, ret);
			break;
		}
		if (++scanned % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
	trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

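/* Convert a position within the iomap extent to a 512-byte sector number. */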
static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
{
	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}

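/*
 * Look up the pfn backing (@pos, @size) of a mapped extent and check that
 * the returned range is large enough and suitably aligned for the fault.
 */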
static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
			 pfn_t *pfnp)
{
	const sector_t sector = dax_iomap_sector(iomap, pos);
	pgoff_t pgoff;
	int id, rc;
	long length;

	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
	if (rc)
		return rc;
	id = dax_read_lock();
	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
				   NULL, pfnp);
	if (length < 0) {
		rc = length;
		goto out;
	}
	rc = -EINVAL;
	if (PFN_PHYS(length) < size)
		goto out;
	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
		goto out;
	/* For larger pages we need devmap */
	if (length > 1 && !pfn_t_devmap(*pfnp))
		goto out;
	rc = 0;
out:
	dax_read_unlock(id);
	return rc;
}

/*
 * The user has performed a load from a hole in the file.  Allocating a new
 * page in the file would cause excessive storage usage for workloads with
 * sparse files.  Instead we insert a read-only mapping of the 4k zero page.
 * If this page is ever written to we will re-fault and change the mapping to
 * point to real DAX storage instead.
 */
static vm_fault_t dax_load_hole(struct xa_state *xas,
		struct address_space *mapping, void **entry,
		struct vm_fault *vmf)
{
	struct inode *inode = mapping->host;
	unsigned long vaddr = vmf->address;
	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
	vm_fault_t ret;

	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_ZERO_PAGE, false);

	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
	trace_dax_load_hole(inode, vmf, ret);
	return ret;
}

#ifdef CONFIG_FS_DAX_PMD
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
		const struct iomap *iomap, void **entry)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = mapping->host;
	pgtable_t pgtable = NULL;
	struct page *zero_page;
	spinlock_t *ptl;
	pmd_t pmd_entry;
	pfn_t pfn;

	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);

	if (unlikely(!zero_page))
		goto fallback;

	pfn = page_to_pfn_t(zero_page);
	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
			DAX_PMD | DAX_ZERO_PAGE, false);

	if (arch_needs_pgtable_deposit()) {
		pgtable = pte_alloc_one(vma->vm_mm);
		if (!pgtable)
			return VM_FAULT_OOM;
	}

	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
	if (!pmd_none(*(vmf->pmd))) {
		spin_unlock(ptl);
		goto fallback;
	}

	if (pgtable) {
		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
		mm_inc_nr_ptes(vma->vm_mm);
	}
	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
	pmd_entry = pmd_mkhuge(pmd_entry);
	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
	spin_unlock(ptl);
	trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
	return VM_FAULT_NOPAGE;

fallback:
	if (pgtable)
		pte_free(vma->vm_mm, pgtable);
	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
	return VM_FAULT_FALLBACK;
}
#else
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
		const struct iomap *iomap, void **entry)
{
	return VM_FAULT_FALLBACK;
}
#endif

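/*
 * Zero up to one page of a DAX extent: use the dax device's
 * zero-page-range operation for whole aligned pages, otherwise memset()
 * through a direct mapping and flush the CPU caches.
 */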
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
{
	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
	pgoff_t pgoff;
	long rc, id;
	void *kaddr;
	bool page_aligned = false;
	unsigned offset = offset_in_page(pos);
	unsigned size = min_t(u64, PAGE_SIZE - offset, length);

	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
	    (size == PAGE_SIZE))
		page_aligned = true;

	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	id = dax_read_lock();

	if (page_aligned)
		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
	else
		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
	if (rc < 0) {
		dax_read_unlock(id);
		return rc;
	}

	if (!page_aligned) {
		memset(kaddr + offset, 0, size);
		dax_flush(iomap->dax_dev, kaddr + offset, size);
	}
	dax_read_unlock(id);
	return size;
}

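/*
 * Core of dax_iomap_rw(): for each mapped extent, obtain a direct kernel
 * mapping with dax_direct_access() and copy between it and the iov_iter.
 */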
static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
		struct iov_iter *iter)
{
	const struct iomap *iomap = &iomi->iomap;
	loff_t length = iomap_length(iomi);
	loff_t pos = iomi->pos;
	struct block_device *bdev = iomap->bdev;
	struct dax_device *dax_dev = iomap->dax_dev;
	loff_t end = pos + length, done = 0;
	ssize_t ret = 0;
	size_t xfer;
	int id;

	if (iov_iter_rw(iter) == READ) {
		end = min(end, i_size_read(iomi->inode));
		if (pos >= end)
			return 0;

		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	/*
	 * Write can allocate block for an area which has a hole page mapped
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in page cache.
	 */
	if (iomap->flags & IOMAP_F_NEW) {
		invalidate_inode_pages2_range(iomi->inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
	}

	id = dax_read_lock();
	while (pos < end) {
		unsigned offset = pos & (PAGE_SIZE - 1);
		const size_t size = ALIGN(length + offset, PAGE_SIZE);
		const sector_t sector = dax_iomap_sector(iomap, pos);
		ssize_t map_len;
		pgoff_t pgoff;
		void *kaddr;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
		if (ret)
			break;

		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
				&kaddr, NULL);
		if (map_len < 0) {
			ret = map_len;
			break;
		}

		map_len = PFN_PHYS(map_len);
		kaddr += offset;
		map_len -= offset;
		if (map_len > end - pos)
			map_len = end - pos;

		/*
		 * The userspace address for the memory copy has already been
		 * validated via access_ok() in either vfs_read() or
		 * vfs_write(), depending on which operation we are doing.
		 */
		if (iov_iter_rw(iter) == WRITE)
			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
					map_len, iter);
		else
			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
					map_len, iter);

		pos += xfer;
		length -= xfer;
		done += xfer;

		if (xfer == 0)
			ret = -EFAULT;
		if (xfer < map_len)
			break;
	}
	dax_read_unlock(id);

	return done ? done : ret;
}

/**
 * dax_iomap_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * This function performs read and write operations to directly mapped
 * persistent memory.  The callers needs to take care of read/write exclusion
 * and evicting any page cache pages in the region under I/O.
 */
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops)
{
	struct iomap_iter iomi = {
		.inode		= iocb->ki_filp->f_mapping->host,
		.pos		= iocb->ki_pos,
		.len		= iov_iter_count(iter),
	};
	loff_t done = 0;
	int ret;

	if (iov_iter_rw(iter) == WRITE) {
		lockdep_assert_held_write(&iomi.inode->i_rwsem);
		iomi.flags |= IOMAP_WRITE;
	} else {
		lockdep_assert_held(&iomi.inode->i_rwsem);
	}

	if (iocb->ki_flags & IOCB_NOWAIT)
		iomi.flags |= IOMAP_NOWAIT;

	while ((ret = iomap_iter(&iomi, ops)) > 0)
		iomi.processed = dax_iomap_iter(&iomi, iter);

	done = iomi.pos - iocb->ki_pos;
	iocb->ki_pos = iomi.pos;
	return done ? done : ret;
}
EXPORT_SYMBOL_GPL(dax_iomap_rw);

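/* Convert an errno from the fault path into a VM_FAULT_* return code. */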
static vm_fault_t dax_fault_return(int error)
{
	if (error == 0)
		return VM_FAULT_NOPAGE;
	return vmf_error(error);
}

/*
 * MAP_SYNC on a dax mapping guarantees dirty metadata is
 * flushed on write-faults (non-cow), but not read-faults.
 */
static bool dax_fault_is_synchronous(unsigned long flags,
		struct vm_area_struct *vma, const struct iomap *iomap)
{
	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
		&& (iomap->flags & IOMAP_F_DIRTY);
}

/*
 * When handling a synchronous page fault and the inode need a fsync, we can
 * insert the PTE/PMD into page tables only after that fsync happened. Skip
 * insertion for now and return the pfn so that caller can insert it after
 * the fsync is done.
 */
static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
{
	if (WARN_ON_ONCE(!pfnp))
		return VM_FAULT_SIGBUS;
	*pfnp = pfn;
	return VM_FAULT_NEEDDSYNC;
}

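/*
 * Handle a write fault on a private mapping: fill vmf->cow_page from DAX
 * storage (or zero it for holes/unwritten extents) and finish the fault.
 */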
static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
		const struct iomap_iter *iter)
{
	sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
	unsigned long vaddr = vmf->address;
	vm_fault_t ret;
	int error = 0;

	switch (iter->iomap.type) {
	case IOMAP_HOLE:
	case IOMAP_UNWRITTEN:
		clear_user_highpage(vmf->cow_page, vaddr);
		break;
	case IOMAP_MAPPED:
		error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
					  sector, vmf->cow_page, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
	}

	if (error)
		return dax_fault_return(error);

	__SetPageUptodate(vmf->cow_page);
	ret = finish_fault(vmf);
	if (!ret)
		return VM_FAULT_DONE_COW;
	return ret;
}

/**
 * dax_fault_iter - Common actor to handle pfn insertion in PTE/PMD fault.
 * @vmf:	vm fault instance
 * @iter:	iomap iter
 * @pfnp:	pfn to be returned
 * @xas:	the dax mapping tree of a file
 * @entry:	an unlocked dax entry to be inserted
 * @pmd:	distinguish whether it is a pmd fault
 */
static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
		const struct iomap_iter *iter, pfn_t *pfnp,
		struct xa_state *xas, void **entry, bool pmd)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	const struct iomap *iomap = &iter->iomap;
	size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
	loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap);
	unsigned long entry_flags = pmd ? DAX_PMD : 0;
	int err = 0;
	pfn_t pfn;

	if (!pmd && vmf->cow_page)
		return dax_fault_cow_page(vmf, iter);

	/* if we are reading UNWRITTEN and HOLE, return a hole. */
	if (!write &&
	    (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) {
		if (!pmd)
			return dax_load_hole(xas, mapping, entry, vmf);
		return dax_pmd_load_hole(xas, vmf, iomap, entry);
	}

	if (iomap->type != IOMAP_MAPPED) {
		WARN_ON_ONCE(1);
		return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS;
	}

	err = dax_iomap_pfn(&iter->iomap, pos, size, &pfn);
	if (err)
		return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);

	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags,
				  write && !sync);

	if (sync)
		return dax_fault_synchronous_pfnp(pfnp, pfn);

	/* insert PMD pfn */
	if (pmd)
		return vmf_insert_pfn_pmd(vmf, pfn, write);

	/* insert PTE pfn */
	if (write)
		return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
}

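/*
 * Handle a PTE-sized fault: lock the DAX entry for vmf->pgoff, walk the
 * filesystem's iomap for that offset and insert the resulting mapping.
 */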
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       int *iomap_errp, const struct iomap_ops *ops)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
	struct iomap_iter iter = {
		.inode		= mapping->host,
		.pos		= (loff_t)vmf->pgoff << PAGE_SHIFT,
		.len		= PAGE_SIZE,
		.flags		= IOMAP_FAULT,
	};
	vm_fault_t ret = 0;
	void *entry;
	int error;

	trace_dax_pte_fault(iter.inode, vmf, ret);
	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole so
	 * this is a reliable test.
	 */
	if (iter.pos >= i_size_read(iter.inode)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
		iter.flags |= IOMAP_WRITE;

	entry = grab_mapping_entry(&xas, mapping, 0);
	if (xa_is_internal(entry)) {
		ret = xa_to_internal(entry);
		goto out;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PMD fault that overlaps with
	 * the PTE we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
		ret = VM_FAULT_NOPAGE;
		goto unlock_entry;
	}

	while ((error = iomap_iter(&iter, ops)) > 0) {
		if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
			iter.processed = -EIO;	/* fs corruption? */
			continue;
		}

		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, false);
		if (ret != VM_FAULT_SIGBUS &&
		    (iter.iomap.flags & IOMAP_F_NEW)) {
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
			ret |= VM_FAULT_MAJOR;
		}

		if (!(ret & VM_FAULT_ERROR))
			iter.processed = PAGE_SIZE;
	}

	if (iomap_errp)
		*iomap_errp = error;
	if (!ret && error)
		ret = dax_fault_return(error);

unlock_entry:
	dax_unlock_entry(&xas, entry);
out:
	trace_dax_pte_fault_done(iter.inode, vmf, ret);
	return ret;
}

#ifdef CONFIG_FS_DAX_PMD
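/*
 * Return true if a PMD fault must fall back to PTEs: the fault address and
 * file offset must share the same PMD colour, CoW faults never get PMDs,
 * the VMA must span the whole PMD, and it must not extend past max_pgoff.
 */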
static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
		pgoff_t max_pgoff)
{
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	/*
	 * Make sure that the faulting address's PMD offset (color) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		return true;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vmf->vma->vm_flags & VM_SHARED))
		return true;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vmf->vma->vm_start)
		return true;
	if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
		return true;

	/* If the PMD would extend beyond the file size */
	if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff)
		return true;

	return false;
}

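/*
 * Handle a PMD-sized fault: like dax_iomap_pte_fault(), but map a whole
 * PMD's worth of file data at once, falling back to PTEs when not possible.
 */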
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
	struct iomap_iter iter = {
		.inode		= mapping->host,
		.len		= PMD_SIZE,
		.flags		= IOMAP_FAULT,
	};
	vm_fault_t ret = VM_FAULT_FALLBACK;
	pgoff_t max_pgoff;
	void *entry;
	int error;

	if (vmf->flags & FAULT_FLAG_WRITE)
		iter.flags |= IOMAP_WRITE;

	/*
	 * Check whether offset isn't beyond end of file now. Caller is
	 * supposed to hold locks serializing us with truncate / punch hole so
	 * this is a reliable test.
	 */
	max_pgoff = DIV_ROUND_UP(i_size_read(iter.inode), PAGE_SIZE);

	trace_dax_pmd_fault(iter.inode, vmf, max_pgoff, 0);

	if (xas.xa_index >= max_pgoff) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	if (dax_fault_check_fallback(vmf, &xas, max_pgoff))
		goto fallback;

	/*
	 * grab_mapping_entry() will make sure we get an empty PMD entry,
	 * a zero PMD entry or a DAX PMD.  If it can't (because a PTE
	 * entry is already in the array, for instance), it will return
	 * VM_FAULT_FALLBACK.
	 */
	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
	if (xa_is_internal(entry)) {
		ret = xa_to_internal(entry);
		goto fallback;
	}

	/*
	 * It is possible, particularly with mixed reads & writes to private
	 * mappings, that we have raced with a PTE fault that overlaps with
	 * the PMD we need to set up.  If so just return and the fault will be
	 * retried.
	 */
	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
			!pmd_devmap(*vmf->pmd)) {
		ret = 0;
		goto unlock_entry;
	}

	iter.pos = (loff_t)xas.xa_index << PAGE_SHIFT;
	while ((error = iomap_iter(&iter, ops)) > 0) {
		if (iomap_length(&iter) < PMD_SIZE)
			continue;

		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
		if (ret != VM_FAULT_FALLBACK)
			iter.processed = PMD_SIZE;
	}

unlock_entry:
	dax_unlock_entry(&xas, entry);
fallback:
	if (ret == VM_FAULT_FALLBACK) {
		split_huge_pmd(vmf->vma, vmf->pmd, vmf->address);
		count_vm_event(THP_FAULT_FALLBACK);
	}
out:
	trace_dax_pmd_fault_done(iter.inode, vmf, max_pgoff, ret);
	return ret;
}
#else
static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	return VM_FAULT_FALLBACK;
}
#endif

/**
 * dax_iomap_fault - handle a page fault on a DAX file
 * @vmf: The description of the fault
 * @pe_size: Size of the page to fault in
 * @pfnp: PFN to insert if the fault is synchronous and an fsync is required
 * @iomap_errp: Storage for detailed error code in case of error
 * @ops: Iomap ops passed from the file system
 *
 * When a page fault occurs, filesystems may call this helper in
 * their fault handler for DAX files. dax_iomap_fault() assumes the caller
 * has done all the necessary locking for page fault to proceed
 * successfully.
 */
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
		    pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
{
	switch (pe_size) {
	case PE_SIZE_PTE:
		return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
	case PE_SIZE_PMD:
		return dax_iomap_pmd_fault(vmf, pfnp, ops);
	default:
		return VM_FAULT_FALLBACK;
	}
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);

/*
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
 * @pfn: PFN to insert
 * @order: Order of entry to insert.
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file.  It also marks the page cache entry as dirty.
 */
static vm_fault_t
dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
	void *entry;
	vm_fault_t ret;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas, order);
	/* Did we race with someone splitting entry or so? */
	if (!entry || dax_is_conflict(entry) ||
	    (order == 0 && !dax_is_pte_entry(entry))) {
		put_unlocked_entry(&xas, entry, WAKE_NEXT);
		xas_unlock_irq(&xas);
		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
						      VM_FAULT_NOPAGE);
		return VM_FAULT_NOPAGE;
	}
	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
	dax_lock_entry(&xas, entry);
	xas_unlock_irq(&xas);
	if (order == 0)
		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
	else if (order == PMD_ORDER)
		ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
#endif
	else
		ret = VM_FAULT_FALLBACK;
	dax_unlock_entry(&xas, entry);
	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
	return ret;
}

/**
 * dax_finish_sync_fault - finish synchronous page fault
 * @vmf: The description of the fault
 * @pe_size: Size of entry to be inserted
 * @pfn: PFN to insert
 *
 * This function ensures that the file range touched by the page fault is
 * stored persistently on the media and handles inserting of appropriate page
 * table entry.
 */
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, pfn_t pfn)
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	unsigned int order = pe_order(pe_size);
	size_t len = PAGE_SIZE << order;

	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);