// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include <linux/backing-dev.h>

#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_recover.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_errortag.h"
#include "xfs_error.h"

static kmem_zone_t *xfs_buf_zone;

#define xb_to_gfp(flags) \
        ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
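
/*
 * Locking orders, as taken by the functions below:
 *
 * xfs_buf_ioacct_inc:
 * xfs_buf_ioacct_dec:
 *      b_sema (caller holds)
 *        b_lock
 *
 * xfs_buf_stale:
 *      b_sema (caller holds)
 *        b_lock
 *          lru_lock
 *
 * xfs_buf_rele:
 *      b_lock
 *        pag_buf_lock
 *          lru_lock
 *
 * xfs_buftarg_drain_rele:
 *      lru_lock
 *        b_lock (trylock due to inversion)
 *
 * xfs_buftarg_isolate:
 *      lru_lock
 *        b_lock (trylock due to inversion)
 */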
static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);

static inline int
xfs_buf_submit(
        struct xfs_buf *bp)
{
        return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
}

static inline int
xfs_buf_is_vmapped(
        struct xfs_buf *bp)
{
        /*
         * Return true if the buffer is vmapped.
         *
         * b_addr is null if the buffer is not mapped, but the code is clever
         * enough to know it doesn't have to map a single page, so the check
         * has to be both for b_addr and bp->b_page_count > 1.
         */
        return bp->b_addr && bp->b_page_count > 1;
}

static inline int
xfs_buf_vmap_len(
        struct xfs_buf *bp)
{
        return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
}
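
/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. The count is incremented once per buffer (per hold cycle)
 * because the corresponding decrement is deferred to buffer release. Buffers
 * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
 * tracking adds unnecessary overhead. This is used for synchronisation
 * purposes with unmount (see xfs_buftarg_wait()), so all we really need is a
 * count of in-flight I/Os.
 */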
static inline void
xfs_buf_ioacct_inc(
        struct xfs_buf *bp)
{
        if (bp->b_flags & XBF_NO_IOACCT)
                return;

        ASSERT(bp->b_flags & XBF_ASYNC);
        spin_lock(&bp->b_lock);
        if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
                bp->b_state |= XFS_BSTATE_IN_FLIGHT;
                percpu_counter_inc(&bp->b_target->bt_io_count);
        }
        spin_unlock(&bp->b_lock);
}

/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg. The caller must hold b_lock.
 */
static inline void
__xfs_buf_ioacct_dec(
        struct xfs_buf *bp)
{
        lockdep_assert_held(&bp->b_lock);

        if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
                bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
                percpu_counter_dec(&bp->b_target->bt_io_count);
        }
}

static inline void
xfs_buf_ioacct_dec(
        struct xfs_buf *bp)
{
        spin_lock(&bp->b_lock);
        __xfs_buf_ioacct_dec(bp);
        spin_unlock(&bp->b_lock);
}
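
/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
 * reference count falls to zero. If the buffer is already on the LRU, we need
 * to remove the reference that LRU holds on the buffer.
 *
 * This prevents build-up of stale buffers on the LRU.
 */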
void
xfs_buf_stale(
        struct xfs_buf *bp)
{
        ASSERT(xfs_buf_islocked(bp));

        bp->b_flags |= XBF_STALE;

        /*
         * Clear the delwri status so that a delwri queue walker will not
         * flush this buffer to disk now that it is stale. The delwri queue has
         * a reference to the buffer, so this is safe to do.
         */
        bp->b_flags &= ~_XBF_DELWRI_Q;

        /*
         * Once the buffer is marked stale and unlocked, a subsequent lookup
         * could reset b_flags. There is no guarantee that the buffer is still
         * on the LRU though, as the lookup may have just taken it off. Hence
         * the in-flight accounting and the LRU removal are done under b_lock
         * so they change atomically with respect to other releases.
         */
        spin_lock(&bp->b_lock);
        __xfs_buf_ioacct_dec(bp);

        atomic_set(&bp->b_lru_ref, 0);
        if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
            (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
                atomic_dec(&bp->b_hold);

        ASSERT(atomic_read(&bp->b_hold) >= 1);
        spin_unlock(&bp->b_lock);
}

static int
xfs_buf_get_maps(
        struct xfs_buf *bp,
        int map_count)
{
        ASSERT(bp->b_maps == NULL);
        bp->b_map_count = map_count;

        if (map_count == 1) {
                bp->b_maps = &bp->__b_map;
                return 0;
        }

        bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
                                 KM_NOFS);
        if (!bp->b_maps)
                return -ENOMEM;
        return 0;
}
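
/*
 * Frees b_maps if it was dynamically allocated (i.e. not the embedded map).
 */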
static void
xfs_buf_free_maps(
        struct xfs_buf *bp)
{
        if (bp->b_maps != &bp->__b_map) {
                kmem_free(bp->b_maps);
                bp->b_maps = NULL;
        }
}

static int
_xfs_buf_alloc(
        struct xfs_buftarg *target,
        struct xfs_buf_map *map,
        int nmaps,
        xfs_buf_flags_t flags,
        struct xfs_buf **bpp)
{
        struct xfs_buf *bp;
        int error;
        int i;

        *bpp = NULL;
        bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL);

        /*
         * We don't want certain flags to appear in b_flags unless they are
         * specifically set by later operations on the buffer.
         */
        flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);

        atomic_set(&bp->b_hold, 1);
        atomic_set(&bp->b_lru_ref, 1);
        init_completion(&bp->b_iowait);
        INIT_LIST_HEAD(&bp->b_lru);
        INIT_LIST_HEAD(&bp->b_list);
        INIT_LIST_HEAD(&bp->b_li_list);
        sema_init(&bp->b_sema, 0); /* held, no waiters */
        spin_lock_init(&bp->b_lock);
        bp->b_target = target;
        bp->b_mount = target->bt_mount;
        bp->b_flags = flags;

        /*
         * Initialise the buffer map array and the total length from the
         * caller's map. Most buffers are described by a single map, in which
         * case the map embedded in the buffer is used rather than a separate
         * allocation.
         */
        error = xfs_buf_get_maps(bp, nmaps);
        if (error) {
                kmem_cache_free(xfs_buf_zone, bp);
                return error;
        }

        bp->b_bn = map[0].bm_bn;
        bp->b_length = 0;
        for (i = 0; i < nmaps; i++) {
                bp->b_maps[i].bm_bn = map[i].bm_bn;
                bp->b_maps[i].bm_len = map[i].bm_len;
                bp->b_length += map[i].bm_len;
        }

        atomic_set(&bp->b_pin_count, 0);
        init_waitqueue_head(&bp->b_waiters);

        XFS_STATS_INC(bp->b_mount, xb_create);
        trace_xfs_buf_init(bp, _RET_IP_);

        *bpp = bp;
        return 0;
}
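
/*
 * Allocate a page array capable of holding a specified number
 * of pages, and point the page buf at it.
 */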
STATIC int
_xfs_buf_get_pages(
        struct xfs_buf *bp,
        int page_count)
{
        /* Make sure that we have a page list */
        if (bp->b_pages == NULL) {
                bp->b_page_count = page_count;
                if (page_count <= XB_PAGES) {
                        bp->b_pages = bp->b_page_array;
                } else {
                        bp->b_pages = kmem_alloc(sizeof(struct page *) *
                                                 page_count, KM_NOFS);
                        if (bp->b_pages == NULL)
                                return -ENOMEM;
                }
                memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
        }
        return 0;
}
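
/*
 * Frees b_pages if it was allocated.
 */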
STATIC void
_xfs_buf_free_pages(
        struct xfs_buf *bp)
{
        if (bp->b_pages != bp->b_page_array) {
                kmem_free(bp->b_pages);
                bp->b_pages = NULL;
        }
}
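
/*
 * Releases the specified buffer.
 *
 * The modification state of any associated pages is left unchanged. The
 * buffer must not be on any hash or LRU - use xfs_buf_rele() instead for
 * hashed and refcounted buffers.
 */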
static void
xfs_buf_free(
        struct xfs_buf *bp)
{
        trace_xfs_buf_free(bp, _RET_IP_);

        ASSERT(list_empty(&bp->b_lru));

        if (bp->b_flags & _XBF_PAGES) {
                uint i;

                if (xfs_buf_is_vmapped(bp))
                        vm_unmap_ram(bp->b_addr - bp->b_offset,
                                     bp->b_page_count);

                for (i = 0; i < bp->b_page_count; i++) {
                        struct page *page = bp->b_pages[i];

                        __free_page(page);
                }
                if (current->reclaim_state)
                        current->reclaim_state->reclaimed_slab +=
                                                bp->b_page_count;
        } else if (bp->b_flags & _XBF_KMEM)
                kmem_free(bp->b_addr);
        _xfs_buf_free_pages(bp);
        xfs_buf_free_maps(bp);
        kmem_cache_free(xfs_buf_zone, bp);
}
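
/*
 * Allocates all the pages for the buffer in question and builds its page list.
 */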
STATIC int
xfs_buf_allocate_memory(
        struct xfs_buf *bp,
        uint flags)
{
        size_t size;
        size_t nbytes, offset;
        gfp_t gfp_mask = xb_to_gfp(flags);
        unsigned short page_count, i;
        xfs_off_t start, end;
        int error;
        xfs_km_flags_t kmflag_mask = 0;

        /*
         * assure zeroed buffer for non-read cases.
         */
        if (!(flags & XBF_READ)) {
                kmflag_mask |= KM_ZERO;
                gfp_mask |= __GFP_ZERO;
        }

        /*
         * for buffers that are contained within a single page, just allocate
         * the memory from the heap - there's no need for the complexity of
         * page arrays to keep allocation down to order 0.
         */
        size = BBTOB(bp->b_length);
        if (size < PAGE_SIZE) {
                int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
                bp->b_addr = kmem_alloc_io(size, align_mask,
                                           KM_NOFS | kmflag_mask);
                if (!bp->b_addr) {
                        /* low memory - use alloc_page loop instead */
                        goto use_alloc_page;
                }

                if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
                    ((unsigned long)bp->b_addr & PAGE_MASK)) {
                        /* b_addr spans two pages - use alloc_page instead */
                        kmem_free(bp->b_addr);
                        bp->b_addr = NULL;
                        goto use_alloc_page;
                }
                bp->b_offset = offset_in_page(bp->b_addr);
                bp->b_pages = bp->b_page_array;
                bp->b_pages[0] = kmem_to_page(bp->b_addr);
                bp->b_page_count = 1;
                bp->b_flags |= _XBF_KMEM;
                return 0;
        }

use_alloc_page:
        start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
        end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
                                                        >> PAGE_SHIFT;
        page_count = end - start;
        error = _xfs_buf_get_pages(bp, page_count);
        if (unlikely(error))
                return error;

        offset = bp->b_offset;
        bp->b_flags |= _XBF_PAGES;

        for (i = 0; i < bp->b_page_count; i++) {
                struct page *page;
                uint retries = 0;
retry:
                page = alloc_page(gfp_mask);
                if (unlikely(page == NULL)) {
                        if (flags & XBF_READ_AHEAD) {
                                bp->b_page_count = i;
                                error = -ENOMEM;
                                goto out_free_pages;
                        }

                        /*
                         * This could deadlock.
                         *
                         * But until all the XFS lowlevel code is revamped to
                         * handle buffer allocation failures we can't do much.
                         */
                        if (!(++retries % 100))
                                xfs_err(NULL,
                "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
                                        current->comm, current->pid,
                                        __func__, gfp_mask);

                        XFS_STATS_INC(bp->b_mount, xb_page_retries);
                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                        goto retry;
                }

                XFS_STATS_INC(bp->b_mount, xb_page_found);

                nbytes = min_t(size_t, size, PAGE_SIZE - offset);
                size -= nbytes;
                bp->b_pages[i] = page;
                offset = 0;
        }
        return 0;

out_free_pages:
        for (i = 0; i < bp->b_page_count; i++)
                __free_page(bp->b_pages[i]);
        bp->b_flags &= ~_XBF_PAGES;
        return error;
}
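
/*
 * Map buffer into kernel address-space if necessary.
 */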
STATIC int
_xfs_buf_map_pages(
        struct xfs_buf *bp,
        uint flags)
{
        ASSERT(bp->b_flags & _XBF_PAGES);
        if (bp->b_page_count == 1) {
                /* A single page buffer is always mappable */
                bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
        } else if (flags & XBF_UNMAPPED) {
                bp->b_addr = NULL;
        } else {
                int retried = 0;
                unsigned nofs_flag;

                /*
                 * vm_map_ram() will allocate auxiliary structures (e.g.
                 * pagetables) with GFP_KERNEL, yet we are likely to be under
                 * GFP_NOFS context here. Hence we need to tell memory reclaim
                 * that we are in such a context via PF_MEMALLOC_NOFS to
                 * prevent memory reclaim re-entering the filesystem here and
                 * potentially deadlocking.
                 */
                nofs_flag = memalloc_nofs_save();
                do {
                        bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
                                                -1);
                        if (bp->b_addr)
                                break;
                        vm_unmap_aliases();
                } while (retried++ <= 1);
                memalloc_nofs_restore(nofs_flag);

                if (!bp->b_addr)
                        return -ENOMEM;
                bp->b_addr += bp->b_offset;
        }

        return 0;
}
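
/*
 * Finding and Reading Buffers
 */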
static int
_xfs_buf_obj_cmp(
        struct rhashtable_compare_arg *arg,
        const void *obj)
{
        const struct xfs_buf_map *map = arg->key;
        const struct xfs_buf *bp = obj;

        /*
         * The key hashing in the lookup path depends on the key being the
         * first element of the compare_arg, make sure to assert this.
         */
        BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);

        if (bp->b_bn != map->bm_bn)
                return 1;

        if (unlikely(bp->b_length != map->bm_len)) {
                /*
                 * found a block number match. If the range doesn't
                 * match, the only way this is allowed is if the buffer
                 * in the cache is stale and the transaction that made
                 * it stale has not yet committed. i.e. we are
                 * reallocating a busy extent. Skip this buffer and
                 * continue searching for an exact match.
                 */
                ASSERT(bp->b_flags & XBF_STALE);
                return 1;
        }
        return 0;
}

static const struct rhashtable_params xfs_buf_hash_params = {
        .min_size               = 32,
        .nelem_hint             = 16,
        .key_len                = sizeof(xfs_daddr_t),
        .key_offset             = offsetof(struct xfs_buf, b_bn),
        .head_offset            = offsetof(struct xfs_buf, b_rhash_head),
        .automatic_shrinking    = true,
        .obj_cmpfn              = _xfs_buf_obj_cmp,
};

int
xfs_buf_hash_init(
        struct xfs_perag *pag)
{
        spin_lock_init(&pag->pag_buf_lock);
        return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
}

void
xfs_buf_hash_destroy(
        struct xfs_perag *pag)
{
        rhashtable_destroy(&pag->pag_buf_hash);
}
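
/*
 * Look up a buffer in the buffer cache and return it referenced and locked
 * in @found_bp.
 *
 * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
 * cache.
 *
 * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
 * -EAGAIN if we fail to lock it.
 *
 * Return values are:
 *      -EFSCORRUPTED if have been supplied with an invalid address
 *      -EAGAIN on trylock failure
 *      -ENOENT if we fail to find a match and @new_bp was NULL
 *      0, with @found_bp:
 *              - @new_bp if we inserted it into the cache
 *              - the buffer we found and locked.
 */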
static int
xfs_buf_find(
        struct xfs_buftarg *btp,
        struct xfs_buf_map *map,
        int nmaps,
        xfs_buf_flags_t flags,
        struct xfs_buf *new_bp,
        struct xfs_buf **found_bp)
{
        struct xfs_perag *pag;
        struct xfs_buf *bp;
        struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
        xfs_daddr_t eofs;
        int i;

        *found_bp = NULL;

        for (i = 0; i < nmaps; i++)
                cmap.bm_len += map[i].bm_len;

        /* Check for IOs smaller than the sector size / not sector aligned */
        ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
        ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));

        /*
         * Corrupted block numbers can get through to here, unfortunately, so
         * we have to check that the buffer falls within the filesystem bounds.
         */
        eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
        if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
                xfs_alert(btp->bt_mount,
                          "%s: daddr 0x%llx out of range, EOFS 0x%llx",
                          __func__, cmap.bm_bn, eofs);
                WARN_ON(1);
                return -EFSCORRUPTED;
        }

        pag = xfs_perag_get(btp->bt_mount,
                            xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));

        spin_lock(&pag->pag_buf_lock);
        bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
                                    xfs_buf_hash_params);
        if (bp) {
                atomic_inc(&bp->b_hold);
                goto found;
        }

        /* No match found */
        if (!new_bp) {
                XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
                spin_unlock(&pag->pag_buf_lock);
                xfs_perag_put(pag);
                return -ENOENT;
        }

        /* the buffer keeps the perag reference until it is freed */
        new_bp->b_pag = pag;
        rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
                               xfs_buf_hash_params);
        spin_unlock(&pag->pag_buf_lock);
        *found_bp = new_bp;
        return 0;

found:
        spin_unlock(&pag->pag_buf_lock);
        xfs_perag_put(pag);

        if (!xfs_buf_trylock(bp)) {
                if (flags & XBF_TRYLOCK) {
                        xfs_buf_rele(bp);
                        XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
                        return -EAGAIN;
                }
                xfs_buf_lock(bp);
                XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
        }

        /*
         * if the buffer is stale, clear all the external state associated with
         * it. We need to keep flags such as how we allocated the buffer memory
         * intact here.
         */
        if (bp->b_flags & XBF_STALE) {
                ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
                bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
                bp->b_ops = NULL;
        }

        trace_xfs_buf_find(bp, flags, _RET_IP_);
        XFS_STATS_INC(btp->bt_mount, xb_get_locked);
        *found_bp = bp;
        return 0;
}

struct xfs_buf *
xfs_buf_incore(
        struct xfs_buftarg *target,
        xfs_daddr_t blkno,
        size_t numblks,
        xfs_buf_flags_t flags)
{
        struct xfs_buf *bp;
        int error;
        DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);

        error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
        if (error)
                return NULL;
        return bp;
}
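
/*
 * Assembles a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata intensive workloads will see 3 orders of magnitude
 * more hits than misses.
 */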
int
xfs_buf_get_map(
        struct xfs_buftarg *target,
        struct xfs_buf_map *map,
        int nmaps,
        xfs_buf_flags_t flags,
        struct xfs_buf **bpp)
{
        struct xfs_buf *bp;
        struct xfs_buf *new_bp;
        int error = 0;

        *bpp = NULL;
        error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
        if (!error)
                goto found;
        if (error != -ENOENT)
                return error;

        error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
        if (error)
                return error;

        error = xfs_buf_allocate_memory(new_bp, flags);
        if (error) {
                xfs_buf_free(new_bp);
                return error;
        }

        error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
        if (error) {
                xfs_buf_free(new_bp);
                return error;
        }

        if (bp != new_bp)
                xfs_buf_free(new_bp);

found:
        if (!bp->b_addr) {
                error = _xfs_buf_map_pages(bp, flags);
                if (unlikely(error)) {
                        xfs_warn_ratelimited(target->bt_mount,
                                "%s: failed to map %u pages", __func__,
                                bp->b_page_count);
                        xfs_buf_relse(bp);
                        return error;
                }
        }

        /*
         * Clear b_error if this is a lookup from a caller that doesn't expect
         * valid data to be found in the buffer.
         */
        if (!(flags & XBF_READ))
                xfs_buf_ioerror(bp, 0);

        XFS_STATS_INC(target->bt_mount, xb_get);
        trace_xfs_buf_get(bp, flags, _RET_IP_);
        *bpp = bp;
        return 0;
}

int
_xfs_buf_read(
        struct xfs_buf *bp,
        xfs_buf_flags_t flags)
{
        ASSERT(!(flags & XBF_WRITE));
        ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);

        bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
        bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);

        return xfs_buf_submit(bp);
}
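
/*
 * Reverify a buffer found in cache without an attached ->b_ops.
 *
 * If the caller passed an ops structure and the buffer doesn't have ops
 * assigned, set the ops and use it to verify the contents. If verification
 * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
 * already in XBF_DONE state on entry.
 *
 * Under normal operations, every in-core buffer is verified on read I/O
 * completion. There are two scenarios that can lead to in-core buffers without
 * an assigned ->b_ops. The first is during log recovery of buffers on a V4
 * filesystem, though these buffers are purged at the end of recovery. The
 * second is a quotacheck that uses a non-ops buffer read to avoid deadlocking
 * on the dquot flush lock.
 */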
int
xfs_buf_reverify(
        struct xfs_buf *bp,
        const struct xfs_buf_ops *ops)
{
        ASSERT(bp->b_flags & XBF_DONE);
        ASSERT(bp->b_error == 0);

        if (!ops || bp->b_ops)
                return 0;

        bp->b_ops = ops;
        bp->b_ops->verify_read(bp);
        if (bp->b_error)
                bp->b_flags &= ~XBF_DONE;
        return bp->b_error;
}

int
xfs_buf_read_map(
        struct xfs_buftarg *target,
        struct xfs_buf_map *map,
        int nmaps,
        xfs_buf_flags_t flags,
        struct xfs_buf **bpp,
        const struct xfs_buf_ops *ops,
        xfs_failaddr_t fa)
{
        struct xfs_buf *bp;
        int error;

        flags |= XBF_READ;
        *bpp = NULL;

        error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
        if (error)
                return error;

        trace_xfs_buf_read(bp, flags, _RET_IP_);

        if (!(bp->b_flags & XBF_DONE)) {
                /* Initiate the buffer read and wait. */
                XFS_STATS_INC(target->bt_mount, xb_get_read);
                bp->b_ops = ops;
                error = _xfs_buf_read(bp, flags);

                /* Readahead iodone already dropped the buffer, so exit. */
                if (flags & XBF_ASYNC)
                        return 0;
        } else {
                /* Buffer already read; all we need to do is check it. */
                error = xfs_buf_reverify(bp, ops);

                /* Readahead already finished; drop the buffer and exit. */
                if (flags & XBF_ASYNC) {
                        xfs_buf_relse(bp);
                        return 0;
                }

                /* We do not want read in the flags */
                bp->b_flags &= ~XBF_READ;
                ASSERT(bp->b_ops != NULL || ops == NULL);
        }

        /*
         * If we've had a read error, then the contents of the buffer are
         * invalid and should not be used. To ensure that a followup read tries
         * to pull the buffer from disk again, we clear the XBF_DONE flag and
         * mark the buffer stale. This ensures that anyone who has a current
         * reference to the buffer will interpret its contents correctly and
         * future cache lookups will also treat it as an empty, uninitialised
         * buffer.
         */
        if (error) {
                if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
                        xfs_buf_ioerror_alert(bp, fa);

                bp->b_flags &= ~XBF_DONE;
                xfs_buf_stale(bp);
                xfs_buf_relse(bp);

                /* bad CRC means corrupted metadata */
                if (error == -EFSBADCRC)
                        error = -EFSCORRUPTED;
                return error;
        }

        *bpp = bp;
        return 0;
}
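
/*
 * If we are not low on memory then do the readahead in a deadlock
 * safe manner.
 */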
void
xfs_buf_readahead_map(
        struct xfs_buftarg *target,
        struct xfs_buf_map *map,
        int nmaps,
        const struct xfs_buf_ops *ops)
{
        struct xfs_buf *bp;

        if (bdi_read_congested(target->bt_bdev->bd_bdi))
                return;

        xfs_buf_read_map(target, map, nmaps,
                         XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
                         __this_address);
}
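
/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */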
int
xfs_buf_read_uncached(
        struct xfs_buftarg *target,
        xfs_daddr_t daddr,
        size_t numblks,
        int flags,
        struct xfs_buf **bpp,
        const struct xfs_buf_ops *ops)
{
        struct xfs_buf *bp;
        int error;

        *bpp = NULL;

        error = xfs_buf_get_uncached(target, numblks, flags, &bp);
        if (error)
                return error;

        /* set up the buffer for a read IO */
        ASSERT(bp->b_map_count == 1);
        bp->b_bn = XFS_BUF_DADDR_NULL;  /* always null for uncached buffers */
        bp->b_maps[0].bm_bn = daddr;
        bp->b_flags |= XBF_READ;
        bp->b_ops = ops;

        xfs_buf_submit(bp);
        if (bp->b_error) {
                error = bp->b_error;
                xfs_buf_relse(bp);
                return error;
        }

        *bpp = bp;
        return 0;
}

int
xfs_buf_get_uncached(
        struct xfs_buftarg *target,
        size_t numblks,
        int flags,
        struct xfs_buf **bpp)
{
        unsigned long page_count;
        int error, i;
        struct xfs_buf *bp;
        DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

        *bpp = NULL;

        /* flags might contain irrelevant bits, pass only what we care about */
        error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
        if (error)
                goto fail;

        page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
        error = _xfs_buf_get_pages(bp, page_count);
        if (error)
                goto fail_free_buf;

        for (i = 0; i < page_count; i++) {
                bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
                if (!bp->b_pages[i]) {
                        error = -ENOMEM;
                        goto fail_free_mem;
                }
        }
        bp->b_flags |= _XBF_PAGES;

        error = _xfs_buf_map_pages(bp, 0);
        if (unlikely(error)) {
                xfs_warn(target->bt_mount,
                         "%s: failed to map pages", __func__);
                goto fail_free_mem;
        }

        trace_xfs_buf_get_uncached(bp, _RET_IP_);
        *bpp = bp;
        return 0;

 fail_free_mem:
        while (--i >= 0)
                __free_page(bp->b_pages[i]);
        _xfs_buf_free_pages(bp);
 fail_free_buf:
        xfs_buf_free_maps(bp);
        kmem_cache_free(xfs_buf_zone, bp);
 fail:
        return error;
}
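
/*
 * Increment reference count on buffer, to hold the buffer concurrently
 * with another thread which may release (free) the buffer asynchronously.
 * Must hold the buffer already to call this function.
 */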
void
xfs_buf_hold(
        struct xfs_buf *bp)
{
        trace_xfs_buf_hold(bp, _RET_IP_);
        atomic_inc(&bp->b_hold);
}
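
/*
 * Release a hold on the specified buffer. If the hold count is 1, the buffer
 * is freed or placed on the LRU queue.
 */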
void
xfs_buf_rele(
        struct xfs_buf *bp)
{
        struct xfs_perag *pag = bp->b_pag;
        bool release;
        bool freebuf = false;

        trace_xfs_buf_rele(bp, _RET_IP_);

        if (!pag) {
                ASSERT(list_empty(&bp->b_lru));
                if (atomic_dec_and_test(&bp->b_hold)) {
                        xfs_buf_ioacct_dec(bp);
                        xfs_buf_free(bp);
                }
                return;
        }

        ASSERT(atomic_read(&bp->b_hold) > 0);

        /*
         * We grab the b_lock here first to serialise racing xfs_buf_rele()
         * calls. The pag_buf_lock being taken on the last reference only
         * serialises against racing lookups in xfs_buf_find(). IOWs, the
         * second to last reference is the only one that serialises against
         * LRU manipulation, so we hold b_lock across the whole release to
         * make the hold count, in-flight accounting and LRU state change
         * atomically with respect to other releases.
         */
        spin_lock(&bp->b_lock);
        release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
        if (!release) {
                /*
                 * Drop the in-flight state if the buffer is already on the
                 * LRU and it holds the only reference. This is racy because
                 * we haven't acquired the pag lock, but holding b_lock over
                 * the XFS_BSTATE_IN_FLIGHT update ensures the decrement
                 * occurs only once per buffer.
                 */
                if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
                        __xfs_buf_ioacct_dec(bp);
                goto out_unlock;
        }

        /* the last reference has been dropped ... */
        __xfs_buf_ioacct_dec(bp);
        if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
                /*
                 * If the buffer is added to the LRU take a new reference to
                 * the buffer for the LRU and clear the (now stale) dispose
                 * list state flag
                 */
                if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
                        bp->b_state &= ~XFS_BSTATE_DISPOSE;
                        atomic_inc(&bp->b_hold);
                }
                spin_unlock(&pag->pag_buf_lock);
        } else {
                /*
                 * most of the time buffers will already be removed from the
                 * LRU, so optimise that case by checking for the
                 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
                 * was on was the disposal list
                 */
                if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
                        list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
                } else {
                        ASSERT(list_empty(&bp->b_lru));
                }

                ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
                rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
                                       xfs_buf_hash_params);
                spin_unlock(&pag->pag_buf_lock);
                xfs_perag_put(pag);
                freebuf = true;
        }

out_unlock:
        spin_unlock(&bp->b_lock);

        if (freebuf)
                xfs_buf_free(bp);
}
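
/*
 * Lock a buffer object, if it is not already locked. Note that this in no
 * way locks the underlying pages, so it is only useful for synchronizing
 * concurrent use of buffer objects, not for synchronizing independent access
 * to the underlying pages.
 */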
int
xfs_buf_trylock(
        struct xfs_buf *bp)
{
        int locked;

        locked = down_trylock(&bp->b_sema) == 0;
        if (locked)
                trace_xfs_buf_trylock(bp, _RET_IP_);
        else
                trace_xfs_buf_trylock_fail(bp, _RET_IP_);
        return locked;
}
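
/*
 * Lock a buffer object.
 *
 * If we come across a stale, pinned, locked buffer, we know that we are being
 * asked to lock a buffer that has been reallocated. Because it is pinned, we
 * know that the log has not been pushed to disk and hence it will still be
 * locked. Rather than sleeping until someone else pushes the log, push it
 * ourselves before trying to get the lock.
 */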
void
xfs_buf_lock(
        struct xfs_buf *bp)
{
        trace_xfs_buf_lock(bp, _RET_IP_);

        if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
                xfs_log_force(bp->b_mount, 0);
        down(&bp->b_sema);

        trace_xfs_buf_lock_done(bp, _RET_IP_);
}

void
xfs_buf_unlock(
        struct xfs_buf *bp)
{
        ASSERT(xfs_buf_islocked(bp));

        up(&bp->b_sema);
        trace_xfs_buf_unlock(bp, _RET_IP_);
}

STATIC void
xfs_buf_wait_unpin(
        struct xfs_buf *bp)
{
        DECLARE_WAITQUEUE(wait, current);

        if (atomic_read(&bp->b_pin_count) == 0)
                return;

        add_wait_queue(&bp->b_waiters, &wait);
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (atomic_read(&bp->b_pin_count) == 0)
                        break;
                io_schedule();
        }
        remove_wait_queue(&bp->b_waiters, &wait);
        set_current_state(TASK_RUNNING);
}

static void
xfs_buf_ioerror_alert_ratelimited(
        struct xfs_buf *bp)
{
        static unsigned long lasttime;
        static struct xfs_buftarg *lasttarg;

        if (bp->b_target != lasttarg ||
            time_after(jiffies, (lasttime + 5*HZ))) {
                lasttime = jiffies;
                xfs_buf_ioerror_alert(bp, __this_address);
        }
        lasttarg = bp->b_target;
}
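
/*
 * Account for this latest trip around the retry handler, and decide if
 * we've failed enough times to constitute a permanent failure.
 */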
static bool
xfs_buf_ioerror_permanent(
        struct xfs_buf *bp,
        struct xfs_error_cfg *cfg)
{
        struct xfs_mount *mp = bp->b_mount;

        if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
            ++bp->b_retries > cfg->max_retries)
                return true;
        if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
            time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
                return true;

        /* At unmount we may treat errors differently */
        if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
                return true;

        return false;
}
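
/*
 * On a sync write or shutdown we just want to stale the buffer and let the
 * caller handle the error in bp->b_error appropriately.
 *
 * If the write was asynchronous then no one will be looking for the error.
 * If this is the first failure of this type, clear the error state and write
 * the buffer out again. This means we always retry an async write failure at
 * least once, but we also need to set the buffer up to behave correctly now
 * for repeated failures.
 *
 * If we get repeated async write failures, then we take action according to
 * the error configuration we have been set up to use.
 *
 * Returns true if this function took care of error handling and the caller
 * must not touch the buffer again. Return false if the caller should proceed
 * with normal I/O completion handling.
 */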
static bool
xfs_buf_ioend_handle_error(
        struct xfs_buf *bp)
{
        struct xfs_mount *mp = bp->b_mount;
        struct xfs_error_cfg *cfg;

        /*
         * If we've already decided to shutdown the filesystem because of I/O
         * errors, there's no point in giving this a retry.
         */
        if (XFS_FORCED_SHUTDOWN(mp))
                goto out_stale;

        xfs_buf_ioerror_alert_ratelimited(bp);

        /*
         * We're not going to bother about retrying this during log recovery.
         * One strike!
         */
        if (bp->b_flags & _XBF_LOGRECOVERY) {
                xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
                return false;
        }

        /*
         * Synchronous writes will have callers process the error.
         */
        if (!(bp->b_flags & XBF_ASYNC))
                goto out_stale;

        trace_xfs_buf_iodone_async(bp, _RET_IP_);

        cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
        if (bp->b_last_error != bp->b_error ||
            !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
                bp->b_last_error = bp->b_error;
                if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
                    !bp->b_first_retry_time)
                        bp->b_first_retry_time = jiffies;
                goto resubmit;
        }

        /*
         * Permanent error - we need to trigger a shutdown if we haven't
         * already to indicate that inconsistency will result from this action.
         */
        if (xfs_buf_ioerror_permanent(bp, cfg)) {
                xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
                goto out_stale;
        }

        /* Still considered a transient error. Caller will schedule retries. */
        if (bp->b_flags & _XBF_INODES)
                xfs_buf_inode_io_fail(bp);
        else if (bp->b_flags & _XBF_DQUOTS)
                xfs_buf_dquot_io_fail(bp);
        else
                ASSERT(list_empty(&bp->b_li_list));
        xfs_buf_ioerror(bp, 0);
        xfs_buf_relse(bp);
        return true;

resubmit:
        xfs_buf_ioerror(bp, 0);
        bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
        xfs_buf_submit(bp);
        return true;
out_stale:
        xfs_buf_stale(bp);
        bp->b_flags |= XBF_DONE;
        bp->b_flags &= ~XBF_WRITE;
        trace_xfs_buf_error_relse(bp, _RET_IP_);
        return false;
}

static void
xfs_buf_ioend(
        struct xfs_buf *bp)
{
        trace_xfs_buf_iodone(bp, _RET_IP_);

        /*
         * Pull in IO completion errors now. We are guaranteed to be running
         * single threaded, so we don't need the lock to read b_io_error.
         */
        if (!bp->b_error && bp->b_io_error)
                xfs_buf_ioerror(bp, bp->b_io_error);

        if (bp->b_flags & XBF_READ) {
                if (!bp->b_error && bp->b_ops)
                        bp->b_ops->verify_read(bp);
                if (!bp->b_error)
                        bp->b_flags |= XBF_DONE;
        } else {
                if (!bp->b_error) {
                        bp->b_flags &= ~XBF_WRITE_FAIL;
                        bp->b_flags |= XBF_DONE;
                }

                if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
                        return;

                /* clear the retry state */
                bp->b_last_error = 0;
                bp->b_retries = 0;
                bp->b_first_retry_time = 0;

                /*
                 * Note that for things like remote attribute buffers, there
                 * may not be a buffer log item here, so processing the buffer
                 * log item must remain optional.
                 */
                if (bp->b_log_item)
                        xfs_buf_item_done(bp);

                if (bp->b_flags & _XBF_INODES)
                        xfs_buf_inode_iodone(bp);
                else if (bp->b_flags & _XBF_DQUOTS)
                        xfs_buf_dquot_iodone(bp);
        }

        bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
                         _XBF_LOGRECOVERY);

        if (bp->b_flags & XBF_ASYNC)
                xfs_buf_relse(bp);
        else
                complete(&bp->b_iowait);
}

static void
xfs_buf_ioend_work(
        struct work_struct *work)
{
        struct xfs_buf *bp =
                container_of(work, struct xfs_buf, b_ioend_work);

        xfs_buf_ioend(bp);
}

static void
xfs_buf_ioend_async(
        struct xfs_buf *bp)
{
        INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
        queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
}

void
__xfs_buf_ioerror(
        struct xfs_buf *bp,
        int error,
        xfs_failaddr_t failaddr)
{
        ASSERT(error <= 0 && error >= -1000);
        bp->b_error = error;
        trace_xfs_buf_ioerror(bp, error, failaddr);
}

void
xfs_buf_ioerror_alert(
        struct xfs_buf *bp,
        xfs_failaddr_t func)
{
        xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
                "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
                func, (uint64_t)XFS_BUF_ADDR(bp),
                bp->b_length, -bp->b_error);
}
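
/*
 * To simulate an I/O failure, the buffer must be locked and held with at least
 * three references. The LRU reference is dropped by the stale call. The buf
 * item reference is dropped via ioend processing. The third reference is owned
 * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
 */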
void
xfs_buf_ioend_fail(
        struct xfs_buf *bp)
{
        bp->b_flags &= ~XBF_DONE;
        xfs_buf_stale(bp);
        xfs_buf_ioerror(bp, -EIO);
        xfs_buf_ioend(bp);
}

int
xfs_bwrite(
        struct xfs_buf *bp)
{
        int error;

        ASSERT(xfs_buf_islocked(bp));

        bp->b_flags |= XBF_WRITE;
        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
                         XBF_DONE);

        error = xfs_buf_submit(bp);
        if (error)
                xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
        return error;
}

static void
xfs_buf_bio_end_io(
        struct bio *bio)
{
        struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;

        if (!bio->bi_status &&
            (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
            XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
                bio->bi_status = BLK_STS_IOERR;

        /*
         * don't overwrite existing errors - otherwise we can lose errors on
         * buffers that require multiple bios to complete.
         */
        if (bio->bi_status) {
                int error = blk_status_to_errno(bio->bi_status);

                cmpxchg(&bp->b_io_error, 0, error);
        }

        if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

        if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
                xfs_buf_ioend_async(bp);
        bio_put(bio);
}

static void
xfs_buf_ioapply_map(
        struct xfs_buf *bp,
        int map,
        int *buf_offset,
        int *count,
        int op)
{
        int page_index;
        unsigned int total_nr_pages = bp->b_page_count;
        int nr_pages;
        struct bio *bio;
        sector_t sector = bp->b_maps[map].bm_bn;
        int size;
        int offset;

        /* skip the pages in the buffer before the start offset */
        page_index = 0;
        offset = *buf_offset;
        while (offset >= PAGE_SIZE) {
                page_index++;
                offset -= PAGE_SIZE;
        }

        /*
         * Limit the IO size to the length of the current vector, and update
         * the remaining IO count for the next time around.
         */
        size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
        *count -= size;
        *buf_offset += size;

next_chunk:
        atomic_inc(&bp->b_io_remaining);
        nr_pages = bio_max_segs(total_nr_pages);

        bio = bio_alloc(GFP_NOIO, nr_pages);
        bio_set_dev(bio, bp->b_target->bt_bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_end_io = xfs_buf_bio_end_io;
        bio->bi_private = bp;
        bio->bi_opf = op;

        for (; size && nr_pages; nr_pages--, page_index++) {
                int rbytes, nbytes = PAGE_SIZE - offset;

                if (nbytes > size)
                        nbytes = size;

                rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
                                      offset);
                if (rbytes < nbytes)
                        break;

                offset = 0;
                sector += BTOBB(nbytes);
                size -= nbytes;
                total_nr_pages--;
        }

        if (likely(bio->bi_iter.bi_size)) {
                if (xfs_buf_is_vmapped(bp)) {
                        flush_kernel_vmap_range(bp->b_addr,
                                                xfs_buf_vmap_len(bp));
                }
                submit_bio(bio);
                if (size)
                        goto next_chunk;
        } else {
                /*
                 * This is guaranteed not to be the last io reference count
                 * because the caller (xfs_buf_submit) holds a count itself.
                 */
                atomic_dec(&bp->b_io_remaining);
                xfs_buf_ioerror(bp, -EIO);
                bio_put(bio);
        }
}

STATIC void
_xfs_buf_ioapply(
        struct xfs_buf *bp)
{
        struct blk_plug plug;
        int op;
        int offset;
        int size;
        int i;

        /*
         * Make sure we capture only current IO errors rather than stale errors
         * left over from previous use of the buffer (e.g. failed readahead).
         */
        bp->b_error = 0;

        if (bp->b_flags & XBF_WRITE) {
                op = REQ_OP_WRITE;

                /*
                 * Run the write verifier callback function if it exists. If
                 * this function fails it will mark the buffer with an error
                 * and the IO should not be dispatched.
                 */
                if (bp->b_ops) {
                        bp->b_ops->verify_write(bp);
                        if (bp->b_error) {
                                xfs_force_shutdown(bp->b_mount,
                                                   SHUTDOWN_CORRUPT_INCORE);
                                return;
                        }
                } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
                        struct xfs_mount *mp = bp->b_mount;

                        /*
                         * non-crc filesystems don't attach verifiers during
                         * log recovery, so don't warn for such filesystems.
                         */
                        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                                xfs_warn(mp,
                                        "%s: no buf ops on daddr 0x%llx len %d",
                                        __func__, bp->b_bn, bp->b_length);
                                xfs_hex_dump(bp->b_addr,
                                             XFS_CORRUPTION_DUMP_LEN);
                                dump_stack();
                        }
                }
        } else {
                op = REQ_OP_READ;
                if (bp->b_flags & XBF_READ_AHEAD)
                        op |= REQ_RAHEAD;
        }

        /* we only use the buffer cache for meta-data */
        op |= REQ_META;

        /*
         * Walk all the vectors issuing IO on them. Set up the initial offset
         * into the buffer and the desired IO size before we start -
         * xfs_buf_ioapply_map() will modify them appropriately for each
         * subsequent vector.
         */
        offset = bp->b_offset;
        size = BBTOB(bp->b_length);
        blk_start_plug(&plug);
        for (i = 0; i < bp->b_map_count; i++) {
                xfs_buf_ioapply_map(bp, i, &offset, &size, op);
                if (bp->b_error)
                        break;
                if (size <= 0)
                        break;  /* all done */
        }
        blk_finish_plug(&plug);
}
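
/*
 * Wait for I/O completion of a sync buffer and return the I/O error code.
 */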
static int
xfs_buf_iowait(
        struct xfs_buf *bp)
{
        ASSERT(!(bp->b_flags & XBF_ASYNC));

        trace_xfs_buf_iowait(bp, _RET_IP_);
        wait_for_completion(&bp->b_iowait);
        trace_xfs_buf_iowait_done(bp, _RET_IP_);

        return bp->b_error;
}
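
/*
 * Buffer I/O submission path, read or write. Asynchronous submission transfers
 * the buffer lock ownership and the current reference to the IO. It is not
 * safe to reference the buffer after a call to this function unless the caller
 * holds an additional reference itself.
 */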
static int
__xfs_buf_submit(
        struct xfs_buf *bp,
        bool wait)
{
        int error = 0;

        trace_xfs_buf_submit(bp, _RET_IP_);

        ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));

        /* on shutdown we stale and complete the buffer immediately */
        if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
                xfs_buf_ioend_fail(bp);
                return -EIO;
        }

        /*
         * Grab a reference so the buffer does not go away underneath us. For
         * async buffers, I/O completion drops the callers reference, which
         * could occur before submission returns.
         */
        xfs_buf_hold(bp);

        if (bp->b_flags & XBF_WRITE)
                xfs_buf_wait_unpin(bp);

        /* clear the internal error state to avoid spurious errors */
        bp->b_io_error = 0;

        /*
         * Set the count to 1 initially, this will stop an I/O completion
         * callout which happens before we have started all the I/O from
         * calling xfs_buf_ioend too early.
         */
        atomic_set(&bp->b_io_remaining, 1);
        if (bp->b_flags & XBF_ASYNC)
                xfs_buf_ioacct_inc(bp);
        _xfs_buf_ioapply(bp);

        /*
         * If _xfs_buf_ioapply failed, we can get back here with only the IO
         * reference we took above. If we drop it to zero, run completion
         * processing synchronously so that we don't return to the caller with
         * completion still pending.
         */
        if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
                if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
                        xfs_buf_ioend(bp);
                else
                        xfs_buf_ioend_async(bp);
        }

        if (wait)
                error = xfs_buf_iowait(bp);

        /*
         * Release the hold that keeps the buffer referenced for the entire
         * I/O. Note that if the buffer is async, it is not safe to reference
         * it after this release.
         */
        xfs_buf_rele(bp);
        return error;
}

void *
xfs_buf_offset(
        struct xfs_buf *bp,
        size_t offset)
{
        struct page *page;

        if (bp->b_addr)
                return bp->b_addr + offset;

        offset += bp->b_offset;
        page = bp->b_pages[offset >> PAGE_SHIFT];
        return page_address(page) + (offset & (PAGE_SIZE-1));
}

void
xfs_buf_zero(
        struct xfs_buf *bp,
        size_t boff,
        size_t bsize)
{
        size_t bend;

        bend = boff + bsize;
        while (boff < bend) {
                struct page *page;
                int page_index, page_offset, csize;

                page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
                page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
                page = bp->b_pages[page_index];
                csize = min_t(size_t, PAGE_SIZE - page_offset,
                              BBTOB(bp->b_length) - boff);

                ASSERT((csize + page_offset) <= PAGE_SIZE);

                memset(page_address(page) + page_offset, 0, csize);

                boff += csize;
        }
}
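
/*
 * Log a message about and stale a buffer that a caller has decided is corrupt.
 *
 * This function should be called for the kinds of metadata corruption that
 * cannot be detected from a verifier, such as incorrect inter-block
 * relationship data.  Do /not/ call this function from a verifier function.
 *
 * The buffer must be XBF_DONE prior to the call.  Afterwards, the buffer will
 * be marked stale, but b_error will not be set.  The caller is responsible for
 * releasing the buffer or fixing it.
 */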
void
__xfs_buf_mark_corrupt(
        struct xfs_buf *bp,
        xfs_failaddr_t fa)
{
        ASSERT(bp->b_flags & XBF_DONE);

        xfs_buf_corruption_error(bp, fa);
        xfs_buf_stale(bp);
}
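
/*
 * Handling of buffer targets (buftargs).
 */

/*
 * Drain helper: isolate an unreferenced buffer from the LRU onto the caller's
 * dispose list. Buffers that still have active references or whose b_lock
 * cannot be taken are skipped so they can be retried on a later pass.
 */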
static enum lru_status
xfs_buftarg_drain_rele(
        struct list_head *item,
        struct list_lru_one *lru,
        spinlock_t *lru_lock,
        void *arg)
{
        struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
        struct list_head *dispose = arg;

        if (atomic_read(&bp->b_hold) > 1) {
                /* need to wait, so skip it this pass */
                trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
                return LRU_SKIP;
        }
        if (!spin_trylock(&bp->b_lock))
                return LRU_SKIP;

        /*
         * clear the LRU reference count so the buffer doesn't get
         * ignored in xfs_buf_rele().
         */
        atomic_set(&bp->b_lru_ref, 0);
        bp->b_state |= XFS_BSTATE_DISPOSE;
        list_lru_isolate_move(lru, item, dispose);
        spin_unlock(&bp->b_lock);
        return LRU_REMOVED;
}
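
/*
 * Wait for outstanding I/O on the buftarg to complete.
 */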
void
xfs_buftarg_wait(
        struct xfs_buftarg *btp)
{
        /*
         * First wait on the buftarg I/O count for all in-flight buffers to be
         * released. This is critical as new buffers do not make the LRU until
         * they are released.
         *
         * Next, flush the buffer workqueue to ensure all completion processing
         * has finished. Just waiting on buffer locks is not sufficient for
         * async IO as the reference count held over IO is not released until
         * after the buffer lock is dropped. Hence we need to ensure here that
         * all reference counts have been dropped before we start walking the
         * LRU list.
         */
        while (percpu_counter_sum(&btp->bt_io_count))
                delay(100);
        flush_workqueue(btp->bt_mount->m_buf_workqueue);
}

void
xfs_buftarg_drain(
        struct xfs_buftarg *btp)
{
        LIST_HEAD(dispose);
        int loop = 0;
        bool write_fail = false;

        xfs_buftarg_wait(btp);

        /* loop until there is nothing left on the lru list. */
        while (list_lru_count(&btp->bt_lru)) {
                list_lru_walk(&btp->bt_lru, xfs_buftarg_drain_rele,
                              &dispose, LONG_MAX);

                while (!list_empty(&dispose)) {
                        struct xfs_buf *bp;
                        bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                        list_del_init(&bp->b_lru);
                        if (bp->b_flags & XBF_WRITE_FAIL) {
                                write_fail = true;
                                xfs_buf_alert_ratelimited(bp,
                                        "XFS: Corruption Alert",
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
                                        (long long)bp->b_bn);
                        }
                        xfs_buf_rele(bp);
                }
                if (loop++ != 0)
                        delay(100);
        }

        /*
         * If one or more failed buffers were freed, that means dirty metadata
         * was thrown away. This should only ever happen after I/O completion
         * handling has elevated I/O error(s) to permanent failures and shuts
         * down the fs.
         */
        if (write_fail) {
                ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
                xfs_alert(btp->bt_mount,
              "Please run xfs_repair to determine the extent of the problem.");
        }
}

static enum lru_status
xfs_buftarg_isolate(
        struct list_head *item,
        struct list_lru_one *lru,
        spinlock_t *lru_lock,
        void *arg)
{
        struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
        struct list_head *dispose = arg;

        /*
         * we are inverting the lru lock/bp->b_lock here, so use a trylock.
         * If we fail to get the lock, just skip it.
         */
        if (!spin_trylock(&bp->b_lock))
                return LRU_SKIP;
        /*
         * Decrement the b_lru_ref count unless the value is already
         * zero. If the value is already zero, we need to reclaim the
         * buffer, otherwise it gets another trip around the LRU.
         */
        if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
                spin_unlock(&bp->b_lock);
                return LRU_ROTATE;
        }

        bp->b_state |= XFS_BSTATE_DISPOSE;
        list_lru_isolate_move(lru, item, dispose);
        spin_unlock(&bp->b_lock);
        return LRU_REMOVED;
}

static unsigned long
xfs_buftarg_shrink_scan(
        struct shrinker *shrink,
        struct shrink_control *sc)
{
        struct xfs_buftarg *btp = container_of(shrink,
                                        struct xfs_buftarg, bt_shrinker);
        LIST_HEAD(dispose);
        unsigned long freed;

        freed = list_lru_shrink_walk(&btp->bt_lru, sc,
                                     xfs_buftarg_isolate, &dispose);

        while (!list_empty(&dispose)) {
                struct xfs_buf *bp;
                bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                list_del_init(&bp->b_lru);
                xfs_buf_rele(bp);
        }

        return freed;
}

static unsigned long
xfs_buftarg_shrink_count(
        struct shrinker *shrink,
        struct shrink_control *sc)
{
        struct xfs_buftarg *btp = container_of(shrink,
                                        struct xfs_buftarg, bt_shrinker);
        return list_lru_shrink_count(&btp->bt_lru, sc);
}

void
xfs_free_buftarg(
        struct xfs_buftarg *btp)
{
        unregister_shrinker(&btp->bt_shrinker);
        ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
        percpu_counter_destroy(&btp->bt_io_count);
        list_lru_destroy(&btp->bt_lru);

        xfs_blkdev_issue_flush(btp);

        kmem_free(btp);
}

int
xfs_setsize_buftarg(
        xfs_buftarg_t *btp,
        unsigned int sectorsize)
{
        /* Set up metadata sector size info */
        btp->bt_meta_sectorsize = sectorsize;
        btp->bt_meta_sectormask = sectorsize - 1;

        if (set_blocksize(btp->bt_bdev, sectorsize)) {
                xfs_warn(btp->bt_mount,
                        "Cannot set_blocksize to %u on device %pg",
                        sectorsize, btp->bt_bdev);
                return -EINVAL;
        }

        /* Set up device logical sector size mask */
        btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
        btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;

        return 0;
}
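
/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */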
STATIC int
xfs_setsize_buftarg_early(
        xfs_buftarg_t *btp,
        struct block_device *bdev)
{
        return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}

xfs_buftarg_t *
xfs_alloc_buftarg(
        struct xfs_mount *mp,
        struct block_device *bdev,
        struct dax_device *dax_dev)
{
        xfs_buftarg_t *btp;

        btp = kmem_zalloc(sizeof(*btp), KM_NOFS);

        btp->bt_mount = mp;
        btp->bt_dev = bdev->bd_dev;
        btp->bt_bdev = bdev;
        btp->bt_daxdev = dax_dev;

        /*
         * Buffer IO error rate limiting. Limit it to no more than 10 messages
         * per 30 seconds so as to not spam logs too much on repeated errors.
         */
        ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
                             DEFAULT_RATELIMIT_BURST);

        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error_free;

        if (list_lru_init(&btp->bt_lru))
                goto error_free;

        if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
                goto error_lru;

        btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
        btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
        btp->bt_shrinker.seeks = DEFAULT_SEEKS;
        btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
        if (register_shrinker(&btp->bt_shrinker))
                goto error_pcpu;
        return btp;

error_pcpu:
        percpu_counter_destroy(&btp->bt_io_count);
error_lru:
        list_lru_destroy(&btp->bt_lru);
error_free:
        kmem_free(btp);
        return NULL;
}
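
/*
 * Cancel a delayed write list.
 *
 * Remove each buffer from the list, clear the delwri queue flag and drop the
 * associated buffer reference.
 */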
void
xfs_buf_delwri_cancel(
        struct list_head *list)
{
        struct xfs_buf *bp;

        while (!list_empty(list)) {
                bp = list_first_entry(list, struct xfs_buf, b_list);

                xfs_buf_lock(bp);
                bp->b_flags &= ~_XBF_DELWRI_Q;
                list_del_init(&bp->b_list);
                xfs_buf_relse(bp);
        }
}
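
/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been.  Note that
 * neither this routine nor the buffer list submission functions perform any
 * internal synchronization.  It is expected that the lists are thread-local
 * to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */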
bool
xfs_buf_delwri_queue(
        struct xfs_buf *bp,
        struct list_head *list)
{
        ASSERT(xfs_buf_islocked(bp));
        ASSERT(!(bp->b_flags & XBF_READ));

        /*
         * If the buffer is already marked delwri it is already queued up
         * by someone else for immediate writeout.  Just ignore it in that
         * case.
         */
        if (bp->b_flags & _XBF_DELWRI_Q) {
                trace_xfs_buf_delwri_queued(bp, _RET_IP_);
                return false;
        }

        trace_xfs_buf_delwri_queue(bp, _RET_IP_);

        /*
         * If a buffer gets written out synchronously or marked stale while it
         * is on a delwri list we lazily remove it. To do this, the other party
         * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
         * It remains referenced and on the list.  In a rare corner case it
         * might get readded to a delwri list after the synchronous writeout,
         * in which case we just need to re-add the flag here.
         */
        bp->b_flags |= _XBF_DELWRI_Q;
        if (list_empty(&bp->b_list)) {
                atomic_inc(&bp->b_hold);
                list_add_tail(&bp->b_list, list);
        }

        return true;
}
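
/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */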
static int
xfs_buf_cmp(
        void *priv,
        struct list_head *a,
        struct list_head *b)
{
        struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
        struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
        xfs_daddr_t diff;

        diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
        if (diff < 0)
                return -1;
        if (diff > 0)
                return 1;
        return 0;
}
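
/*
 * Submit buffers for write. If wait_list is specified, the buffers are
 * submitted using sync I/O and placed on the wait list such that the caller
 * can iowait each buffer. Otherwise async I/O is used and the buffers are
 * released on I/O completion.
 */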
static int
xfs_buf_delwri_submit_buffers(
        struct list_head *buffer_list,
        struct list_head *wait_list)
{
        struct xfs_buf *bp, *n;
        int pinned = 0;
        struct blk_plug plug;

        list_sort(NULL, buffer_list, xfs_buf_cmp);

        blk_start_plug(&plug);
        list_for_each_entry_safe(bp, n, buffer_list, b_list) {
                if (!wait_list) {
                        if (xfs_buf_ispinned(bp)) {
                                pinned++;
                                continue;
                        }
                        if (!xfs_buf_trylock(bp))
                                continue;
                } else {
                        xfs_buf_lock(bp);
                }

                /*
                 * Someone else might have written the buffer synchronously or
                 * marked it stale in the meantime.  In that case only the
                 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
                 * reference and remove it from the list here.
                 */
                if (!(bp->b_flags & _XBF_DELWRI_Q)) {
                        list_del_init(&bp->b_list);
                        xfs_buf_relse(bp);
                        continue;
                }

                trace_xfs_buf_delwri_split(bp, _RET_IP_);

                /*
                 * If we have a wait list, each buffer (and associated delwri
                 * queue reference) transfers to it and is submitted
                 * synchronously. Otherwise, drop the buffer from the delwri
                 * queue and submit async.
                 */
                bp->b_flags &= ~_XBF_DELWRI_Q;
                bp->b_flags |= XBF_WRITE;
                if (wait_list) {
                        bp->b_flags &= ~XBF_ASYNC;
                        list_move_tail(&bp->b_list, wait_list);
                } else {
                        bp->b_flags |= XBF_ASYNC;
                        list_del_init(&bp->b_list);
                }
                __xfs_buf_submit(bp, false);
        }
        blk_finish_plug(&plug);

        return pinned;
}
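
/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers.  This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing as the @buffer_list is consumed in this
 * function.
 *
 * Pinned buffers are skipped and left on @buffer_list; the return value is
 * the number of buffers skipped this way, so the caller can decide whether a
 * log force is needed before retrying.
 */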
int
xfs_buf_delwri_submit_nowait(
        struct list_head *buffer_list)
{
        return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
}
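
/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require such
 * functionality.
 */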
int
xfs_buf_delwri_submit(
        struct list_head *buffer_list)
{
        LIST_HEAD(wait_list);
        int error = 0, error2;
        struct xfs_buf *bp;

        xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);

        /* Wait for IO to complete. */
        while (!list_empty(&wait_list)) {
                bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

                list_del_init(&bp->b_list);

                /*
                 * Wait on the locked buffer, check for errors and unlock and
                 * release the delwri queue reference.
                 */
                error2 = xfs_buf_iowait(bp);
                xfs_buf_relse(bp);
                if (!error)
                        error = error2;
        }

        return error;
}
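
/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri queue
 * and return with the buffer still on the original queue. The waiting delwri
 * buffer submission infrastructure guarantees transfer of the delwri queue
 * buffer reference to a temporary wait list. We reuse this infrastructure to
 * transfer the buffer back to the original queue.
 *
 * Note the buffer transitions from the queued state, to the submitted and wait
 * listed state and back to the queued state during this call. The buffer
 * locking and queue management logic between _delwri_pushbuf() and
 * _delwri_queue() guarantee that the buffer cannot be queued to another list
 * in the interim.
 */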
int
xfs_buf_delwri_pushbuf(
        struct xfs_buf *bp,
        struct list_head *buffer_list)
{
        LIST_HEAD(submit_list);
        int error;

        ASSERT(bp->b_flags & _XBF_DELWRI_Q);

        trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);

        /*
         * Isolate the buffer to a new local list so we can submit it for I/O
         * independently from the rest of the original list.
         */
        xfs_buf_lock(bp);
        list_move(&bp->b_list, &submit_list);
        xfs_buf_unlock(bp);

        /*
         * Delwri submission clears the DELWRI_Q buffer flag and returns with
         * the buffer on the wait list with the original reference. Rather than
         * bounce the buffer from a local wait list back to the original list
         * after I/O completion, reuse the original list as the wait list.
         */
        xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);

        /*
         * The buffer is now locked, under I/O and wait listed on the original
         * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
         * return with the buffer unlocked and on the original queue.
         */
        error = xfs_buf_iowait(bp);
        bp->b_flags |= _XBF_DELWRI_Q;
        xfs_buf_unlock(bp);

        return error;
}

int __init
xfs_buf_init(void)
{
        xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
                                         SLAB_HWCACHE_ALIGN |
                                         SLAB_RECLAIM_ACCOUNT |
                                         SLAB_MEM_SPREAD,
                                         NULL);
        if (!xfs_buf_zone)
                return -ENOMEM;

        return 0;
}

void
xfs_buf_terminate(void)
{
        kmem_cache_destroy(xfs_buf_zone);
}

void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
        /*
         * Set the lru reference count to 0 based on the error injection tag.
         * This allows userspace to disrupt buffer caching for debug/testing
         * purposes.
         */
        if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
                lru_ref = 0;

        atomic_set(&bp->b_lru_ref, lru_ref);
}
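
/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */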
bool
xfs_verify_magic(
        struct xfs_buf *bp,
        __be32 dmagic)
{
        struct xfs_mount *mp = bp->b_mount;
        int idx;

        idx = xfs_sb_version_hascrc(&mp->m_sb);
        if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
                return false;
        return dmagic == bp->b_ops->magic[idx];
}
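
/*
 * Verify an on-disk magic value against the 16-bit magic value specified in
 * the verifier structure. The verifier magic is in disk byte order so the
 * caller is expected to pass the value directly from disk.
 */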
bool
xfs_verify_magic16(
        struct xfs_buf *bp,
        __be16 dmagic)
{
        struct xfs_mount *mp = bp->b_mount;
        int idx;

        idx = xfs_sb_version_hascrc(&mp->m_sb);
        if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
                return false;
        return dmagic == bp->b_ops->magic16[idx];
}