// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include <linux/backing-dev.h>

#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_recover.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_ag.h"

static kmem_zone_t *xfs_buf_zone;
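
/*
 * Locking orders used in this file, outermost lock first:
 *
 *	b_sema (buffer lock, held by callers of most functions below)
 *	  b_lock (spinlock protecting b_state, b_hold and LRU transitions)
 *	    pag_buf_lock (per-AG buffer hash lock, see xfs_buf_rele)
 *	      lru_lock (taken internally by the list_lru helpers)
 *
 * xfs_buftarg_drain_rele() and xfs_buftarg_isolate() run with lru_lock
 * already held and therefore only ever trylock b_lock.
 */
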
static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);

static inline int
xfs_buf_submit(
	struct xfs_buf *bp)
{
	return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
}

static inline int
xfs_buf_is_vmapped(
	struct xfs_buf *bp)
{
	/*
	 * A buffer is only vmapped when it has a kernel mapping (b_addr)
	 * spanning more than one page; single page buffers use the page
	 * address directly.
	 */
	return bp->b_addr && bp->b_page_count > 1;
}

static inline int
xfs_buf_vmap_len(
	struct xfs_buf *bp)
{
	return (bp->b_page_count * PAGE_SIZE);
}
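
/*
 * In-flight I/O accounting: buffers under asynchronous I/O are counted in
 * bt_io_count on their buftarg so that unmount/quiesce can wait for all
 * in-flight I/O to drain (see xfs_buftarg_wait()). Buffers created with
 * XBF_NO_IOACCT are excluded. The XFS_BSTATE_IN_FLIGHT bit, protected by
 * b_lock, ensures each buffer is only counted once.
 */
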
static inline void
xfs_buf_ioacct_inc(
	struct xfs_buf *bp)
{
	if (bp->b_flags & XBF_NO_IOACCT)
		return;

	ASSERT(bp->b_flags & XBF_ASYNC);
	spin_lock(&bp->b_lock);
	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
		percpu_counter_inc(&bp->b_target->bt_io_count);
	}
	spin_unlock(&bp->b_lock);
}

/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg. The caller must hold b_lock.
 */
static inline void
__xfs_buf_ioacct_dec(
	struct xfs_buf *bp)
{
	lockdep_assert_held(&bp->b_lock);

	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
		percpu_counter_dec(&bp->b_target->bt_io_count);
	}
}

static inline void
xfs_buf_ioacct_dec(
	struct xfs_buf *bp)
{
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);
	spin_unlock(&bp->b_lock);
}
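
/*
 * Mark a buffer stale: its contents are no longer valid. Clearing
 * _XBF_DELWRI_Q ensures a delwri queue walk will skip and remove it, and
 * dropping the LRU reference (and the hold it represents) allows the buffer
 * to be freed as soon as the last active reference goes away.
 */
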
void
xfs_buf_stale(
	struct xfs_buf *bp)
{
	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_STALE;

	bp->b_flags &= ~_XBF_DELWRI_Q;

	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);

	atomic_set(&bp->b_lru_ref, 0);
	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
		atomic_dec(&bp->b_hold);

	ASSERT(atomic_read(&bp->b_hold) >= 1);
	spin_unlock(&bp->b_lock);
}

static int
xfs_buf_get_maps(
	struct xfs_buf *bp,
	int map_count)
{
	ASSERT(bp->b_maps == NULL);
	bp->b_map_count = map_count;

	if (map_count == 1) {
		bp->b_maps = &bp->__b_map;
		return 0;
	}

	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
				 KM_NOFS);
	if (!bp->b_maps)
		return -ENOMEM;
	return 0;
}

static void
xfs_buf_free_maps(
	struct xfs_buf *bp)
{
	if (bp->b_maps != &bp->__b_map) {
		kmem_free(bp->b_maps);
		bp->b_maps = NULL;
	}
}

static int
_xfs_buf_alloc(
	struct xfs_buftarg *target,
	struct xfs_buf_map *map,
	int nmaps,
	xfs_buf_flags_t flags,
	struct xfs_buf **bpp)
{
	struct xfs_buf *bp;
	int error;
	int i;

	*bpp = NULL;
	bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL);

	/*
	 * We don't want certain flags to appear in b_flags unless they are
	 * specifically set by later operations on the buffer.
	 */
	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);

	atomic_set(&bp->b_hold, 1);
	atomic_set(&bp->b_lru_ref, 1);
	init_completion(&bp->b_iowait);
	INIT_LIST_HEAD(&bp->b_lru);
	INIT_LIST_HEAD(&bp->b_list);
	INIT_LIST_HEAD(&bp->b_li_list);
	sema_init(&bp->b_sema, 0); /* held, no waiters */
	spin_lock_init(&bp->b_lock);
	bp->b_target = target;
	bp->b_mount = target->bt_mount;
	bp->b_flags = flags;

	error = xfs_buf_get_maps(bp, nmaps);
	if (error) {
		kmem_cache_free(xfs_buf_zone, bp);
		return error;
	}

	bp->b_bn = map[0].bm_bn;
	bp->b_length = 0;
	for (i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bp->b_length += map[i].bm_len;
	}

	atomic_set(&bp->b_pin_count, 0);
	init_waitqueue_head(&bp->b_waiters);

	XFS_STATS_INC(bp->b_mount, xb_create);
	trace_xfs_buf_init(bp, _RET_IP_);

	*bpp = bp;
	return 0;
}

static void
xfs_buf_free_pages(
	struct xfs_buf *bp)
{
	uint i;

	ASSERT(bp->b_flags & _XBF_PAGES);

	if (xfs_buf_is_vmapped(bp))
		vm_unmap_ram(bp->b_addr, bp->b_page_count);

	for (i = 0; i < bp->b_page_count; i++) {
		if (bp->b_pages[i])
			__free_page(bp->b_pages[i]);
	}
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += bp->b_page_count;

	if (bp->b_pages != bp->b_page_array)
		kmem_free(bp->b_pages);
	bp->b_pages = NULL;
	bp->b_flags &= ~_XBF_PAGES;
}

static void
xfs_buf_free(
	struct xfs_buf *bp)
{
	trace_xfs_buf_free(bp, _RET_IP_);

	ASSERT(list_empty(&bp->b_lru));

	if (bp->b_flags & _XBF_PAGES)
		xfs_buf_free_pages(bp);
	else if (bp->b_flags & _XBF_KMEM)
		kmem_free(bp->b_addr);

	xfs_buf_free_maps(bp);
	kmem_cache_free(xfs_buf_zone, bp);
}

static int
xfs_buf_alloc_kmem(
	struct xfs_buf *bp,
	xfs_buf_flags_t flags)
{
	int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
	xfs_km_flags_t kmflag_mask = KM_NOFS;
	size_t size = BBTOB(bp->b_length);

	/* Assure zeroed buffer for non-read cases. */
	if (!(flags & XBF_READ))
		kmflag_mask |= KM_ZERO;

	bp->b_addr = kmem_alloc_io(size, align_mask, kmflag_mask);
	if (!bp->b_addr)
		return -ENOMEM;

	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
		/* b_addr spans two pages - fall back to page allocation */
		kmem_free(bp->b_addr);
		bp->b_addr = NULL;
		return -ENOMEM;
	}
	bp->b_offset = offset_in_page(bp->b_addr);
	bp->b_pages = bp->b_page_array;
	bp->b_pages[0] = kmem_to_page(bp->b_addr);
	bp->b_page_count = 1;
	bp->b_flags |= _XBF_KMEM;
	return 0;
}

static int
xfs_buf_alloc_pages(
	struct xfs_buf *bp,
	xfs_buf_flags_t flags)
{
	gfp_t gfp_mask = __GFP_NOWARN;
	long filled = 0;

	if (flags & XBF_READ_AHEAD)
		gfp_mask |= __GFP_NORETRY;
	else
		gfp_mask |= GFP_NOFS;

	/* Make sure that we have a page list */
	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
	if (bp->b_page_count <= XB_PAGES) {
		bp->b_pages = bp->b_page_array;
	} else {
		bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
					gfp_mask);
		if (!bp->b_pages)
			return -ENOMEM;
	}
	bp->b_flags |= _XBF_PAGES;

	/* Assure zeroed buffer for non-read cases. */
	if (!(flags & XBF_READ))
		gfp_mask |= __GFP_ZERO;

	/*
	 * Bulk filling of pages can take multiple calls. Not filling the
	 * entire array is not an allocation failure, so don't back off if
	 * we get at least one extra page.
	 */
	for (;;) {
		long last = filled;

		filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count,
						bp->b_pages);
		if (filled == bp->b_page_count) {
			XFS_STATS_INC(bp->b_mount, xb_page_found);
			break;
		}

		if (filled != last)
			continue;

		if (flags & XBF_READ_AHEAD) {
			xfs_buf_free_pages(bp);
			return -ENOMEM;
		}

		XFS_STATS_INC(bp->b_mount, xb_page_retries);
		congestion_wait(BLK_RW_ASYNC, HZ / 50);
	}
	return 0;
}

/*
 * Map buffer into kernel address-space if necessary.
 */
STATIC int
_xfs_buf_map_pages(
	struct xfs_buf *bp,
	uint flags)
{
	ASSERT(bp->b_flags & _XBF_PAGES);
	if (bp->b_page_count == 1) {
		/* A single page buffer is always mappable */
		bp->b_addr = page_address(bp->b_pages[0]);
	} else if (flags & XBF_UNMAPPED) {
		bp->b_addr = NULL;
	} else {
		int retried = 0;
		unsigned nofs_flag;

		/*
		 * vm_map_ram() will allocate auxiliary structures (e.g.
		 * pagetables) with GFP_KERNEL, yet we are likely to be under
		 * GFP_NOFS context here, so tell memory reclaim not to
		 * re-enter the filesystem and potentially deadlock.
		 */
		nofs_flag = memalloc_nofs_save();
		do {
			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
						-1);
			if (bp->b_addr)
				break;
			vm_unmap_aliases();
		} while (retried++ <= 1);
		memalloc_nofs_restore(nofs_flag);

		if (!bp->b_addr)
			return -ENOMEM;
	}

	return 0;
}

/*
 * Finding and Reading Buffers
 */
static int
_xfs_buf_obj_cmp(
	struct rhashtable_compare_arg *arg,
	const void *obj)
{
	const struct xfs_buf_map *map = arg->key;
	const struct xfs_buf *bp = obj;

	/*
	 * The key hashing in the lookup path depends on the key being the
	 * first element of the compare_arg, so assert that here.
	 */
	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);

	if (bp->b_bn != map->bm_bn)
		return 1;

	if (unlikely(bp->b_length != map->bm_len)) {
		/*
		 * Found a block number match but the lengths differ. This is
		 * only allowed if the cached buffer is stale (e.g. we are
		 * reallocating a busy extent), so skip it and keep searching
		 * for an exact match.
		 */
		ASSERT(bp->b_flags & XBF_STALE);
		return 1;
	}
	return 0;
}

static const struct rhashtable_params xfs_buf_hash_params = {
	.min_size = 32,
	.nelem_hint = 16,
	.key_len = sizeof(xfs_daddr_t),
	.key_offset = offsetof(struct xfs_buf, b_bn),
	.head_offset = offsetof(struct xfs_buf, b_rhash_head),
	.automatic_shrinking = true,
	.obj_cmpfn = _xfs_buf_obj_cmp,
};

int
xfs_buf_hash_init(
	struct xfs_perag *pag)
{
	spin_lock_init(&pag->pag_buf_lock);
	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
}

void
xfs_buf_hash_destroy(
	struct xfs_perag *pag)
{
	rhashtable_destroy(&pag->pag_buf_hash);
}
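
/*
 * Look up a buffer in the per-AG buffer cache and return it referenced and
 * locked in @found_bp.
 *
 * If @new_bp is supplied and the lookup misses, @new_bp is inserted into the
 * cache instead. With XBF_TRYLOCK set in @flags, a lock failure returns
 * -EAGAIN rather than blocking.
 *
 * Returns -EFSCORRUPTED for an out-of-range address, -ENOENT on a miss with
 * no @new_bp supplied, and 0 on success with the buffer in @found_bp.
 */
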
static int
xfs_buf_find(
	struct xfs_buftarg *btp,
	struct xfs_buf_map *map,
	int nmaps,
	xfs_buf_flags_t flags,
	struct xfs_buf *new_bp,
	struct xfs_buf **found_bp)
{
	struct xfs_perag *pag;
	struct xfs_buf *bp;
	struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
	xfs_daddr_t eofs;
	int i;

	*found_bp = NULL;

	for (i = 0; i < nmaps; i++)
		cmap.bm_len += map[i].bm_len;

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
	ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));

	/*
	 * Corrupted block numbers can get through to here, unfortunately, so
	 * we have to check that the buffer falls within the filesystem bounds.
	 */
	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
	if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
		xfs_alert(btp->bt_mount,
			  "%s: daddr 0x%llx out of range, EOFS 0x%llx",
			  __func__, cmap.bm_bn, eofs);
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	pag = xfs_perag_get(btp->bt_mount,
			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));

	spin_lock(&pag->pag_buf_lock);
	bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
				    xfs_buf_hash_params);
	if (bp) {
		atomic_inc(&bp->b_hold);
		goto found;
	}

	/* No match found */
	if (!new_bp) {
		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		return -ENOENT;
	}

	/* the buffer keeps the perag reference until it is freed */
	new_bp->b_pag = pag;
	rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
			       xfs_buf_hash_params);
	spin_unlock(&pag->pag_buf_lock);
	*found_bp = new_bp;
	return 0;

found:
	spin_unlock(&pag->pag_buf_lock);
	xfs_perag_put(pag);

	if (!xfs_buf_trylock(bp)) {
		if (flags & XBF_TRYLOCK) {
			xfs_buf_rele(bp);
			XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
			return -EAGAIN;
		}
		xfs_buf_lock(bp);
		XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
	}

	/*
	 * If the buffer is stale, clear all the external state associated
	 * with it. We need to keep flags such as how we allocated the buffer
	 * memory intact here.
	 */
	if (bp->b_flags & XBF_STALE) {
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
		bp->b_ops = NULL;
	}

	trace_xfs_buf_find(bp, flags, _RET_IP_);
	XFS_STATS_INC(btp->bt_mount, xb_get_locked);
	*found_bp = bp;
	return 0;
}

struct xfs_buf *
xfs_buf_incore(
	struct xfs_buftarg *target,
	xfs_daddr_t blkno,
	size_t numblks,
	xfs_buf_flags_t flags)
{
	struct xfs_buf *bp;
	int error;
	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);

	error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
	if (error)
		return NULL;
	return bp;
}
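
/*
 * Assemble a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata-intensive workloads are overwhelmingly dominated
 * by hits rather than misses.
 */
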
int
xfs_buf_get_map(
	struct xfs_buftarg *target,
	struct xfs_buf_map *map,
	int nmaps,
	xfs_buf_flags_t flags,
	struct xfs_buf **bpp)
{
	struct xfs_buf *bp;
	struct xfs_buf *new_bp;
	int error;

	*bpp = NULL;
	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
	if (!error)
		goto found;
	if (error != -ENOENT)
		return error;

	error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
	if (error)
		return error;

	/*
	 * For buffers that fit entirely within a single page, first attempt
	 * to allocate the memory from the heap to minimise memory usage. If
	 * that fails, fall back to the page allocator.
	 */
	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
	    xfs_buf_alloc_kmem(new_bp, flags) < 0) {
		error = xfs_buf_alloc_pages(new_bp, flags);
		if (error)
			goto out_free_buf;
	}

	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
	if (error)
		goto out_free_buf;

	if (bp != new_bp)
		xfs_buf_free(new_bp);

found:
	if (!bp->b_addr) {
		error = _xfs_buf_map_pages(bp, flags);
		if (unlikely(error)) {
			xfs_warn_ratelimited(target->bt_mount,
				"%s: failed to map %u pages", __func__,
				bp->b_page_count);
			xfs_buf_relse(bp);
			return error;
		}
	}

	/*
	 * Clear b_error if this is a lookup from a caller that doesn't expect
	 * valid data to be found in the buffer.
	 */
	if (!(flags & XBF_READ))
		xfs_buf_ioerror(bp, 0);

	XFS_STATS_INC(target->bt_mount, xb_get);
	trace_xfs_buf_get(bp, flags, _RET_IP_);
	*bpp = bp;
	return 0;
out_free_buf:
	xfs_buf_free(new_bp);
	return error;
}

int
_xfs_buf_read(
	struct xfs_buf *bp,
	xfs_buf_flags_t flags)
{
	ASSERT(!(flags & XBF_WRITE));
	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);

	return xfs_buf_submit(bp);
}
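
/*
 * Reverify a buffer found in cache without an attached ->b_ops.
 *
 * If the caller passed an ops structure and the buffer doesn't have ops
 * assigned, set the ops and use it to verify the contents. If verification
 * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
 * already in XBF_DONE state on entry.
 */
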
int
xfs_buf_reverify(
	struct xfs_buf *bp,
	const struct xfs_buf_ops *ops)
{
	ASSERT(bp->b_flags & XBF_DONE);
	ASSERT(bp->b_error == 0);

	if (!ops || bp->b_ops)
		return 0;

	bp->b_ops = ops;
	bp->b_ops->verify_read(bp);
	if (bp->b_error)
		bp->b_flags &= ~XBF_DONE;
	return bp->b_error;
}

int
xfs_buf_read_map(
	struct xfs_buftarg *target,
	struct xfs_buf_map *map,
	int nmaps,
	xfs_buf_flags_t flags,
	struct xfs_buf **bpp,
	const struct xfs_buf_ops *ops,
	xfs_failaddr_t fa)
{
	struct xfs_buf *bp;
	int error;

	flags |= XBF_READ;
	*bpp = NULL;

	error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
	if (error)
		return error;

	trace_xfs_buf_read(bp, flags, _RET_IP_);

	if (!(bp->b_flags & XBF_DONE)) {
		/* Initiate the buffer read and wait. */
		XFS_STATS_INC(target->bt_mount, xb_get_read);
		bp->b_ops = ops;
		error = _xfs_buf_read(bp, flags);

		/* Readahead iodone already dropped the buffer, so exit. */
		if (flags & XBF_ASYNC)
			return 0;
	} else {
		/* Buffer already read; all we need to do is check it. */
		error = xfs_buf_reverify(bp, ops);

		/* Readahead already finished; drop the buffer and exit. */
		if (flags & XBF_ASYNC) {
			xfs_buf_relse(bp);
			return 0;
		}

		/* We do not want read in the flags */
		bp->b_flags &= ~XBF_READ;
		ASSERT(bp->b_ops != NULL || ops == NULL);
	}

	/*
	 * If we've had a read error, then the contents of the buffer are
	 * invalid and should not be used. To ensure that a followup read
	 * tries to pull the buffer from disk again, we clear the XBF_DONE
	 * flag and mark the buffer stale. This ensures that anyone who has
	 * a current reference to the buffer will interpret its contents
	 * correctly and the next cached read will reread it again.
	 */
	if (error) {
		if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
			xfs_buf_ioerror_alert(bp, fa);

		bp->b_flags &= ~XBF_DONE;
		xfs_buf_stale(bp);
		xfs_buf_relse(bp);

		/* bad CRC means corrupted metadata -> -EFSCORRUPTED */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	*bpp = bp;
	return 0;
}

/*
 * If we are not low on memory then do the readahead in a deadlock
 * safe manner.
 */
void
xfs_buf_readahead_map(
	struct xfs_buftarg *target,
	struct xfs_buf_map *map,
	int nmaps,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf *bp;

	if (bdi_read_congested(target->bt_bdev->bd_bdi))
		return;

	xfs_buf_read_map(target, map, nmaps,
		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
		     __this_address);
}

/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */
int
xfs_buf_read_uncached(
	struct xfs_buftarg *target,
	xfs_daddr_t daddr,
	size_t numblks,
	int flags,
	struct xfs_buf **bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf *bp;
	int error;

	*bpp = NULL;

	error = xfs_buf_get_uncached(target, numblks, flags, &bp);
	if (error)
		return error;

	/* set up the buffer for a read IO */
	ASSERT(bp->b_map_count == 1);
	bp->b_bn = XFS_BUF_DADDR_NULL;	/* always null for uncached buffers */
	bp->b_maps[0].bm_bn = daddr;
	bp->b_flags |= XBF_READ;
	bp->b_ops = ops;

	xfs_buf_submit(bp);
	if (bp->b_error) {
		error = bp->b_error;
		xfs_buf_relse(bp);
		return error;
	}

	*bpp = bp;
	return 0;
}

int
xfs_buf_get_uncached(
	struct xfs_buftarg *target,
	size_t numblks,
	int flags,
	struct xfs_buf **bpp)
{
	int error;
	struct xfs_buf *bp;
	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

	*bpp = NULL;

	/* flags might contain irrelevant bits, pass only what we care about */
	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
	if (error)
		return error;

	error = xfs_buf_alloc_pages(bp, flags);
	if (error)
		goto fail_free_buf;

	error = _xfs_buf_map_pages(bp, 0);
	if (unlikely(error)) {
		xfs_warn(target->bt_mount,
			"%s: failed to map pages", __func__);
		goto fail_free_buf;
	}

	trace_xfs_buf_get_uncached(bp, _RET_IP_);
	*bpp = bp;
	return 0;

fail_free_buf:
	xfs_buf_free(bp);
	return error;
}

/*
 * Increment reference count on buffer, to hold the buffer concurrently
 * with another thread which may release (free) the buffer asynchronously.
 * Must hold the buffer already to call this function.
 */
void
xfs_buf_hold(
	struct xfs_buf *bp)
{
	trace_xfs_buf_hold(bp, _RET_IP_);
	atomic_inc(&bp->b_hold);
}

/*
 * Release a hold on the specified buffer. If the hold count is 1, the buffer
 * is freed.
 */
void
xfs_buf_rele(
	struct xfs_buf *bp)
{
	struct xfs_perag *pag = bp->b_pag;
	bool release;
	bool freebuf = false;

	trace_xfs_buf_rele(bp, _RET_IP_);

	if (!pag) {
		ASSERT(list_empty(&bp->b_lru));
		if (atomic_dec_and_test(&bp->b_hold)) {
			xfs_buf_ioacct_dec(bp);
			xfs_buf_free(bp);
		}
		return;
	}

	ASSERT(atomic_read(&bp->b_hold) > 0);

	/*
	 * We grab the b_lock here first to serialise racing xfs_buf_rele()
	 * calls. The pag_buf_lock is only taken on the last reference, so
	 * the second to last reference must be serialised against clearing
	 * the XFS_BSTATE_IN_FLIGHT flag.
	 */
	spin_lock(&bp->b_lock);
	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
	if (!release) {
		/*
		 * Drop the in-flight state if the buffer is already on the
		 * LRU and it holds the only reference. This is racy because
		 * we haven't acquired the pag_buf_lock, but the use of
		 * _XBF_DELWRI_Q ensures the decrement occurs only once
		 * per-buf.
		 */
		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
			__xfs_buf_ioacct_dec(bp);
		goto out_unlock;
	}

	/* the last reference has been dropped ... */
	__xfs_buf_ioacct_dec(bp);
	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
		/*
		 * If the buffer is added to the LRU take a new reference to
		 * the buffer for the LRU and clear the (now stale) dispose
		 * list state flag.
		 */
		if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
			bp->b_state &= ~XFS_BSTATE_DISPOSE;
			atomic_inc(&bp->b_hold);
		}
		spin_unlock(&pag->pag_buf_lock);
	} else {
		/*
		 * Most of the time buffers will already be removed from the
		 * LRU, so optimise that case by checking for the
		 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
		 * was on was the disposal list.
		 */
		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
			list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
		} else {
			ASSERT(list_empty(&bp->b_lru));
		}

		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
				       xfs_buf_hash_params);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		freebuf = true;
	}

out_unlock:
	spin_unlock(&bp->b_lock);

	if (freebuf)
		xfs_buf_free(bp);
}
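
/*
 * Lock a buffer object, if it is not already locked. Returns non-zero on
 * success and zero if the lock is already held by someone else; the caller
 * decides whether to back off or block via xfs_buf_lock().
 */
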
int
xfs_buf_trylock(
	struct xfs_buf *bp)
{
	int locked;

	locked = down_trylock(&bp->b_sema) == 0;
	if (locked)
		trace_xfs_buf_trylock(bp, _RET_IP_);
	else
		trace_xfs_buf_trylock_fail(bp, _RET_IP_);
	return locked;
}

/*
 * Lock a buffer object.
 *
 * If we come across a stale, pinned, locked buffer, we know that we are
 * being asked to lock a buffer that has been reallocated. Because it is
 * pinned, we know that the log has not been pushed to disk and hence it
 * will still be locked. Rather than sleeping until someone else pushes
 * the log, push it ourselves before trying to get the lock.
 */
void
xfs_buf_lock(
	struct xfs_buf *bp)
{
	trace_xfs_buf_lock(bp, _RET_IP_);

	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
		xfs_log_force(bp->b_mount, 0);
	down(&bp->b_sema);

	trace_xfs_buf_lock_done(bp, _RET_IP_);
}

void
xfs_buf_unlock(
	struct xfs_buf *bp)
{
	ASSERT(xfs_buf_islocked(bp));

	up(&bp->b_sema);
	trace_xfs_buf_unlock(bp, _RET_IP_);
}

STATIC void
xfs_buf_wait_unpin(
	struct xfs_buf *bp)
{
	DECLARE_WAITQUEUE (wait, current);

	if (atomic_read(&bp->b_pin_count) == 0)
		return;

	add_wait_queue(&bp->b_waiters, &wait);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&bp->b_pin_count) == 0)
			break;
		io_schedule();
	}
	remove_wait_queue(&bp->b_waiters, &wait);
	set_current_state(TASK_RUNNING);
}

static void
xfs_buf_ioerror_alert_ratelimited(
	struct xfs_buf *bp)
{
	static unsigned long lasttime;
	static struct xfs_buftarg *lasttarg;

	if (bp->b_target != lasttarg ||
	    time_after(jiffies, (lasttime + 5*HZ))) {
		lasttime = jiffies;
		xfs_buf_ioerror_alert(bp, __this_address);
	}
	lasttarg = bp->b_target;
}

/*
 * Account for this latest trip around the retry handler, and decide if
 * we've failed enough times to constitute a permanent failure.
 */
static bool
xfs_buf_ioerror_permanent(
	struct xfs_buf *bp,
	struct xfs_error_cfg *cfg)
{
	struct xfs_mount *mp = bp->b_mount;

	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
	    ++bp->b_retries > cfg->max_retries)
		return true;
	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
		return true;

	/* At unmount we may treat errors differently */
	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
		return true;

	return false;
}
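
/*
 * Decide how to handle a failed buffer I/O completion.
 *
 * On a sync write or after a filesystem shutdown we just want to stale the
 * buffer and let the caller handle the error in bp->b_error. Failed async
 * writes are retried: the first failure of a given type is resubmitted
 * immediately, and repeated failures are retried according to the error
 * configuration until they are declared permanent, at which point the
 * filesystem is shut down.
 *
 * Returns true if this function took care of error handling and the caller
 * must not touch the buffer again; false if the caller should proceed with
 * normal I/O completion handling.
 */
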
static bool
xfs_buf_ioend_handle_error(
	struct xfs_buf *bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_error_cfg *cfg;

	/*
	 * If we've already decided to shutdown the filesystem because of
	 * I/O errors, there's no point in giving this a retry.
	 */
	if (XFS_FORCED_SHUTDOWN(mp))
		goto out_stale;

	xfs_buf_ioerror_alert_ratelimited(bp);

	/*
	 * We're not going to bother about retrying this during log recovery.
	 * One strike!
	 */
	if (bp->b_flags & _XBF_LOGRECOVERY) {
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		return false;
	}

	/*
	 * Synchronous writes will have callers process the error.
	 */
	if (!(bp->b_flags & XBF_ASYNC))
		goto out_stale;

	trace_xfs_buf_iodone_async(bp, _RET_IP_);

	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
	if (bp->b_last_error != bp->b_error ||
	    !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
		bp->b_last_error = bp->b_error;
		if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
		    !bp->b_first_retry_time)
			bp->b_first_retry_time = jiffies;
		goto resubmit;
	}

	/*
	 * Permanent error - we need to trigger a shutdown if we haven't
	 * already to indicate that inconsistency will result from this
	 * action.
	 */
	if (xfs_buf_ioerror_permanent(bp, cfg)) {
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		goto out_stale;
	}

	/* Still considered a transient error. Caller will schedule retries. */
	if (bp->b_flags & _XBF_INODES)
		xfs_buf_inode_io_fail(bp);
	else if (bp->b_flags & _XBF_DQUOTS)
		xfs_buf_dquot_io_fail(bp);
	else
		ASSERT(list_empty(&bp->b_li_list));
	xfs_buf_ioerror(bp, 0);
	xfs_buf_relse(bp);
	return true;

resubmit:
	xfs_buf_ioerror(bp, 0);
	bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
	xfs_buf_submit(bp);
	return true;
out_stale:
	xfs_buf_stale(bp);
	bp->b_flags |= XBF_DONE;
	bp->b_flags &= ~XBF_WRITE;
	trace_xfs_buf_error_relse(bp, _RET_IP_);
	return false;
}

static void
xfs_buf_ioend(
	struct xfs_buf *bp)
{
	trace_xfs_buf_iodone(bp, _RET_IP_);

	/*
	 * Pull in IO completion errors now. We are guaranteed to be running
	 * single threaded, so we don't need the lock to read b_io_error.
	 */
	if (!bp->b_error && bp->b_io_error)
		xfs_buf_ioerror(bp, bp->b_io_error);

	if (bp->b_flags & XBF_READ) {
		if (!bp->b_error && bp->b_ops)
			bp->b_ops->verify_read(bp);
		if (!bp->b_error)
			bp->b_flags |= XBF_DONE;
	} else {
		if (!bp->b_error) {
			bp->b_flags &= ~XBF_WRITE_FAIL;
			bp->b_flags |= XBF_DONE;
		}

		if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
			return;

		/* clear the retry state */
		bp->b_last_error = 0;
		bp->b_retries = 0;
		bp->b_first_retry_time = 0;

		/*
		 * Note that for things like remote attribute buffers, there
		 * may not be a buffer log item here, so processing the buffer
		 * log item must remain optional.
		 */
		if (bp->b_log_item)
			xfs_buf_item_done(bp);

		if (bp->b_flags & _XBF_INODES)
			xfs_buf_inode_iodone(bp);
		else if (bp->b_flags & _XBF_DQUOTS)
			xfs_buf_dquot_iodone(bp);

	}

	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
			 _XBF_LOGRECOVERY);

	if (bp->b_flags & XBF_ASYNC)
		xfs_buf_relse(bp);
	else
		complete(&bp->b_iowait);
}

static void
xfs_buf_ioend_work(
	struct work_struct *work)
{
	struct xfs_buf *bp =
		container_of(work, struct xfs_buf, b_ioend_work);

	xfs_buf_ioend(bp);
}

static void
xfs_buf_ioend_async(
	struct xfs_buf *bp)
{
	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
	queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
}

void
__xfs_buf_ioerror(
	struct xfs_buf *bp,
	int error,
	xfs_failaddr_t failaddr)
{
	ASSERT(error <= 0 && error >= -1000);
	bp->b_error = error;
	trace_xfs_buf_ioerror(bp, error, failaddr);
}

void
xfs_buf_ioerror_alert(
	struct xfs_buf *bp,
	xfs_failaddr_t func)
{
	xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
		"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
		func, (uint64_t)XFS_BUF_ADDR(bp),
		bp->b_length, -bp->b_error);
}

/*
 * To simulate an I/O failure, the buffer must be locked and held with at
 * least three references. The LRU reference is dropped by the stale call.
 * The buf item reference is dropped via ioend processing. The third
 * reference is owned by the caller and is dropped on I/O completion if the
 * buffer is XBF_ASYNC.
 */
void
xfs_buf_ioend_fail(
	struct xfs_buf *bp)
{
	bp->b_flags &= ~XBF_DONE;
	xfs_buf_stale(bp);
	xfs_buf_ioerror(bp, -EIO);
	xfs_buf_ioend(bp);
}

int
xfs_bwrite(
	struct xfs_buf *bp)
{
	int error;

	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_WRITE;
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
			 XBF_DONE);

	error = xfs_buf_submit(bp);
	if (error)
		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
	return error;
}

static void
xfs_buf_bio_end_io(
	struct bio *bio)
{
	struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;

	if (!bio->bi_status &&
	    (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
	    XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
		bio->bi_status = BLK_STS_IOERR;

	/*
	 * Don't overwrite existing errors - otherwise we can lose errors on
	 * buffers that require multiple bios to complete.
	 */
	if (bio->bi_status) {
		int error = blk_status_to_errno(bio->bi_status);

		cmpxchg(&bp->b_io_error, 0, error);
	}

	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
		xfs_buf_ioend_async(bp);
	bio_put(bio);
}

static void
xfs_buf_ioapply_map(
	struct xfs_buf *bp,
	int map,
	int *buf_offset,
	int *count,
	int op)
{
	int page_index;
	unsigned int total_nr_pages = bp->b_page_count;
	int nr_pages;
	struct bio *bio;
	sector_t sector = bp->b_maps[map].bm_bn;
	int size;
	int offset;

	/* skip the pages in the buffer before the start offset */
	page_index = 0;
	offset = *buf_offset;
	while (offset >= PAGE_SIZE) {
		page_index++;
		offset -= PAGE_SIZE;
	}

	/*
	 * Limit the IO size to the length of the current vector, and update
	 * the remaining IO count for the next time around.
	 */
	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
	*count -= size;
	*buf_offset += size;

next_chunk:
	atomic_inc(&bp->b_io_remaining);
	nr_pages = bio_max_segs(total_nr_pages);

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio_set_dev(bio, bp->b_target->bt_bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = xfs_buf_bio_end_io;
	bio->bi_private = bp;
	bio->bi_opf = op;

	for (; size && nr_pages; nr_pages--, page_index++) {
		int rbytes, nbytes = PAGE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
				      offset);
		if (rbytes < nbytes)
			break;

		offset = 0;
		sector += BTOBB(nbytes);
		size -= nbytes;
		total_nr_pages--;
	}

	if (likely(bio->bi_iter.bi_size)) {
		if (xfs_buf_is_vmapped(bp)) {
			flush_kernel_vmap_range(bp->b_addr,
						xfs_buf_vmap_len(bp));
		}
		submit_bio(bio);
		if (size)
			goto next_chunk;
	} else {
		/*
		 * This is guaranteed not to be the last io reference count
		 * because the caller (xfs_buf_submit) holds a count itself.
		 */
		atomic_dec(&bp->b_io_remaining);
		xfs_buf_ioerror(bp, -EIO);
		bio_put(bio);
	}

}

STATIC void
_xfs_buf_ioapply(
	struct xfs_buf *bp)
{
	struct blk_plug plug;
	int op;
	int offset;
	int size;
	int i;

	/*
	 * Make sure we capture only current IO errors rather than stale
	 * errors left over from previous use of the buffer (e.g. failed
	 * readahead).
	 */
	bp->b_error = 0;

	if (bp->b_flags & XBF_WRITE) {
		op = REQ_OP_WRITE;

		/*
		 * Run the write verifier callback function if it exists. If
		 * this function fails it will mark the buffer with an error
		 * and the IO should not be dispatched.
		 */
		if (bp->b_ops) {
			bp->b_ops->verify_write(bp);
			if (bp->b_error) {
				xfs_force_shutdown(bp->b_mount,
						   SHUTDOWN_CORRUPT_INCORE);
				return;
			}
		} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
			struct xfs_mount *mp = bp->b_mount;

			/*
			 * non-crc filesystems don't attach verifiers during
			 * log recovery, so don't warn for such filesystems.
			 */
			if (xfs_sb_version_hascrc(&mp->m_sb)) {
				xfs_warn(mp,
					"%s: no buf ops on daddr 0x%llx len %d",
					__func__, bp->b_bn, bp->b_length);
				xfs_hex_dump(bp->b_addr,
						XFS_CORRUPTION_DUMP_LEN);
				dump_stack();
			}
		}
	} else {
		op = REQ_OP_READ;
		if (bp->b_flags & XBF_READ_AHEAD)
			op |= REQ_RAHEAD;
	}

	/* we only use the buffer cache for meta-data */
	op |= REQ_META;

	/*
	 * Walk all the vectors issuing IO on them. Set up the initial offset
	 * into the buffer and the desired IO size before we start -
	 * xfs_buf_ioapply_map() will modify them appropriately for each
	 * subsequent call.
	 */
	offset = bp->b_offset;
	size = BBTOB(bp->b_length);
	blk_start_plug(&plug);
	for (i = 0; i < bp->b_map_count; i++) {
		xfs_buf_ioapply_map(bp, i, &offset, &size, op);
		if (bp->b_error)
			break;
		if (size <= 0)
			break;	/* all done */
	}
	blk_finish_plug(&plug);
}

/*
 * Wait for I/O completion of a sync buffer and return the I/O error code.
 */
static int
xfs_buf_iowait(
	struct xfs_buf *bp)
{
	ASSERT(!(bp->b_flags & XBF_ASYNC));

	trace_xfs_buf_iowait(bp, _RET_IP_);
	wait_for_completion(&bp->b_iowait);
	trace_xfs_buf_iowait_done(bp, _RET_IP_);

	return bp->b_error;
}
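
/*
 * Buffer I/O submission path, read or write. Asynchronous submission
 * transfers the buffer lock ownership and the current reference to the I/O;
 * it is not safe to reference the buffer after a call to this function
 * unless the caller holds an additional reference itself.
 */
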
static int
__xfs_buf_submit(
	struct xfs_buf *bp,
	bool wait)
{
	int error = 0;

	trace_xfs_buf_submit(bp, _RET_IP_);

	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));

	/* on shutdown we stale and complete the buffer immediately */
	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
		xfs_buf_ioend_fail(bp);
		return -EIO;
	}

	/*
	 * Grab a reference so the buffer does not go away underneath us. For
	 * async buffers, I/O completion drops the callers reference, which
	 * could occur before submission returns.
	 */
	xfs_buf_hold(bp);

	if (bp->b_flags & XBF_WRITE)
		xfs_buf_wait_unpin(bp);

	/* clear the internal error state to avoid spurious errors */
	bp->b_io_error = 0;

	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from
	 * calling xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	if (bp->b_flags & XBF_ASYNC)
		xfs_buf_ioacct_inc(bp);
	_xfs_buf_ioapply(bp);

	/*
	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
	 * reference we took above. If we drop it to zero, run completion so
	 * that we don't return to the caller with completion still pending.
	 */
	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
		if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
			xfs_buf_ioend(bp);
		else
			xfs_buf_ioend_async(bp);
	}

	if (wait)
		error = xfs_buf_iowait(bp);

	/*
	 * Release the hold that keeps the buffer referenced for the entire
	 * I/O. Note that if the buffer is async, it is not safe to reference
	 * after this release.
	 */
	xfs_buf_rele(bp);
	return error;
}

void *
xfs_buf_offset(
	struct xfs_buf *bp,
	size_t offset)
{
	struct page *page;

	if (bp->b_addr)
		return bp->b_addr + offset;

	page = bp->b_pages[offset >> PAGE_SHIFT];
	return page_address(page) + (offset & (PAGE_SIZE-1));
}

void
xfs_buf_zero(
	struct xfs_buf *bp,
	size_t boff,
	size_t bsize)
{
	size_t bend;

	bend = boff + bsize;
	while (boff < bend) {
		struct page *page;
		int page_index, page_offset, csize;

		page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
		page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
		page = bp->b_pages[page_index];
		csize = min_t(size_t, PAGE_SIZE - page_offset,
				      BBTOB(bp->b_length) - boff);

		ASSERT((csize + page_offset) <= PAGE_SIZE);

		memset(page_address(page) + page_offset, 0, csize);

		boff += csize;
	}
}

/*
 * Log a message about and stale a buffer that a caller has decided is
 * corrupt.
 *
 * This function should be called for the kinds of metadata corruption that
 * cannot be detected from a verifier, such as incorrect inter-block
 * relationship data. Do /not/ call this function from a verifier function.
 *
 * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer
 * will be marked stale, but b_error will not be set. The caller is
 * responsible for releasing the buffer or fixing it.
 */
void
__xfs_buf_mark_corrupt(
	struct xfs_buf *bp,
	xfs_failaddr_t fa)
{
	ASSERT(bp->b_flags & XBF_DONE);

	xfs_buf_corruption_error(bp, fa);
	xfs_buf_stale(bp);
}

/*
 * LRU walk callback used by xfs_buftarg_drain(): move unreferenced buffers
 * onto the dispose list so the caller can release them; buffers that are
 * still referenced or locked are skipped and retried on a later pass.
 */
static enum lru_status
xfs_buftarg_drain_rele(
	struct list_head *item,
	struct list_lru_one *lru,
	spinlock_t *lru_lock,
	void *arg)

{
	struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head *dispose = arg;

	if (atomic_read(&bp->b_hold) > 1) {
		/* need to wait, so skip it this pass */
		trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
		return LRU_SKIP;
	}
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;

	/*
	 * Clear the LRU reference count so the buffer doesn't get
	 * ignored in xfs_buf_rele().
	 */
	atomic_set(&bp->b_lru_ref, 0);
	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}

/*
 * Wait for outstanding I/O on the buftarg to complete.
 */
void
xfs_buftarg_wait(
	struct xfs_buftarg *btp)
{
	/*
	 * First wait on the buftarg I/O count for all in-flight buffers to
	 * be released. This is critical as new buffers do not make the LRU
	 * until they are released.
	 *
	 * Next, flush the buffer workqueue to ensure all completion
	 * processing has finished, as the reference count held over async
	 * I/O is not released until after the buffer lock is dropped.
	 */
	while (percpu_counter_sum(&btp->bt_io_count))
		delay(100);
	flush_workqueue(btp->bt_mount->m_buf_workqueue);
}

void
xfs_buftarg_drain(
	struct xfs_buftarg *btp)
{
	LIST_HEAD(dispose);
	int loop = 0;
	bool write_fail = false;

	xfs_buftarg_wait(btp);

	/* loop until there is nothing left on the lru list. */
	while (list_lru_count(&btp->bt_lru)) {
		list_lru_walk(&btp->bt_lru, xfs_buftarg_drain_rele,
			      &dispose, LONG_MAX);

		while (!list_empty(&dispose)) {
			struct xfs_buf *bp;
			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
			list_del_init(&bp->b_lru);
			if (bp->b_flags & XBF_WRITE_FAIL) {
				write_fail = true;
				xfs_buf_alert_ratelimited(bp,
					"XFS: Corruption Alert",
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
					(long long)bp->b_bn);
			}
			xfs_buf_rele(bp);
		}
		if (loop++ != 0)
			delay(100);
	}

	/*
	 * If one or more failed buffers were freed, that means dirty metadata
	 * was thrown away. This should only ever happen after I/O completion
	 * handling has elevated I/O error(s) to permanent failures and shut
	 * the filesystem down.
	 */
	if (write_fail) {
		ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
		xfs_alert(btp->bt_mount,
	      "Please run xfs_repair to determine the extent of the problem.");
	}
}

static enum lru_status
xfs_buftarg_isolate(
	struct list_head *item,
	struct list_lru_one *lru,
	spinlock_t *lru_lock,
	void *arg)
{
	struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head *dispose = arg;

	/*
	 * We are inverting the lru lock/bp->b_lock here, so use a trylock.
	 * If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;

	/*
	 * Decrement the b_lru_ref count unless the value is already
	 * zero. If the value is already zero, we need to reclaim the
	 * buffer, otherwise it gets another trip through the LRU.
	 */
	if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
		spin_unlock(&bp->b_lock);
		return LRU_ROTATE;
	}

	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}

static unsigned long
xfs_buftarg_shrink_scan(
	struct shrinker *shrink,
	struct shrink_control *sc)
{
	struct xfs_buftarg *btp = container_of(shrink,
					struct xfs_buftarg, bt_shrinker);
	LIST_HEAD(dispose);
	unsigned long freed;

	freed = list_lru_shrink_walk(&btp->bt_lru, sc,
				     xfs_buftarg_isolate, &dispose);

	while (!list_empty(&dispose)) {
		struct xfs_buf *bp;
		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
		list_del_init(&bp->b_lru);
		xfs_buf_rele(bp);
	}

	return freed;
}

static unsigned long
xfs_buftarg_shrink_count(
	struct shrinker *shrink,
	struct shrink_control *sc)
{
	struct xfs_buftarg *btp = container_of(shrink,
					struct xfs_buftarg, bt_shrinker);
	return list_lru_shrink_count(&btp->bt_lru, sc);
}

void
xfs_free_buftarg(
	struct xfs_buftarg *btp)
{
	unregister_shrinker(&btp->bt_shrinker);
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
	percpu_counter_destroy(&btp->bt_io_count);
	list_lru_destroy(&btp->bt_lru);

	blkdev_issue_flush(btp->bt_bdev);

	kmem_free(btp);
}

int
xfs_setsize_buftarg(
	xfs_buftarg_t *btp,
	unsigned int sectorsize)
{
	/* Set up metadata sector size info */
	btp->bt_meta_sectorsize = sectorsize;
	btp->bt_meta_sectormask = sectorsize - 1;

	if (set_blocksize(btp->bt_bdev, sectorsize)) {
		xfs_warn(btp->bt_mount,
			"Cannot set_blocksize to %u on device %pg",
			sectorsize, btp->bt_bdev);
		return -EINVAL;
	}

	/* Set up device logical sector size mask */
	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;

	return 0;
}

/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */
STATIC int
xfs_setsize_buftarg_early(
	xfs_buftarg_t *btp,
	struct block_device *bdev)
{
	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct xfs_mount *mp,
	struct block_device *bdev,
	struct dax_device *dax_dev)
{
	xfs_buftarg_t *btp;

	btp = kmem_zalloc(sizeof(*btp), KM_NOFS);

	btp->bt_mount = mp;
	btp->bt_dev = bdev->bd_dev;
	btp->bt_bdev = bdev;
	btp->bt_daxdev = dax_dev;

	/*
	 * Buffer IO error rate limiting: no more than a burst of messages
	 * every 30 seconds, so repeated errors don't spam the logs.
	 */
	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
			     DEFAULT_RATELIMIT_BURST);

	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error_free;

	if (list_lru_init(&btp->bt_lru))
		goto error_free;

	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
		goto error_lru;

	btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
	btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
	btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
	if (register_shrinker(&btp->bt_shrinker))
		goto error_pcpu;
	return btp;

error_pcpu:
	percpu_counter_destroy(&btp->bt_io_count);
error_lru:
	list_lru_destroy(&btp->bt_lru);
error_free:
	kmem_free(btp);
	return NULL;
}

/*
 * Cancel a delayed write list.
 *
 * Remove each buffer from the list, clear the delwri queue flag and drop the
 * associated buffer reference.
 */
void
xfs_buf_delwri_cancel(
	struct list_head *list)
{
	struct xfs_buf *bp;

	while (!list_empty(list)) {
		bp = list_first_entry(list, struct xfs_buf, b_list);

		xfs_buf_lock(bp);
		bp->b_flags &= ~_XBF_DELWRI_Q;
		list_del_init(&bp->b_list);
		xfs_buf_relse(bp);
	}
}
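
/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been. Note that
 * neither this routine nor the buffer list submission functions perform any
 * internal synchronization; the lists are expected to be thread-local to
 * the callers.
 *
 * Returns true if we queued up the buffer, or false if it was already on
 * the buffer list.
 */
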
bool
xfs_buf_delwri_queue(
	struct xfs_buf *bp,
	struct list_head *list)
{
	ASSERT(xfs_buf_islocked(bp));
	ASSERT(!(bp->b_flags & XBF_READ));

	/*
	 * If the buffer is already marked delwri it is already queued up by
	 * someone else for immediate writeout. Just ignore it in that case.
	 */
	if (bp->b_flags & _XBF_DELWRI_Q) {
		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
		return false;
	}

	trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	/*
	 * If a buffer gets written out synchronously or marked stale while
	 * it is on a delwri list we lazily remove it. To do this, the other
	 * party clears the _XBF_DELWRI_Q flag but otherwise leaves the
	 * buffer alone. It remains referenced and on the list. In a rare
	 * corner case it might get readded to a delwri list after the
	 * synchronous writeout, in which case we just need to re-add the
	 * flag here.
	 */
	bp->b_flags |= _XBF_DELWRI_Q;
	if (list_empty(&bp->b_list)) {
		atomic_inc(&bp->b_hold);
		list_add_tail(&bp->b_list, list);
	}

	return true;
}

/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */
static int
xfs_buf_cmp(
	void *priv,
	const struct list_head *a,
	const struct list_head *b)
{
	struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
	struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
	xfs_daddr_t diff;

	diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}

/*
 * Submit buffers for write. If wait_list is specified, the buffers are
 * submitted using sync I/O and placed on the wait list such that the caller
 * can iowait each buffer. Otherwise async I/O is used and the buffers are
 * released on I/O completion.
 */
static int
xfs_buf_delwri_submit_buffers(
	struct list_head *buffer_list,
	struct list_head *wait_list)
{
	struct xfs_buf *bp, *n;
	int pinned = 0;
	struct blk_plug plug;

	list_sort(NULL, buffer_list, xfs_buf_cmp);

	blk_start_plug(&plug);
	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
		if (!wait_list) {
			if (xfs_buf_ispinned(bp)) {
				pinned++;
				continue;
			}
			if (!xfs_buf_trylock(bp))
				continue;
		} else {
			xfs_buf_lock(bp);
		}

		/*
		 * Someone else might have written the buffer synchronously or
		 * marked it stale in the meantime. In that case only the
		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
		 * reference and remove it from the list here.
		 */
		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
			list_del_init(&bp->b_list);
			xfs_buf_relse(bp);
			continue;
		}

		trace_xfs_buf_delwri_split(bp, _RET_IP_);

		/*
		 * If we have a wait list, each buffer (and associated delwri
		 * queue reference) transfers to it and is submitted
		 * synchronously. Otherwise, drop the buffer from the delwri
		 * queue and submit async.
		 */
		bp->b_flags &= ~_XBF_DELWRI_Q;
		bp->b_flags |= XBF_WRITE;
		if (wait_list) {
			bp->b_flags &= ~XBF_ASYNC;
			list_move_tail(&bp->b_list, wait_list);
		} else {
			bp->b_flags |= XBF_ASYNC;
			list_del_init(&bp->b_list);
		}
		__xfs_buf_submit(bp, false);
	}
	blk_finish_plug(&plug);

	return pinned;
}
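
/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned
 * buffers out and not wait for I/O completion on any of the buffers. This
 * interface is only safely usable for callers that can track I/O completion
 * by higher level means, e.g. AIL pushing, as the @buffer_list is consumed
 * in this function.
 */
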
int
xfs_buf_delwri_submit_nowait(
	struct list_head *buffer_list)
{
	return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
}

/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the
 * function, so callers must have some other way of tracking buffers if they
 * require such functionality.
 */
int
xfs_buf_delwri_submit(
	struct list_head *buffer_list)
{
	LIST_HEAD (wait_list);
	int error = 0, error2;
	struct xfs_buf *bp;

	xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);

	/* Wait for IO to complete. */
	while (!list_empty(&wait_list)) {
		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

		list_del_init(&bp->b_list);

		/*
		 * Wait on the locked buffer, check for errors and unlock and
		 * release the delwri queue reference.
		 */
		error2 = xfs_buf_iowait(bp);
		xfs_buf_relse(bp);
		if (!error)
			error = error2;
	}

	return error;
}
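
/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri
 * queue and return with the buffer still on the original queue. The waiting
 * delwri buffer submission infrastructure guarantees transfer of the delwri
 * queue buffer reference to a temporary wait list. We reuse this
 * infrastructure to transfer the buffer back to the original queue.
 */
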
int
xfs_buf_delwri_pushbuf(
	struct xfs_buf *bp,
	struct list_head *buffer_list)
{
	LIST_HEAD (submit_list);
	int error;

	ASSERT(bp->b_flags & _XBF_DELWRI_Q);

	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);

	/*
	 * Isolate the buffer to a new local list so we can submit it for I/O
	 * independently from the rest of the original list.
	 */
	xfs_buf_lock(bp);
	list_move(&bp->b_list, &submit_list);
	xfs_buf_unlock(bp);

	/*
	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
	 * the buffer on the wait list with the original reference. Rather
	 * than bounce the buffer from a local wait list back to the original
	 * list after I/O completion, reuse the original list as the wait
	 * list.
	 */
	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);

	/*
	 * The buffer is now locked, under I/O and wait listed on the original
	 * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag
	 * and return with the buffer unlocked and on the original queue.
	 */
	error = xfs_buf_iowait(bp);
	bp->b_flags |= _XBF_DELWRI_Q;
	xfs_buf_unlock(bp);

	return error;
}

int __init
xfs_buf_init(void)
{
	xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
					 SLAB_HWCACHE_ALIGN |
					 SLAB_RECLAIM_ACCOUNT |
					 SLAB_MEM_SPREAD,
					 NULL);
	if (!xfs_buf_zone)
		goto out;

	return 0;

 out:
	return -ENOMEM;
}

void
xfs_buf_terminate(void)
{
	kmem_cache_destroy(xfs_buf_zone);
}

void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
	/*
	 * Set the lru reference count to 0 based on the error injection tag.
	 * This allows userspace to disrupt buffer caching for debug/testing
	 * purposes.
	 */
	if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
		lru_ref = 0;

	atomic_set(&bp->b_lru_ref, lru_ref);
}

/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */
bool
xfs_verify_magic(
	struct xfs_buf *bp,
	__be32 dmagic)
{
	struct xfs_mount *mp = bp->b_mount;
	int idx;

	idx = xfs_sb_version_hascrc(&mp->m_sb);
	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
		return false;
	return dmagic == bp->b_ops->magic[idx];
}

/*
 * Verify an on-disk magic value against the 16 bit magic value specified in
 * the verifier structure. The verifier magic is in disk byte order so the
 * caller is expected to pass the value directly from disk.
 */
bool
xfs_verify_magic16(
	struct xfs_buf *bp,
	__be16 dmagic)
{
	struct xfs_mount *mp = bp->b_mount;
	int idx;

	idx = xfs_sb_version_hascrc(&mp->m_sb);
	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
		return false;
	return dmagic == bp->b_ops->magic16[idx];
}