// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
6#include "xfs.h"
7#include <linux/backing-dev.h>
8
9#include "xfs_shared.h"
10#include "xfs_format.h"
11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
15#include "xfs_trace.h"
16#include "xfs_log.h"
17#include "xfs_errortag.h"
18#include "xfs_error.h"
19
20static kmem_zone_t *xfs_buf_zone;
21
22#define xb_to_gfp(flags) \
23 ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
24
/*
 * Locking orders
 *
 * xfs_buf_ioacct_inc:
 * xfs_buf_ioacct_dec:
 *	b_sema (caller holds)
 *	  b_lock
 *
 * xfs_buf_stale:
 *	b_sema (caller holds)
 *	  b_lock
 *	    lru_lock
 *
 * xfs_buf_rele:
 *	b_lock
 *	  pag_buf_lock
 *	    lru_lock
 *
 * xfs_buftarg_wait_rele
 *	lru_lock
 *	  b_lock (trylock due to inversion)
 *
 * xfs_buftarg_isolate
 *	lru_lock
 *	  b_lock (trylock due to inversion)
 */

52static inline int
53xfs_buf_is_vmapped(
54 struct xfs_buf *bp)
55{
	/*
	 * Return true if the buffer is vmapped.
	 *
	 * b_addr is null if the buffer is not mapped, but the code is clever
	 * enough to know it doesn't have to map a single page, so the check has
	 * to be both for b_addr and bp->b_page_count > 1.
	 */
63 return bp->b_addr && bp->b_page_count > 1;
64}
65
66static inline int
67xfs_buf_vmap_len(
68 struct xfs_buf *bp)
69{
70 return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
71}
72
/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. This must be called before the buffer is submitted for I/O.
 *
 * Buffers in flight are counted so that xfs_wait_buftarg() can drain all
 * outstanding async I/O before the buffer target is torn down. The
 * XFS_BSTATE_IN_FLIGHT state flag ensures a buffer is accounted at most once
 * per hold cycle no matter how many times it is submitted, because the
 * matching decrement is deferred to buffer release (or to xfs_buf_stale()).
 * Buffers marked XBF_NO_IOACCT are never accounted.
 */
86static inline void
87xfs_buf_ioacct_inc(
88 struct xfs_buf *bp)
89{
90 if (bp->b_flags & XBF_NO_IOACCT)
91 return;
92
93 ASSERT(bp->b_flags & XBF_ASYNC);
94 spin_lock(&bp->b_lock);
95 if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
96 bp->b_state |= XFS_BSTATE_IN_FLIGHT;
97 percpu_counter_inc(&bp->b_target->bt_io_count);
98 }
99 spin_unlock(&bp->b_lock);
100}
101
/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg. The caller must hold b_lock.
 */
106static inline void
107__xfs_buf_ioacct_dec(
108 struct xfs_buf *bp)
109{
110 lockdep_assert_held(&bp->b_lock);
111
112 if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
113 bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
114 percpu_counter_dec(&bp->b_target->bt_io_count);
115 }
116}
117
118static inline void
119xfs_buf_ioacct_dec(
120 struct xfs_buf *bp)
121{
122 spin_lock(&bp->b_lock);
123 __xfs_buf_ioacct_dec(bp);
124 spin_unlock(&bp->b_lock);
125}
126
/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
 * reference count falls to zero. If the buffer is already on the LRU, we need
 * to remove the reference that LRU holds on the buffer.
 *
 * This prevents build-up of stale buffers on the LRU.
 */
135void
136xfs_buf_stale(
137 struct xfs_buf *bp)
138{
139 ASSERT(xfs_buf_islocked(bp));
140
141 bp->b_flags |= XBF_STALE;
142
	/*
	 * Clear the delwri status so that a delwri queue walker will not
	 * flush this buffer to disk now that it is stale. The delwri queue has
	 * a reference to the buffer, so this is safe to do.
	 */
148 bp->b_flags &= ~_XBF_DELWRI_Q;
149
	/*
	 * Once the buffer is marked stale and unlocked, a subsequent lookup
	 * could reset b_flags. There is no guarantee that the buffer is
	 * unaccounted (released to LRU) before that occurs. Drop in-flight
	 * status now to preserve accounting consistency.
	 */
156 spin_lock(&bp->b_lock);
157 __xfs_buf_ioacct_dec(bp);
158
159 atomic_set(&bp->b_lru_ref, 0);
160 if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
161 (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
162 atomic_dec(&bp->b_hold);
163
164 ASSERT(atomic_read(&bp->b_hold) >= 1);
165 spin_unlock(&bp->b_lock);
166}
167
168static int
169xfs_buf_get_maps(
170 struct xfs_buf *bp,
171 int map_count)
172{
173 ASSERT(bp->b_maps == NULL);
174 bp->b_map_count = map_count;
175
176 if (map_count == 1) {
177 bp->b_maps = &bp->__b_map;
178 return 0;
179 }
180
181 bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
182 KM_NOFS);
183 if (!bp->b_maps)
184 return -ENOMEM;
185 return 0;
186}
187
/*
 * Free the map array unless it points at the embedded single map.
 */
191static void
192xfs_buf_free_maps(
193 struct xfs_buf *bp)
194{
195 if (bp->b_maps != &bp->__b_map) {
196 kmem_free(bp->b_maps);
197 bp->b_maps = NULL;
198 }
199}
200
201static int
202_xfs_buf_alloc(
203 struct xfs_buftarg *target,
204 struct xfs_buf_map *map,
205 int nmaps,
206 xfs_buf_flags_t flags,
207 struct xfs_buf **bpp)
208{
209 struct xfs_buf *bp;
210 int error;
211 int i;
212
213 *bpp = NULL;
214 bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
215 if (unlikely(!bp))
216 return -ENOMEM;
217
	/*
	 * We don't want certain flags to appear in b_flags unless they are
	 * specifically set by later operations on the buffer.
	 */
222 flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
223
224 atomic_set(&bp->b_hold, 1);
225 atomic_set(&bp->b_lru_ref, 1);
226 init_completion(&bp->b_iowait);
227 INIT_LIST_HEAD(&bp->b_lru);
228 INIT_LIST_HEAD(&bp->b_list);
229 INIT_LIST_HEAD(&bp->b_li_list);
230 sema_init(&bp->b_sema, 0);
231 spin_lock_init(&bp->b_lock);
232 bp->b_target = target;
233 bp->b_mount = target->bt_mount;
234 bp->b_flags = flags;
235
	/*
	 * Set up the buffer map array and accumulate the total length of all
	 * the vectors into b_length. I/O is always issued against the map
	 * entries; b_bn is only used as the cache index for the first block
	 * in the buffer.
	 */
241 error = xfs_buf_get_maps(bp, nmaps);
242 if (error) {
243 kmem_cache_free(xfs_buf_zone, bp);
244 return error;
245 }
246
247 bp->b_bn = map[0].bm_bn;
248 bp->b_length = 0;
249 for (i = 0; i < nmaps; i++) {
250 bp->b_maps[i].bm_bn = map[i].bm_bn;
251 bp->b_maps[i].bm_len = map[i].bm_len;
252 bp->b_length += map[i].bm_len;
253 }
254
255 atomic_set(&bp->b_pin_count, 0);
256 init_waitqueue_head(&bp->b_waiters);
257
258 XFS_STATS_INC(bp->b_mount, xb_create);
259 trace_xfs_buf_init(bp, _RET_IP_);
260
261 *bpp = bp;
262 return 0;
263}
264
/*
 *	Allocate a page array capable of holding a specified number
 *	of pages, and point the page buf at it.
 */
269STATIC int
270_xfs_buf_get_pages(
271 xfs_buf_t *bp,
272 int page_count)
273{
	/* Make sure that we have a page list */
275 if (bp->b_pages == NULL) {
276 bp->b_page_count = page_count;
277 if (page_count <= XB_PAGES) {
278 bp->b_pages = bp->b_page_array;
279 } else {
280 bp->b_pages = kmem_alloc(sizeof(struct page *) *
281 page_count, KM_NOFS);
282 if (bp->b_pages == NULL)
283 return -ENOMEM;
284 }
285 memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
286 }
287 return 0;
288}
289
/*
 *	Frees b_pages if it was allocated.
 */
293STATIC void
294_xfs_buf_free_pages(
295 xfs_buf_t *bp)
296{
297 if (bp->b_pages != bp->b_page_array) {
298 kmem_free(bp->b_pages);
299 bp->b_pages = NULL;
300 }
301}
302
/*
 * Releases the specified buffer.
 *
 * The modification state of any associated pages is left unchanged. The buffer
 * must not be on any hash - use xfs_buf_rele instead for hashed and refcounted
 * buffers.
 */
310static void
311xfs_buf_free(
312 xfs_buf_t *bp)
313{
314 trace_xfs_buf_free(bp, _RET_IP_);
315
316 ASSERT(list_empty(&bp->b_lru));
317
318 if (bp->b_flags & _XBF_PAGES) {
319 uint i;
320
321 if (xfs_buf_is_vmapped(bp))
322 vm_unmap_ram(bp->b_addr - bp->b_offset,
323 bp->b_page_count);
324
325 for (i = 0; i < bp->b_page_count; i++) {
326 struct page *page = bp->b_pages[i];
327
328 __free_page(page);
329 }
330 if (current->reclaim_state)
331 current->reclaim_state->reclaimed_slab +=
332 bp->b_page_count;
333 } else if (bp->b_flags & _XBF_KMEM)
334 kmem_free(bp->b_addr);
335 _xfs_buf_free_pages(bp);
336 xfs_buf_free_maps(bp);
337 kmem_cache_free(xfs_buf_zone, bp);
338}
339
/*
 * Allocate all the pages for the buffer in question and build its page list.
 */
343STATIC int
344xfs_buf_allocate_memory(
345 xfs_buf_t *bp,
346 uint flags)
347{
348 size_t size;
349 size_t nbytes, offset;
350 gfp_t gfp_mask = xb_to_gfp(flags);
351 unsigned short page_count, i;
352 xfs_off_t start, end;
353 int error;
354 xfs_km_flags_t kmflag_mask = 0;
355
	/*
	 * assure zeroed buffer for non-read cases.
	 */
359 if (!(flags & XBF_READ)) {
360 kmflag_mask |= KM_ZERO;
361 gfp_mask |= __GFP_ZERO;
362 }
363
	/*
	 * For buffers that are contained within a single page, just allocate
	 * the memory from the heap - there's no need for the complexity of
	 * page arrays to keep allocation down to order 0.
	 */
369 size = BBTOB(bp->b_length);
370 if (size < PAGE_SIZE) {
371 int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
372 bp->b_addr = kmem_alloc_io(size, align_mask,
373 KM_NOFS | kmflag_mask);
374 if (!bp->b_addr) {
			/* low memory - use alloc_page loop instead */
376 goto use_alloc_page;
377 }
378
379 if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
380 ((unsigned long)bp->b_addr & PAGE_MASK)) {
			/* b_addr spans two pages - use alloc_page instead */
382 kmem_free(bp->b_addr);
383 bp->b_addr = NULL;
384 goto use_alloc_page;
385 }
386 bp->b_offset = offset_in_page(bp->b_addr);
387 bp->b_pages = bp->b_page_array;
388 bp->b_pages[0] = kmem_to_page(bp->b_addr);
389 bp->b_page_count = 1;
390 bp->b_flags |= _XBF_KMEM;
391 return 0;
392 }
393
394use_alloc_page:
395 start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
396 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
397 >> PAGE_SHIFT;
398 page_count = end - start;
399 error = _xfs_buf_get_pages(bp, page_count);
400 if (unlikely(error))
401 return error;
402
403 offset = bp->b_offset;
404 bp->b_flags |= _XBF_PAGES;
405
406 for (i = 0; i < bp->b_page_count; i++) {
407 struct page *page;
408 uint retries = 0;
409retry:
410 page = alloc_page(gfp_mask);
411 if (unlikely(page == NULL)) {
412 if (flags & XBF_READ_AHEAD) {
413 bp->b_page_count = i;
414 error = -ENOMEM;
415 goto out_free_pages;
416 }

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
424 if (!(++retries % 100))
425 xfs_err(NULL,
426 "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
427 current->comm, current->pid,
428 __func__, gfp_mask);
429
430 XFS_STATS_INC(bp->b_mount, xb_page_retries);
431 congestion_wait(BLK_RW_ASYNC, HZ/50);
432 goto retry;
433 }
434
435 XFS_STATS_INC(bp->b_mount, xb_page_found);
436
437 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
438 size -= nbytes;
439 bp->b_pages[i] = page;
440 offset = 0;
441 }
442 return 0;
443
444out_free_pages:
445 for (i = 0; i < bp->b_page_count; i++)
446 __free_page(bp->b_pages[i]);
447 bp->b_flags &= ~_XBF_PAGES;
448 return error;
449}
450
/*
 *	Map buffer into kernel address-space if necessary.
 */
454STATIC int
455_xfs_buf_map_pages(
456 xfs_buf_t *bp,
457 uint flags)
458{
459 ASSERT(bp->b_flags & _XBF_PAGES);
460 if (bp->b_page_count == 1) {
		/* A single page buffer is always mappable */
462 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
463 } else if (flags & XBF_UNMAPPED) {
464 bp->b_addr = NULL;
465 } else {
466 int retried = 0;
467 unsigned nofs_flag;
468
		/*
		 * vm_map_ram() will allocate auxiliary structures (e.g.
		 * pagetables) with GFP_KERNEL, yet we are likely to be under
		 * GFP_NOFS context here. Hence we need to tell memory reclaim
		 * that we are in such a context via PF_MEMALLOC_NOFS to prevent
		 * memory reclaim re-entering the filesystem here and
		 * potentially deadlocking.
		 */
477 nofs_flag = memalloc_nofs_save();
478 do {
479 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
480 -1);
481 if (bp->b_addr)
482 break;
483 vm_unmap_aliases();
484 } while (retried++ <= 1);
485 memalloc_nofs_restore(nofs_flag);
486
487 if (!bp->b_addr)
488 return -ENOMEM;
489 bp->b_addr += bp->b_offset;
490 }
491
492 return 0;
493}
494
/*
 *	Finding and Reading Buffers
 */
498static int
499_xfs_buf_obj_cmp(
500 struct rhashtable_compare_arg *arg,
501 const void *obj)
502{
503 const struct xfs_buf_map *map = arg->key;
504 const struct xfs_buf *bp = obj;
505
	/*
	 * The key hashing in the lookup path depends on the key being the
	 * first element of the compare_arg, make sure to assert this.
	 */
510 BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
511
512 if (bp->b_bn != map->bm_bn)
513 return 1;
514
515 if (unlikely(bp->b_length != map->bm_len)) {
		/*
		 * found a block number match. If the range doesn't
		 * match, the only way this is allowed is if the buffer
		 * in the cache is stale and the transaction that made
		 * it stale has not yet committed. i.e. we are
		 * reallocating a busy extent. Skip this buffer and
		 * continue searching for an exact match.
		 */
524 ASSERT(bp->b_flags & XBF_STALE);
525 return 1;
526 }
527 return 0;
528}
529
530static const struct rhashtable_params xfs_buf_hash_params = {
531 .min_size = 32,
532 .nelem_hint = 16,
533 .key_len = sizeof(xfs_daddr_t),
534 .key_offset = offsetof(struct xfs_buf, b_bn),
535 .head_offset = offsetof(struct xfs_buf, b_rhash_head),
536 .automatic_shrinking = true,
537 .obj_cmpfn = _xfs_buf_obj_cmp,
538};
539
540int
541xfs_buf_hash_init(
542 struct xfs_perag *pag)
543{
544 spin_lock_init(&pag->pag_buf_lock);
545 return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
546}
547
548void
549xfs_buf_hash_destroy(
550 struct xfs_perag *pag)
551{
552 rhashtable_destroy(&pag->pag_buf_hash);
553}
554
/*
 * Look up a buffer in the buffer cache and return it referenced and locked
 * in @found_bp.
 *
 * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
 * cache.
 *
 * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
 * -EAGAIN if we fail to lock it.
 *
 * Return values are:
 *	-EFSCORRUPTED if we have been supplied with an invalid address
 *	-EAGAIN on trylock failure
 *	-ENOENT if we fail to find a match and @new_bp was NULL
 *	0, with @found_bp:
 *		- @new_bp if we inserted it into the cache
 *		- the buffer we found and locked.
 */
573static int
574xfs_buf_find(
575 struct xfs_buftarg *btp,
576 struct xfs_buf_map *map,
577 int nmaps,
578 xfs_buf_flags_t flags,
579 struct xfs_buf *new_bp,
580 struct xfs_buf **found_bp)
581{
582 struct xfs_perag *pag;
583 xfs_buf_t *bp;
584 struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
585 xfs_daddr_t eofs;
586 int i;
587
588 *found_bp = NULL;
589
590 for (i = 0; i < nmaps; i++)
591 cmap.bm_len += map[i].bm_len;
592
	/* Check for IOs smaller than the sector size / not sector aligned */
594 ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
595 ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
596
	/*
	 * Corrupted block numbers can get through to here, unfortunately, so we
	 * have to check that the buffer falls within the filesystem bounds.
	 */
601 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
602 if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
603 xfs_alert(btp->bt_mount,
604 "%s: daddr 0x%llx out of range, EOFS 0x%llx",
605 __func__, cmap.bm_bn, eofs);
606 WARN_ON(1);
607 return -EFSCORRUPTED;
608 }
609
610 pag = xfs_perag_get(btp->bt_mount,
611 xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
612
613 spin_lock(&pag->pag_buf_lock);
614 bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
615 xfs_buf_hash_params);
616 if (bp) {
617 atomic_inc(&bp->b_hold);
618 goto found;
619 }
620
	/* No match found */
622 if (!new_bp) {
623 XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
624 spin_unlock(&pag->pag_buf_lock);
625 xfs_perag_put(pag);
626 return -ENOENT;
627 }

	/* the buffer keeps the perag reference until it is freed */
630 new_bp->b_pag = pag;
631 rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
632 xfs_buf_hash_params);
633 spin_unlock(&pag->pag_buf_lock);
634 *found_bp = new_bp;
635 return 0;
636
637found:
638 spin_unlock(&pag->pag_buf_lock);
639 xfs_perag_put(pag);
640
641 if (!xfs_buf_trylock(bp)) {
642 if (flags & XBF_TRYLOCK) {
643 xfs_buf_rele(bp);
644 XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
645 return -EAGAIN;
646 }
647 xfs_buf_lock(bp);
648 XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
649 }
650
	/*
	 * if the buffer is stale, clear all the external state associated with
	 * it. We need to keep flags such as how we allocated the buffer memory
	 * intact here.
	 */
656 if (bp->b_flags & XBF_STALE) {
657 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
658 ASSERT(bp->b_iodone == NULL);
659 bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
660 bp->b_ops = NULL;
661 }
662
663 trace_xfs_buf_find(bp, flags, _RET_IP_);
664 XFS_STATS_INC(btp->bt_mount, xb_get_locked);
665 *found_bp = bp;
666 return 0;
667}
668
669struct xfs_buf *
670xfs_buf_incore(
671 struct xfs_buftarg *target,
672 xfs_daddr_t blkno,
673 size_t numblks,
674 xfs_buf_flags_t flags)
675{
676 struct xfs_buf *bp;
677 int error;
678 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
679
680 error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
681 if (error)
682 return NULL;
683 return bp;
684}
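
/*
 * Example usage of xfs_buf_incore() (illustrative sketch only, not taken from
 * this file; "btp", "daddr" and "nblks" are hypothetical caller variables):
 *
 *	struct xfs_buf	*bp;
 *
 *	bp = xfs_buf_incore(btp, daddr, nblks, XBF_TRYLOCK);
 *	if (bp) {
 *		// cache hit: returned locked with an extra hold
 *		...inspect the buffer...
 *		xfs_buf_relse(bp);	// unlock and drop the hold
 *	}
 */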
685
/*
 * Assembles a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata intensive workloads will see 3 orders of magnitude
 * more hits than misses.
 */
691int
692xfs_buf_get_map(
693 struct xfs_buftarg *target,
694 struct xfs_buf_map *map,
695 int nmaps,
696 xfs_buf_flags_t flags,
697 struct xfs_buf **bpp)
698{
699 struct xfs_buf *bp;
700 struct xfs_buf *new_bp;
701 int error = 0;
702
703 *bpp = NULL;
704 error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
705 if (!error)
706 goto found;
707 if (error != -ENOENT)
708 return error;
709
710 error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
711 if (error)
712 return error;
713
714 error = xfs_buf_allocate_memory(new_bp, flags);
715 if (error) {
716 xfs_buf_free(new_bp);
717 return error;
718 }
719
720 error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
721 if (error) {
722 xfs_buf_free(new_bp);
723 return error;
724 }
725
726 if (bp != new_bp)
727 xfs_buf_free(new_bp);
728
729found:
730 if (!bp->b_addr) {
731 error = _xfs_buf_map_pages(bp, flags);
732 if (unlikely(error)) {
733 xfs_warn_ratelimited(target->bt_mount,
734 "%s: failed to map %u pages", __func__,
735 bp->b_page_count);
736 xfs_buf_relse(bp);
737 return error;
738 }
739 }
740
	/*
	 * Clear b_error if this is a lookup from a caller that doesn't expect
	 * valid data to be found in the buffer.
	 */
745 if (!(flags & XBF_READ))
746 xfs_buf_ioerror(bp, 0);
747
748 XFS_STATS_INC(target->bt_mount, xb_get);
749 trace_xfs_buf_get(bp, flags, _RET_IP_);
750 *bpp = bp;
751 return 0;
752}
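
/*
 * Example usage of xfs_buf_get_map() (illustrative sketch only; error paths
 * trimmed, and "btp", "daddr" and "nblks" are assumptions, not from this file):
 *
 *	DEFINE_SINGLE_BUF_MAP(map, daddr, nblks);
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_buf_get_map(btp, &map, 1, 0, &bp);
 *	if (error)
 *		return error;
 *	// bp is locked, held and mapped (unless XBF_UNMAPPED was passed);
 *	// its contents are undefined unless the buffer was already cached.
 *	xfs_buf_relse(bp);
 */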
753
754STATIC int
755_xfs_buf_read(
756 xfs_buf_t *bp,
757 xfs_buf_flags_t flags)
758{
759 ASSERT(!(flags & XBF_WRITE));
760 ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
761
762 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
763 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
764
765 return xfs_buf_submit(bp);
766}
767
/*
 * Reverify a buffer found in cache without an attached ->b_ops.
 *
 * If the caller passed an ops structure and the buffer doesn't have ops
 * assigned, set the ops and use it to verify the contents. If verification
 * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
 * already in XBF_DONE state on entry.
 *
 * Under normal operations, every in-core buffer is verified on read I/O
 * completion. There are two scenarios that can lead to in-core buffers without
 * an assigned ->b_ops. The first is during log recovery of buffers on a V4
 * filesystem, though these buffers are purged at the end of recovery. The
 * other is online repair, which intentionally reads with a NULL buffer ops to
 * run several verifiers across an in-core buffer in order to establish buffer
 * type.  If repair can't establish that, the buffer will be left in memory
 * with NULL buffer ops.
 */
785int
786xfs_buf_reverify(
787 struct xfs_buf *bp,
788 const struct xfs_buf_ops *ops)
789{
790 ASSERT(bp->b_flags & XBF_DONE);
791 ASSERT(bp->b_error == 0);
792
793 if (!ops || bp->b_ops)
794 return 0;
795
796 bp->b_ops = ops;
797 bp->b_ops->verify_read(bp);
798 if (bp->b_error)
799 bp->b_flags &= ~XBF_DONE;
800 return bp->b_error;
801}
802
803int
804xfs_buf_read_map(
805 struct xfs_buftarg *target,
806 struct xfs_buf_map *map,
807 int nmaps,
808 xfs_buf_flags_t flags,
809 struct xfs_buf **bpp,
810 const struct xfs_buf_ops *ops,
811 xfs_failaddr_t fa)
812{
813 struct xfs_buf *bp;
814 int error;
815
816 flags |= XBF_READ;
817 *bpp = NULL;
818
819 error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
820 if (error)
821 return error;
822
823 trace_xfs_buf_read(bp, flags, _RET_IP_);
824
825 if (!(bp->b_flags & XBF_DONE)) {
		/* Initiate the buffer read and wait. */
827 XFS_STATS_INC(target->bt_mount, xb_get_read);
828 bp->b_ops = ops;
829 error = _xfs_buf_read(bp, flags);

		/* Readahead iodone already dropped the buffer, so exit. */
832 if (flags & XBF_ASYNC)
833 return 0;
834 } else {
		/* Buffer already read; all we need to do is check it. */
836 error = xfs_buf_reverify(bp, ops);

		/* Readahead already finished; drop the buffer and exit. */
839 if (flags & XBF_ASYNC) {
840 xfs_buf_relse(bp);
841 return 0;
842 }

		/* We do not want read in the flags */
845 bp->b_flags &= ~XBF_READ;
846 ASSERT(bp->b_ops != NULL || ops == NULL);
847 }
848
	/*
	 * If we've had a read error, then the contents of the buffer are
	 * invalid and should not be used. To ensure that a followup read tries
	 * to pull the buffer from disk again, we clear the XBF_DONE flag and
	 * mark the buffer stale. This ensures that anyone who has a current
	 * reference to the buffer will interpret its contents correctly and
	 * future cache lookups will also treat it as an empty, uninitialised
	 * buffer.
	 */
858 if (error) {
859 if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
860 xfs_buf_ioerror_alert(bp, fa);
861
862 bp->b_flags &= ~XBF_DONE;
863 xfs_buf_stale(bp);
864 xfs_buf_relse(bp);

		/* bad CRC means corrupted metadata */
867 if (error == -EFSBADCRC)
868 error = -EFSCORRUPTED;
869 return error;
870 }
871
872 *bpp = bp;
873 return 0;
874}
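
/*
 * Example usage of xfs_buf_read_map() (illustrative sketch only; "btp",
 * "daddr", "nblks" and "ops" are hypothetical - callers pass the verifier
 * that matches the metadata type being read):
 *
 *	DEFINE_SINGLE_BUF_MAP(map, daddr, nblks);
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_buf_read_map(btp, &map, 1, 0, &bp, ops, __this_address);
 *	if (error)
 *		return error;	// -EFSCORRUPTED, -EAGAIN, -EIO, ...
 *	// the read verifier has run; bp is locked, held and XBF_DONE
 *	xfs_buf_relse(bp);
 */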
875
/*
 *	If we are not low on memory then do the readahead in a
 *	deadlock-safe manner.
 */
880void
881xfs_buf_readahead_map(
882 struct xfs_buftarg *target,
883 struct xfs_buf_map *map,
884 int nmaps,
885 const struct xfs_buf_ops *ops)
886{
887 struct xfs_buf *bp;
888
889 if (bdi_read_congested(target->bt_bdev->bd_bdi))
890 return;
891
892 xfs_buf_read_map(target, map, nmaps,
893 XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
894 __this_address);
895}
896
/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */
901int
902xfs_buf_read_uncached(
903 struct xfs_buftarg *target,
904 xfs_daddr_t daddr,
905 size_t numblks,
906 int flags,
907 struct xfs_buf **bpp,
908 const struct xfs_buf_ops *ops)
909{
910 struct xfs_buf *bp;
911 int error;
912
913 *bpp = NULL;
914
915 error = xfs_buf_get_uncached(target, numblks, flags, &bp);
916 if (error)
917 return error;

	/* set up the buffer for a read IO */
920 ASSERT(bp->b_map_count == 1);
921 bp->b_bn = XFS_BUF_DADDR_NULL;
922 bp->b_maps[0].bm_bn = daddr;
923 bp->b_flags |= XBF_READ;
924 bp->b_ops = ops;
925
926 xfs_buf_submit(bp);
927 if (bp->b_error) {
928 error = bp->b_error;
929 xfs_buf_relse(bp);
930 return error;
931 }
932
933 *bpp = bp;
934 return 0;
935}
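
/*
 * Example usage of xfs_buf_read_uncached() (illustrative sketch only; "btp",
 * "daddr", "nblks" and "ops" are hypothetical; uncached buffers bypass the
 * cache entirely, so the caller owns the only reference):
 *
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_buf_read_uncached(btp, daddr, nblks, 0, &bp, ops);
 *	if (error)
 *		return error;
 *	...inspect bp->b_addr...
 *	xfs_buf_relse(bp);
 */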
936
937int
938xfs_buf_get_uncached(
939 struct xfs_buftarg *target,
940 size_t numblks,
941 int flags,
942 struct xfs_buf **bpp)
943{
944 unsigned long page_count;
945 int error, i;
946 struct xfs_buf *bp;
947 DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
948
949 *bpp = NULL;

	/* flags might contain irrelevant bits, pass only what we care about */
952 error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
953 if (error)
954 goto fail;
955
956 page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
957 error = _xfs_buf_get_pages(bp, page_count);
958 if (error)
959 goto fail_free_buf;
960
961 for (i = 0; i < page_count; i++) {
962 bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
963 if (!bp->b_pages[i]) {
964 error = -ENOMEM;
965 goto fail_free_mem;
966 }
967 }
968 bp->b_flags |= _XBF_PAGES;
969
970 error = _xfs_buf_map_pages(bp, 0);
971 if (unlikely(error)) {
972 xfs_warn(target->bt_mount,
973 "%s: failed to map pages", __func__);
974 goto fail_free_mem;
975 }
976
977 trace_xfs_buf_get_uncached(bp, _RET_IP_);
978 *bpp = bp;
979 return 0;
980
981 fail_free_mem:
982 while (--i >= 0)
983 __free_page(bp->b_pages[i]);
984 _xfs_buf_free_pages(bp);
985 fail_free_buf:
986 xfs_buf_free_maps(bp);
987 kmem_cache_free(xfs_buf_zone, bp);
988 fail:
989 return error;
990}
991
/*
 *	Increment reference count on buffer, to hold the buffer concurrently
 *	with another thread which may release (free) the buffer asynchronously.
 *	Must hold the buffer already to call this function.
 */
997void
998xfs_buf_hold(
999 xfs_buf_t *bp)
1000{
1001 trace_xfs_buf_hold(bp, _RET_IP_);
1002 atomic_inc(&bp->b_hold);
1003}
1004
/*
 *	Release a hold on the specified buffer. If the hold count is 1, the
 *	buffer is placed on the LRU queue.
 */
1009void
1010xfs_buf_rele(
1011 xfs_buf_t *bp)
1012{
1013 struct xfs_perag *pag = bp->b_pag;
1014 bool release;
1015 bool freebuf = false;
1016
1017 trace_xfs_buf_rele(bp, _RET_IP_);
1018
1019 if (!pag) {
1020 ASSERT(list_empty(&bp->b_lru));
1021 if (atomic_dec_and_test(&bp->b_hold)) {
1022 xfs_buf_ioacct_dec(bp);
1023 xfs_buf_free(bp);
1024 }
1025 return;
1026 }
1027
1028 ASSERT(atomic_read(&bp->b_hold) > 0);

	/*
	 * We grab the b_lock here first to serialise racing xfs_buf_rele()
	 * calls. The pag_buf_lock is only taken by atomic_dec_and_lock() when
	 * the hold count drops to zero, and that serialises the drop of the
	 * last reference against concurrent lookups in xfs_buf_find(). Holding
	 * b_lock across the decrement also means the in-flight accounting and
	 * LRU/dispose state updates below cannot race with another release of
	 * the same buffer. A buffer that is not kept on the LRU is removed
	 * from the perag rhashtable while both locks are held, so a new lookup
	 * can never find a buffer that is about to be freed.
	 */
1040 spin_lock(&bp->b_lock);
1041 release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
1042 if (!release) {
		/*
		 * Drop the in-flight state if the buffer is already on the LRU
		 * and it holds the only reference. This is racy because we
		 * haven't acquired the pag lock, but the use of the
		 * XFS_BSTATE_IN_FLIGHT state ensures the decrement occurs only
		 * once per buffer.
		 */
1049 if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
1050 __xfs_buf_ioacct_dec(bp);
1051 goto out_unlock;
1052 }
1053
	/* the last reference has been dropped ... */
1055 __xfs_buf_ioacct_dec(bp);
1056 if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
		/*
		 * If the buffer is added to the LRU take a new reference to the
		 * buffer for the LRU and clear the (now stale) dispose list
		 * state flag
		 */
1062 if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
1063 bp->b_state &= ~XFS_BSTATE_DISPOSE;
1064 atomic_inc(&bp->b_hold);
1065 }
1066 spin_unlock(&pag->pag_buf_lock);
1067 } else {
		/*
		 * most of the time buffers will already be removed from the
		 * LRU, so optimise that case by checking for the
		 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
		 * was on was the disposal list
		 */
1074 if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
1075 list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
1076 } else {
1077 ASSERT(list_empty(&bp->b_lru));
1078 }
1079
1080 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
1081 rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
1082 xfs_buf_hash_params);
1083 spin_unlock(&pag->pag_buf_lock);
1084 xfs_perag_put(pag);
1085 freebuf = true;
1086 }
1087
1088out_unlock:
1089 spin_unlock(&bp->b_lock);
1090
1091 if (freebuf)
1092 xfs_buf_free(bp);
1093}
1094
/*
 *	Lock a buffer object, if it is not already locked.
 *
 *	Note that this in no way locks the underlying pages, so it is only
 *	useful for synchronizing concurrent use of buffer objects, not for
 *	synchronizing independent access to the underlying pages.
 *
 *	Returns 1 if the lock was acquired and 0 if it was not. Unlike
 *	xfs_buf_lock() below, this never sleeps and never pushes the log,
 *	so it is safe to call from contexts that cannot block.
 */
1107int
1108xfs_buf_trylock(
1109 struct xfs_buf *bp)
1110{
1111 int locked;
1112
1113 locked = down_trylock(&bp->b_sema) == 0;
1114 if (locked)
1115 trace_xfs_buf_trylock(bp, _RET_IP_);
1116 else
1117 trace_xfs_buf_trylock_fail(bp, _RET_IP_);
1118 return locked;
1119}
1120
/*
 *	Lock a buffer object.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we
 *	are being asked to lock a buffer that has been reallocated. Because
 *	it is pinned, we know that the log has not been pushed to disk and
 *	hence it will still be locked. Rather than sleeping until someone
 *	else pushes the log, push it ourselves before trying to get the lock.
 */
1130void
1131xfs_buf_lock(
1132 struct xfs_buf *bp)
1133{
1134 trace_xfs_buf_lock(bp, _RET_IP_);
1135
1136 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
1137 xfs_log_force(bp->b_mount, 0);
1138 down(&bp->b_sema);
1139
1140 trace_xfs_buf_lock_done(bp, _RET_IP_);
1141}
1142
1143void
1144xfs_buf_unlock(
1145 struct xfs_buf *bp)
1146{
1147 ASSERT(xfs_buf_islocked(bp));
1148
1149 up(&bp->b_sema);
1150 trace_xfs_buf_unlock(bp, _RET_IP_);
1151}
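
/*
 * Locking example (illustrative sketch only): non-blocking contexts use
 * xfs_buf_trylock() and back off on failure, everything else blocks in
 * xfs_buf_lock():
 *
 *	if (!xfs_buf_trylock(bp)) {
 *		if (flags & XBF_TRYLOCK)
 *			return -EAGAIN;	// caller retries later
 *		xfs_buf_lock(bp);	// may push the log for stale+pinned bufs
 *	}
 *	...modify the buffer...
 *	xfs_buf_unlock(bp);
 */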
1152
1153STATIC void
1154xfs_buf_wait_unpin(
1155 xfs_buf_t *bp)
1156{
1157 DECLARE_WAITQUEUE (wait, current);
1158
1159 if (atomic_read(&bp->b_pin_count) == 0)
1160 return;
1161
1162 add_wait_queue(&bp->b_waiters, &wait);
1163 for (;;) {
1164 set_current_state(TASK_UNINTERRUPTIBLE);
1165 if (atomic_read(&bp->b_pin_count) == 0)
1166 break;
1167 io_schedule();
1168 }
1169 remove_wait_queue(&bp->b_waiters, &wait);
1170 set_current_state(TASK_RUNNING);
1171}
1172
/*
 * Complete I/O processing on a buffer: fold in any bio level error, run the
 * read verifier if this was a successful read, then hand the buffer to the
 * iodone callback, release it (async) or wake the waiter (sync).
 */
1177void
1178xfs_buf_ioend(
1179 struct xfs_buf *bp)
1180{
1181 bool read = bp->b_flags & XBF_READ;
1182
1183 trace_xfs_buf_iodone(bp, _RET_IP_);
1184
1185 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);

	/*
	 * Pull in IO completion errors now. We are guaranteed to be running
	 * single threaded, so we don't need the lock to read b_io_error.
	 */
1191 if (!bp->b_error && bp->b_io_error)
1192 xfs_buf_ioerror(bp, bp->b_io_error);

	/* Only validate buffers that were read without errors */
1195 if (read && !bp->b_error && bp->b_ops) {
1196 ASSERT(!bp->b_iodone);
1197 bp->b_ops->verify_read(bp);
1198 }
1199
1200 if (!bp->b_error) {
1201 bp->b_flags &= ~XBF_WRITE_FAIL;
1202 bp->b_flags |= XBF_DONE;
1203 }
1204
1205 if (bp->b_iodone)
1206 (*(bp->b_iodone))(bp);
1207 else if (bp->b_flags & XBF_ASYNC)
1208 xfs_buf_relse(bp);
1209 else
1210 complete(&bp->b_iowait);
1211}
1212
1213static void
1214xfs_buf_ioend_work(
1215 struct work_struct *work)
1216{
1217 struct xfs_buf *bp =
1218 container_of(work, xfs_buf_t, b_ioend_work);
1219
1220 xfs_buf_ioend(bp);
1221}
1222
1223static void
1224xfs_buf_ioend_async(
1225 struct xfs_buf *bp)
1226{
1227 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
1228 queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
1229}
1230
1231void
1232__xfs_buf_ioerror(
1233 xfs_buf_t *bp,
1234 int error,
1235 xfs_failaddr_t failaddr)
1236{
1237 ASSERT(error <= 0 && error >= -1000);
1238 bp->b_error = error;
1239 trace_xfs_buf_ioerror(bp, error, failaddr);
1240}
1241
1242void
1243xfs_buf_ioerror_alert(
1244 struct xfs_buf *bp,
1245 xfs_failaddr_t func)
1246{
1247 xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
1248 "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
1249 func, (uint64_t)XFS_BUF_ADDR(bp),
1250 bp->b_length, -bp->b_error);
1251}
1252
/*
 * Fail the I/O on this buffer: mark it stale, clear XBF_DONE, record -EIO and
 * run completion processing. Used when submission is aborted up front (e.g.
 * because the filesystem has been shut down) so that waiters and iodone
 * callbacks still see a completed, but failed, buffer.
 */
1259void
1260xfs_buf_ioend_fail(
1261 struct xfs_buf *bp)
1262{
1263 bp->b_flags &= ~XBF_DONE;
1264 xfs_buf_stale(bp);
1265 xfs_buf_ioerror(bp, -EIO);
1266 xfs_buf_ioend(bp);
1267}
1268
1269int
1270xfs_bwrite(
1271 struct xfs_buf *bp)
1272{
1273 int error;
1274
1275 ASSERT(xfs_buf_islocked(bp));
1276
1277 bp->b_flags |= XBF_WRITE;
1278 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
1279 XBF_DONE);
1280
1281 error = xfs_buf_submit(bp);
1282 if (error)
1283 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
1284 return error;
1285}
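
/*
 * Example of a synchronous write (illustrative sketch only; bp is assumed to
 * be locked and freshly modified by the caller, and "ops" is the matching
 * verifier, not something defined in this file):
 *
 *	int	error;
 *
 *	bp->b_ops = ops;		// write verifier recomputes CRCs
 *	error = xfs_bwrite(bp);		// submits, waits, shuts down on error
 *	xfs_buf_relse(bp);
 *	return error;
 */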
1286
1287static void
1288xfs_buf_bio_end_io(
1289 struct bio *bio)
1290{
1291 struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
1292
1293 if (!bio->bi_status &&
1294 (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
1295 XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
1296 bio->bi_status = BLK_STS_IOERR;
1297
	/*
	 * don't overwrite existing errors - otherwise we can lose errors on
	 * buffers that require multiple bios to complete.
	 */
1302 if (bio->bi_status) {
1303 int error = blk_status_to_errno(bio->bi_status);
1304
1305 cmpxchg(&bp->b_io_error, 0, error);
1306 }
1307
1308 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1309 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1310
1311 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1312 xfs_buf_ioend_async(bp);
1313 bio_put(bio);
1314}
1315
1316static void
1317xfs_buf_ioapply_map(
1318 struct xfs_buf *bp,
1319 int map,
1320 int *buf_offset,
1321 int *count,
1322 int op)
1323{
1324 int page_index;
1325 int total_nr_pages = bp->b_page_count;
1326 int nr_pages;
1327 struct bio *bio;
1328 sector_t sector = bp->b_maps[map].bm_bn;
1329 int size;
1330 int offset;
1331
	/* skip the pages in the buffer before the start offset */
1333 page_index = 0;
1334 offset = *buf_offset;
1335 while (offset >= PAGE_SIZE) {
1336 page_index++;
1337 offset -= PAGE_SIZE;
1338 }
1339
	/*
	 * Limit the IO size to the length of the current vector, and update the
	 * remaining IO count for the next time around.
	 */
1344 size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
1345 *count -= size;
1346 *buf_offset += size;
1347
1348next_chunk:
1349 atomic_inc(&bp->b_io_remaining);
1350 nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
1351
1352 bio = bio_alloc(GFP_NOIO, nr_pages);
1353 bio_set_dev(bio, bp->b_target->bt_bdev);
1354 bio->bi_iter.bi_sector = sector;
1355 bio->bi_end_io = xfs_buf_bio_end_io;
1356 bio->bi_private = bp;
1357 bio->bi_opf = op;
1358
1359 for (; size && nr_pages; nr_pages--, page_index++) {
1360 int rbytes, nbytes = PAGE_SIZE - offset;
1361
1362 if (nbytes > size)
1363 nbytes = size;
1364
1365 rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
1366 offset);
1367 if (rbytes < nbytes)
1368 break;
1369
1370 offset = 0;
1371 sector += BTOBB(nbytes);
1372 size -= nbytes;
1373 total_nr_pages--;
1374 }
1375
1376 if (likely(bio->bi_iter.bi_size)) {
1377 if (xfs_buf_is_vmapped(bp)) {
1378 flush_kernel_vmap_range(bp->b_addr,
1379 xfs_buf_vmap_len(bp));
1380 }
1381 submit_bio(bio);
1382 if (size)
1383 goto next_chunk;
1384 } else {
		/*
		 * This is guaranteed not to be the last io reference count
		 * because the caller (xfs_buf_submit) holds a count itself.
		 */
1389 atomic_dec(&bp->b_io_remaining);
1390 xfs_buf_ioerror(bp, -EIO);
1391 bio_put(bio);
1392 }
1393
1394}
1395
1396STATIC void
1397_xfs_buf_ioapply(
1398 struct xfs_buf *bp)
1399{
1400 struct blk_plug plug;
1401 int op;
1402 int offset;
1403 int size;
1404 int i;
1405
	/*
	 * Make sure we capture only current IO errors rather than stale errors
	 * left over from previous use of the buffer (e.g. failed readahead).
	 */
1410 bp->b_error = 0;
1411
1412 if (bp->b_flags & XBF_WRITE) {
1413 op = REQ_OP_WRITE;

		/*
		 * Run the write verifier callback function if it exists. If
		 * this function fails it will mark the buffer with an error and
		 * the IO should not be dispatched.
		 */
1420 if (bp->b_ops) {
1421 bp->b_ops->verify_write(bp);
1422 if (bp->b_error) {
1423 xfs_force_shutdown(bp->b_mount,
1424 SHUTDOWN_CORRUPT_INCORE);
1425 return;
1426 }
1427 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
1428 struct xfs_mount *mp = bp->b_mount;
1429
			/*
			 * non-crc filesystems don't attach verifiers during
			 * log recovery, so don't warn for such filesystems.
			 */
1434 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1435 xfs_warn(mp,
1436 "%s: no buf ops on daddr 0x%llx len %d",
1437 __func__, bp->b_bn, bp->b_length);
1438 xfs_hex_dump(bp->b_addr,
1439 XFS_CORRUPTION_DUMP_LEN);
1440 dump_stack();
1441 }
1442 }
1443 } else {
1444 op = REQ_OP_READ;
1445 if (bp->b_flags & XBF_READ_AHEAD)
1446 op |= REQ_RAHEAD;
1447 }
1448
	/* we only use the buffer cache for meta-data */
1450 op |= REQ_META;
1451
	/*
	 * Walk all the vectors issuing IO on them. Set up the initial offset
	 * into the buffer and the desired IO size before we start -
	 * xfs_buf_ioapply_map() will modify them appropriately for each
	 * subsequent call.
	 */
1458 offset = bp->b_offset;
1459 size = BBTOB(bp->b_length);
1460 blk_start_plug(&plug);
1461 for (i = 0; i < bp->b_map_count; i++) {
1462 xfs_buf_ioapply_map(bp, i, &offset, &size, op);
1463 if (bp->b_error)
1464 break;
1465 if (size <= 0)
1466 break;
1467 }
1468 blk_finish_plug(&plug);
1469}
1470
/*
 * Wait for I/O completion of a sync buffer and return the I/O error code.
 */
1474static int
1475xfs_buf_iowait(
1476 struct xfs_buf *bp)
1477{
1478 ASSERT(!(bp->b_flags & XBF_ASYNC));
1479
1480 trace_xfs_buf_iowait(bp, _RET_IP_);
1481 wait_for_completion(&bp->b_iowait);
1482 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1483
1484 return bp->b_error;
1485}
1486
/*
 * Buffer I/O submission path, read or write. Asynchronous submission transfers
 * the buffer lock ownership and the current reference to the IO. It is not
 * safe to reference the buffer after a call to this function unless the caller
 * holds an additional reference itself.
 */
1493int
1494__xfs_buf_submit(
1495 struct xfs_buf *bp,
1496 bool wait)
1497{
1498 int error = 0;
1499
1500 trace_xfs_buf_submit(bp, _RET_IP_);
1501
1502 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));

	/* on shutdown we stale and complete the buffer immediately */
1505 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
1506 xfs_buf_ioend_fail(bp);
1507 return -EIO;
1508 }
1509
	/*
	 * Grab a reference so the buffer does not go away underneath us. For
	 * async buffers, I/O completion drops the callers reference, which
	 * could occur before submission returns.
	 */
1515 xfs_buf_hold(bp);
1516
1517 if (bp->b_flags & XBF_WRITE)
1518 xfs_buf_wait_unpin(bp);
1519
	/* clear the internal error state to avoid spurious errors */
1521 bp->b_io_error = 0;
1522
	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from calling
	 * xfs_buf_ioend too early.
	 */
1528 atomic_set(&bp->b_io_remaining, 1);
1529 if (bp->b_flags & XBF_ASYNC)
1530 xfs_buf_ioacct_inc(bp);
1531 _xfs_buf_ioapply(bp);
1532
	/*
	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
	 * reference we took above. If we drop it to zero, run completion so
	 * that we don't return to the caller with completion still pending.
	 */
1538 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1539 if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
1540 xfs_buf_ioend(bp);
1541 else
1542 xfs_buf_ioend_async(bp);
1543 }
1544
1545 if (wait)
1546 error = xfs_buf_iowait(bp);
1547
	/*
	 * Release the hold that keeps the buffer referenced for the entire
	 * I/O. Note that if the buffer is async, it is not safe to reference
	 * after this release.
	 */
1553 xfs_buf_rele(bp);
1554 return error;
1555}
1556
1557void *
1558xfs_buf_offset(
1559 struct xfs_buf *bp,
1560 size_t offset)
1561{
1562 struct page *page;
1563
1564 if (bp->b_addr)
1565 return bp->b_addr + offset;
1566
1567 offset += bp->b_offset;
1568 page = bp->b_pages[offset >> PAGE_SHIFT];
1569 return page_address(page) + (offset & (PAGE_SIZE-1));
1570}
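
/*
 * Example (illustrative sketch only): access data at an arbitrary byte offset
 * in a buffer without assuming it is contiguously mapped, which is how callers
 * deal with XBF_UNMAPPED multi-page buffers:
 *
 *	struct some_ondisk_hdr	*hdr;	// hypothetical on-disk structure
 *
 *	hdr = xfs_buf_offset(bp, offset);
 *	// only valid within the page containing 'offset'; do not assume the
 *	// returned pointer is usable past the page boundary.
 */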
1571
1572void
1573xfs_buf_zero(
1574 struct xfs_buf *bp,
1575 size_t boff,
1576 size_t bsize)
1577{
1578 size_t bend;
1579
1580 bend = boff + bsize;
1581 while (boff < bend) {
1582 struct page *page;
1583 int page_index, page_offset, csize;
1584
1585 page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
1586 page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
1587 page = bp->b_pages[page_index];
1588 csize = min_t(size_t, PAGE_SIZE - page_offset,
1589 BBTOB(bp->b_length) - boff);
1590
1591 ASSERT((csize + page_offset) <= PAGE_SIZE);
1592
1593 memset(page_address(page) + page_offset, 0, csize);
1594
1595 boff += csize;
1596 }
1597}
1598
/*
 * Log a message about and stale a buffer that a caller has decided is corrupt.
 *
 * This function should be called for the kinds of metadata corruption that
 * cannot be detected by a verifier, such as incorrect inter-block relationship
 * data.  Do /not/ call this function from a verifier function.
 *
 * The buffer must be XBF_DONE prior to the call.  Afterwards, the buffer will
 * be marked stale, but b_error will not be set.  The caller is responsible for
 * releasing the buffer or fixing it.
 */
1610void
1611__xfs_buf_mark_corrupt(
1612 struct xfs_buf *bp,
1613 xfs_failaddr_t fa)
1614{
1615 ASSERT(bp->b_flags & XBF_DONE);
1616
1617 xfs_buf_corruption_error(bp, fa);
1618 xfs_buf_stale(bp);
1619}
1620
/*
 * Isolate buffers from the buftarg LRU for disposal during unmount.
 *
 * Buffers that still carry a hold count beyond the LRU reference have
 * outstanding I/O or callbacks pending, so they are skipped and waited on by
 * the caller. Everything else has its LRU reference cleared and is moved to
 * the caller's dispose list so it can be released outside the LRU lock. The
 * b_lock is trylocked because we are inverting the normal lock order here.
 */
1630static enum lru_status
1631xfs_buftarg_wait_rele(
1632 struct list_head *item,
1633 struct list_lru_one *lru,
1634 spinlock_t *lru_lock,
1635 void *arg)
1636
1637{
1638 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1639 struct list_head *dispose = arg;
1640
1641 if (atomic_read(&bp->b_hold) > 1) {
		/* need to wait, so skip it this pass */
1643 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1644 return LRU_SKIP;
1645 }
1646 if (!spin_trylock(&bp->b_lock))
1647 return LRU_SKIP;

	/*
	 * clear the LRU reference count so the buffer doesn't get
	 * ignored in xfs_buf_rele().
	 */
1653 atomic_set(&bp->b_lru_ref, 0);
1654 bp->b_state |= XFS_BSTATE_DISPOSE;
1655 list_lru_isolate_move(lru, item, dispose);
1656 spin_unlock(&bp->b_lock);
1657 return LRU_REMOVED;
1658}
1659
1660void
1661xfs_wait_buftarg(
1662 struct xfs_buftarg *btp)
1663{
1664 LIST_HEAD(dispose);
1665 int loop = 0;
1666 bool write_fail = false;

	/*
	 * First wait on the buftarg I/O count for all in-flight buffers to be
	 * released. This is critical as new buffers do not make the LRU until
	 * they are released.
	 *
	 * Next, flush the buffer workqueue to ensure all completion processing
	 * has finished. Just waiting on buffer locks is not sufficient for
	 * async IO as the reference count held over IO is not released until
	 * after the buffer lock is dropped. Hence we need to ensure here that
	 * all reference counts have been dropped before we start walking the
	 * LRU list.
	 */
1680 while (percpu_counter_sum(&btp->bt_io_count))
1681 delay(100);
1682 flush_workqueue(btp->bt_mount->m_buf_workqueue);
1683
	/* loop until there is nothing left on the lru list. */
1685 while (list_lru_count(&btp->bt_lru)) {
1686 list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
1687 &dispose, LONG_MAX);
1688
1689 while (!list_empty(&dispose)) {
1690 struct xfs_buf *bp;
1691 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1692 list_del_init(&bp->b_lru);
1693 if (bp->b_flags & XBF_WRITE_FAIL) {
1694 write_fail = true;
1695 xfs_buf_alert_ratelimited(bp,
1696 "XFS: Corruption Alert",
1697"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
1698 (long long)bp->b_bn);
1699 }
1700 xfs_buf_rele(bp);
1701 }
1702 if (loop++ != 0)
1703 delay(100);
1704 }
1705
	/*
	 * If one or more failed buffers were freed, that means dirty metadata
	 * was thrown away. This should only ever happen after I/O completion
	 * handling has elevated I/O error(s) to permanent failures and shuts
	 * down the fs.
	 */
1712 if (write_fail) {
1713 ASSERT(XFS_FORCED_SHUTDOWN(btp->bt_mount));
1714 xfs_alert(btp->bt_mount,
1715 "Please run xfs_repair to determine the extent of the problem.");
1716 }
1717}
1718
1719static enum lru_status
1720xfs_buftarg_isolate(
1721 struct list_head *item,
1722 struct list_lru_one *lru,
1723 spinlock_t *lru_lock,
1724 void *arg)
1725{
1726 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1727 struct list_head *dispose = arg;
1728
	/*
	 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
	 * If we fail to get the lock, just skip it.
	 */
1733 if (!spin_trylock(&bp->b_lock))
1734 return LRU_SKIP;
1735
	/*
	 * Decrement the b_lru_ref count unless the value is already zero. If
	 * it is already zero, we need to reclaim the buffer, otherwise it gets
	 * another trip through the LRU.
	 */
1740 if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1741 spin_unlock(&bp->b_lock);
1742 return LRU_ROTATE;
1743 }
1744
1745 bp->b_state |= XFS_BSTATE_DISPOSE;
1746 list_lru_isolate_move(lru, item, dispose);
1747 spin_unlock(&bp->b_lock);
1748 return LRU_REMOVED;
1749}
1750
1751static unsigned long
1752xfs_buftarg_shrink_scan(
1753 struct shrinker *shrink,
1754 struct shrink_control *sc)
1755{
1756 struct xfs_buftarg *btp = container_of(shrink,
1757 struct xfs_buftarg, bt_shrinker);
1758 LIST_HEAD(dispose);
1759 unsigned long freed;
1760
1761 freed = list_lru_shrink_walk(&btp->bt_lru, sc,
1762 xfs_buftarg_isolate, &dispose);
1763
1764 while (!list_empty(&dispose)) {
1765 struct xfs_buf *bp;
1766 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1767 list_del_init(&bp->b_lru);
1768 xfs_buf_rele(bp);
1769 }
1770
1771 return freed;
1772}
1773
1774static unsigned long
1775xfs_buftarg_shrink_count(
1776 struct shrinker *shrink,
1777 struct shrink_control *sc)
1778{
1779 struct xfs_buftarg *btp = container_of(shrink,
1780 struct xfs_buftarg, bt_shrinker);
1781 return list_lru_shrink_count(&btp->bt_lru, sc);
1782}
1783
1784void
1785xfs_free_buftarg(
1786 struct xfs_buftarg *btp)
1787{
1788 unregister_shrinker(&btp->bt_shrinker);
1789 ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
1790 percpu_counter_destroy(&btp->bt_io_count);
1791 list_lru_destroy(&btp->bt_lru);
1792
1793 xfs_blkdev_issue_flush(btp);
1794
1795 kmem_free(btp);
1796}
1797
1798int
1799xfs_setsize_buftarg(
1800 xfs_buftarg_t *btp,
1801 unsigned int sectorsize)
1802{
	/* Set up metadata sector size info */
1804 btp->bt_meta_sectorsize = sectorsize;
1805 btp->bt_meta_sectormask = sectorsize - 1;
1806
1807 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1808 xfs_warn(btp->bt_mount,
1809 "Cannot set_blocksize to %u on device %pg",
1810 sectorsize, btp->bt_bdev);
1811 return -EINVAL;
1812 }
1813
	/* Set up device logical sector size mask */
1815 btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
1816 btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
1817
1818 return 0;
1819}
1820
/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */
1826STATIC int
1827xfs_setsize_buftarg_early(
1828 xfs_buftarg_t *btp,
1829 struct block_device *bdev)
1830{
1831 return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
1832}
1833
1834xfs_buftarg_t *
1835xfs_alloc_buftarg(
1836 struct xfs_mount *mp,
1837 struct block_device *bdev,
1838 struct dax_device *dax_dev)
1839{
1840 xfs_buftarg_t *btp;
1841
1842 btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
1843
1844 btp->bt_mount = mp;
1845 btp->bt_dev = bdev->bd_dev;
1846 btp->bt_bdev = bdev;
1847 btp->bt_daxdev = dax_dev;
1848
	/*
	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
	 * per 30 seconds so as to not spam logs too much on repeated errors.
	 */
1853 ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
1854 DEFAULT_RATELIMIT_BURST);
1855
1856 if (xfs_setsize_buftarg_early(btp, bdev))
1857 goto error_free;
1858
1859 if (list_lru_init(&btp->bt_lru))
1860 goto error_free;
1861
1862 if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
1863 goto error_lru;
1864
1865 btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
1866 btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
1867 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1868 btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
1869 if (register_shrinker(&btp->bt_shrinker))
1870 goto error_pcpu;
1871 return btp;
1872
1873error_pcpu:
1874 percpu_counter_destroy(&btp->bt_io_count);
1875error_lru:
1876 list_lru_destroy(&btp->bt_lru);
1877error_free:
1878 kmem_free(btp);
1879 return NULL;
1880}
1881
/*
 * Cancel a delayed write list.
 *
 * Remove each buffer from the list, clear the delwri queue flag and drop the
 * associated buffer reference.
 */
1888void
1889xfs_buf_delwri_cancel(
1890 struct list_head *list)
1891{
1892 struct xfs_buf *bp;
1893
1894 while (!list_empty(list)) {
1895 bp = list_first_entry(list, struct xfs_buf, b_list);
1896
1897 xfs_buf_lock(bp);
1898 bp->b_flags &= ~_XBF_DELWRI_Q;
1899 list_del_init(&bp->b_list);
1900 xfs_buf_relse(bp);
1901 }
1902}
1903
/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been.  Note that
 * neither this routine nor the buffer list submission functions perform
 * any internal synchronization.  It is expected that the lists are thread-local
 * to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */
1915bool
1916xfs_buf_delwri_queue(
1917 struct xfs_buf *bp,
1918 struct list_head *list)
1919{
1920 ASSERT(xfs_buf_islocked(bp));
1921 ASSERT(!(bp->b_flags & XBF_READ));

	/*
	 * If the buffer is already marked delwri it is already on a delwri
	 * queue and holds a list reference, so there is nothing more to do
	 * here.
	 */
1928 if (bp->b_flags & _XBF_DELWRI_Q) {
1929 trace_xfs_buf_delwri_queued(bp, _RET_IP_);
1930 return false;
1931 }
1932
1933 trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	/*
	 * If a buffer gets written out synchronously or marked stale while it
	 * is on a delwri list we lazily remove it. To do this, the other party
	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
	 * It remains referenced and on the list.  In a rare corner case it
	 * might get readded to a delwri list after the synchronous writeout,
	 * in which case we just need to re-add the flag here.
	 */
1943 bp->b_flags |= _XBF_DELWRI_Q;
1944 if (list_empty(&bp->b_list)) {
1945 atomic_inc(&bp->b_hold);
1946 list_add_tail(&bp->b_list, list);
1947 }
1948
1949 return true;
1950}
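
/*
 * Example of delayed write queuing (illustrative sketch only; the list head is
 * a caller-private, thread-local list and "bp" is a locked, modified buffer):
 *
 *	LIST_HEAD(buffer_list);
 *	int	error;
 *
 *	xfs_buf_delwri_queue(bp, &buffer_list);	// bp must be locked here
 *	xfs_buf_relse(bp);			// the queue holds its own ref
 *	...
 *	error = xfs_buf_delwri_submit(&buffer_list);	// sort, write, wait
 */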
1951
/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */
1957static int
1958xfs_buf_cmp(
1959 void *priv,
1960 struct list_head *a,
1961 struct list_head *b)
1962{
1963 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1964 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1965 xfs_daddr_t diff;
1966
1967 diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
1968 if (diff < 0)
1969 return -1;
1970 if (diff > 0)
1971 return 1;
1972 return 0;
1973}
1974
/*
 * Submit buffers for write. If wait_list is specified, the buffers are
 * submitted using sync I/O and placed on the wait list such that the caller can
 * iowait each buffer. Otherwise async I/O is used and the buffers are released
 * as they are submitted. On a wait list submission the delwri queue reference
 * is transferred to the wait list along with the buffer.
 */
1982static int
1983xfs_buf_delwri_submit_buffers(
1984 struct list_head *buffer_list,
1985 struct list_head *wait_list)
1986{
1987 struct xfs_buf *bp, *n;
1988 int pinned = 0;
1989 struct blk_plug plug;
1990
1991 list_sort(NULL, buffer_list, xfs_buf_cmp);
1992
1993 blk_start_plug(&plug);
1994 list_for_each_entry_safe(bp, n, buffer_list, b_list) {
1995 if (!wait_list) {
1996 if (xfs_buf_ispinned(bp)) {
1997 pinned++;
1998 continue;
1999 }
2000 if (!xfs_buf_trylock(bp))
2001 continue;
2002 } else {
2003 xfs_buf_lock(bp);
2004 }

		/*
		 * Someone else might have written the buffer synchronously or
		 * marked it stale in the meantime.  In that case only the
		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
		 * reference and remove it from the list here.
		 */
2012 if (!(bp->b_flags & _XBF_DELWRI_Q)) {
2013 list_del_init(&bp->b_list);
2014 xfs_buf_relse(bp);
2015 continue;
2016 }
2017
2018 trace_xfs_buf_delwri_split(bp, _RET_IP_);

		/*
		 * If we have a wait list, each buffer (and associated delwri
		 * queue reference) transfers to it and is submitted
		 * synchronously. Otherwise, drop the buffer from the delwri
		 * queue and submit async.
		 */
2026 bp->b_flags &= ~_XBF_DELWRI_Q;
2027 bp->b_flags |= XBF_WRITE;
2028 if (wait_list) {
2029 bp->b_flags &= ~XBF_ASYNC;
2030 list_move_tail(&bp->b_list, wait_list);
2031 } else {
2032 bp->b_flags |= XBF_ASYNC;
2033 list_del_init(&bp->b_list);
2034 }
2035 __xfs_buf_submit(bp, false);
2036 }
2037 blk_finish_plug(&plug);
2038
2039 return pinned;
2040}
2041
/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers.  This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing, as the @buffer_list is consumed in this
 * function.
 *
 * Note: this function will skip buffers it would block on, and in doing so
 * leaves them on @buffer_list so they can be retried on a later pass. As such,
 * it is common for this function to be called with a partially empty list and
 * for it to be repeatedly called until the list is empty.
 *
 * Returns the number of buffers that were skipped because they were pinned.
 */
2058int
2059xfs_buf_delwri_submit_nowait(
2060 struct list_head *buffer_list)
2061{
2062 return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
2063}
2064
/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require such
 * functionality.
 */
2073int
2074xfs_buf_delwri_submit(
2075 struct list_head *buffer_list)
2076{
2077 LIST_HEAD (wait_list);
2078 int error = 0, error2;
2079 struct xfs_buf *bp;
2080
2081 xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);

	/* Wait for IO to complete. */
2084 while (!list_empty(&wait_list)) {
2085 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2086
2087 list_del_init(&bp->b_list);

		/*
		 * Wait on the locked buffer, check for errors and unlock and
		 * release the delwri queue reference.
		 */
2093 error2 = xfs_buf_iowait(bp);
2094 xfs_buf_relse(bp);
2095 if (!error)
2096 error = error2;
2097 }
2098
2099 return error;
2100}
2101
/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri queue
 * and return with the buffer still on the original queue. The waiting delwri
 * buffer submission infrastructure guarantees transfer of the delwri queue
 * buffer reference to a temporary wait list. We reuse this infrastructure to
 * transfer the buffer back to the original queue.
 *
 * Note the buffer transitions from the queued state, to the submitted and wait
 * listed state and back to the queued state during this call. The buffer
 * locking and queue management logic between _delwri_pushbuf() and
 * _delwri_queue() guarantee that the buffer cannot be queued to another list
 * in the meantime.
 */
2117int
2118xfs_buf_delwri_pushbuf(
2119 struct xfs_buf *bp,
2120 struct list_head *buffer_list)
2121{
2122 LIST_HEAD (submit_list);
2123 int error;
2124
2125 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
2126
2127 trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
2128
	/*
	 * Isolate the buffer to a new local list so we can submit it for I/O
	 * independently from the rest of the original list.
	 */
2133 xfs_buf_lock(bp);
2134 list_move(&bp->b_list, &submit_list);
2135 xfs_buf_unlock(bp);
2136
	/*
	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
	 * the buffer on the wait list with the original reference. Rather than
	 * bounce the buffer from a local wait list back to the original list
	 * after I/O completion, reuse the original list as the wait list.
	 */
2143 xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
2144
	/*
	 * The buffer is now locked, under I/O and wait listed on the original
	 * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
	 * return with the buffer unlocked and on the original queue.
	 */
2150 error = xfs_buf_iowait(bp);
2151 bp->b_flags |= _XBF_DELWRI_Q;
2152 xfs_buf_unlock(bp);
2153
2154 return error;
2155}
2156
2157int __init
2158xfs_buf_init(void)
2159{
2160 xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
2161 SLAB_HWCACHE_ALIGN |
2162 SLAB_RECLAIM_ACCOUNT |
2163 SLAB_MEM_SPREAD,
2164 NULL);
2165 if (!xfs_buf_zone)
2166 goto out;
2167
2168 return 0;
2169
2170 out:
2171 return -ENOMEM;
2172}
2173
2174void
2175xfs_buf_terminate(void)
2176{
2177 kmem_cache_destroy(xfs_buf_zone);
2178}
2179
2180void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
2181{
	/*
	 * Set the lru reference count to 0 based on the error injection tag.
	 * This allows userspace to disrupt buffer caching for debug/testing
	 * purposes.
	 */
2187 if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
2188 lru_ref = 0;
2189
2190 atomic_set(&bp->b_lru_ref, lru_ref);
2191}
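
/*
 * Example usage of xfs_buf_set_ref() (illustrative sketch only): callers that
 * want a buffer to survive several shrinker passes before being reclaimed
 * raise its LRU reference count right after a successful read:
 *
 *	error = xfs_buf_read_map(btp, &map, 1, 0, &bp, ops, __this_address);
 *	if (!error)
 *		xfs_buf_set_ref(bp, 2);	// worth two trips through the LRU
 */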

/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */
2198bool
2199xfs_verify_magic(
2200 struct xfs_buf *bp,
2201 __be32 dmagic)
2202{
2203 struct xfs_mount *mp = bp->b_mount;
2204 int idx;
2205
2206 idx = xfs_sb_version_hascrc(&mp->m_sb);
2207 if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
2208 return false;
2209 return dmagic == bp->b_ops->magic[idx];
2210}
2211
/*
 * Verify an on-disk magic16 value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */
2216bool
2217xfs_verify_magic16(
2218 struct xfs_buf *bp,
2219 __be16 dmagic)
2220{
2221 struct xfs_mount *mp = bp->b_mount;
2222 int idx;
2223
2224 idx = xfs_sb_version_hascrc(&mp->m_sb);
2225 if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
2226 return false;
2227 return dmagic == bp->b_ops->magic16[idx];
2228}
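
/*
 * Example read verifier fragment (illustrative sketch only; the structure and
 * field names are hypothetical stand-ins for a real on-disk header):
 *
 *	struct xfs_foo_hdr	*hdr = bp->b_addr;
 *
 *	if (!xfs_verify_magic(bp, hdr->magic)) {
 *		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 *		return;
 *	}
 *
 * The b_ops->magic[] pair holds the non-CRC and CRC on-disk magic values, and
 * xfs_sb_version_hascrc() selects the right slot for the mounted filesystem.
 */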
2229