// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include <linux/backing-dev.h>

#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_errortag.h"
#include "xfs_error.h"

static kmem_zone_t *xfs_buf_zone;

#define xb_to_gfp(flags) \
	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
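
/*
 * Locking orders:
 *
 * xfs_buf_ioacct_inc:
 * xfs_buf_ioacct_dec:
 *	b_sema (caller holds)
 *	  b_lock
 *
 * xfs_buf_stale:
 *	b_sema (caller holds)
 *	  b_lock
 *	    lru_lock
 *
 * xfs_buf_rele:
 *	b_lock
 *	  pag_buf_lock
 *	    lru_lock
 *
 * xfs_buftarg_wait_rele:
 *	lru_lock
 *	  b_lock (trylock due to inversion)
 *
 * xfs_buftarg_isolate:
 *	lru_lock
 *	  b_lock (trylock due to inversion)
 */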
static inline int
xfs_buf_is_vmapped(
	struct xfs_buf	*bp)
{
	/*
	 * Return true if the buffer is vmapped.
	 *
	 * b_addr is null if the buffer is not mapped, but the code is clever
	 * enough to know it doesn't have to map a single page, so the check
	 * has to be both for b_addr and bp->b_page_count > 1.
	 */
	return bp->b_addr && bp->b_page_count > 1;
}

static inline int
xfs_buf_vmap_len(
	struct xfs_buf	*bp)
{
	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
}
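
/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. The count is incremented once per buffer (per hold cycle)
 * because the corresponding decrement is deferred to buffer release. Buffers
 * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
 * tracking adds unnecessary overhead. This is used for synchronization
 * purposes with unmount (see xfs_wait_buftarg()), so all we really need is a
 * count of in-flight I/Os.
 */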
static inline void
xfs_buf_ioacct_inc(
	struct xfs_buf	*bp)
{
	if (bp->b_flags & XBF_NO_IOACCT)
		return;

	ASSERT(bp->b_flags & XBF_ASYNC);
	spin_lock(&bp->b_lock);
	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
		percpu_counter_inc(&bp->b_target->bt_io_count);
	}
	spin_unlock(&bp->b_lock);
}
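
/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg.
 */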
static inline void
__xfs_buf_ioacct_dec(
	struct xfs_buf	*bp)
{
	lockdep_assert_held(&bp->b_lock);

	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
		percpu_counter_dec(&bp->b_target->bt_io_count);
	}
}

static inline void
xfs_buf_ioacct_dec(
	struct xfs_buf	*bp)
{
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);
	spin_unlock(&bp->b_lock);
}
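
/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
 * reference count falls to zero. If the buffer is already on the LRU, we need
 * to remove the reference that LRU holds on the buffer.
 *
 * This prevents build-up of stale buffers on the LRU.
 */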
void
xfs_buf_stale(
	struct xfs_buf	*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_STALE;

	/*
	 * Clear the delwri status so that a delwri queue walker will not
	 * flush this buffer to disk now that it is stale. The delwri queue has
	 * a reference to the buffer, so this is safe to do.
	 */
	bp->b_flags &= ~_XBF_DELWRI_Q;

	/*
	 * Once the buffer is marked stale and unlocked, a subsequent lookup
	 * could reset b_flags. There is no guarantee that the buffer is
	 * unaccounted (released to LRU) before that occurs. Drop in-flight
	 * status now to preserve accounting consistency.
	 */
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);

	atomic_set(&bp->b_lru_ref, 0);
	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
		atomic_dec(&bp->b_hold);

	ASSERT(atomic_read(&bp->b_hold) >= 1);
	spin_unlock(&bp->b_lock);
}

static int
xfs_buf_get_maps(
	struct xfs_buf		*bp,
	int			map_count)
{
	ASSERT(bp->b_maps == NULL);
	bp->b_map_count = map_count;

	if (map_count == 1) {
		bp->b_maps = &bp->__b_map;
		return 0;
	}

	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
				KM_NOFS);
	if (!bp->b_maps)
		return -ENOMEM;
	return 0;
}

/*
 * Frees b_maps if it was allocated.
 */
static void
xfs_buf_free_maps(
	struct xfs_buf	*bp)
{
	if (bp->b_maps != &bp->__b_map) {
		kmem_free(bp->b_maps);
		bp->b_maps = NULL;
	}
}

static struct xfs_buf *
_xfs_buf_alloc(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags)
{
	struct xfs_buf		*bp;
	int			error;
	int			i;

	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
	if (unlikely(!bp))
		return NULL;

	/*
	 * We don't want certain flags to appear in b_flags unless they are
	 * specifically set by later operations on the buffer.
	 */
	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);

	atomic_set(&bp->b_hold, 1);
	atomic_set(&bp->b_lru_ref, 1);
	init_completion(&bp->b_iowait);
	INIT_LIST_HEAD(&bp->b_lru);
	INIT_LIST_HEAD(&bp->b_list);
	INIT_LIST_HEAD(&bp->b_li_list);
	sema_init(&bp->b_sema, 0); /* held, no waiters */
	spin_lock_init(&bp->b_lock);
	bp->b_target = target;
	bp->b_mount = target->bt_mount;
	bp->b_flags = flags;

	/*
	 * Set up the buffer map array and compute the total length of the
	 * (possibly discontiguous) buffer from the individual map lengths.
	 */
	error = xfs_buf_get_maps(bp, nmaps);
	if (error) {
		kmem_zone_free(xfs_buf_zone, bp);
		return NULL;
	}

	bp->b_bn = map[0].bm_bn;
	bp->b_length = 0;
	for (i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bp->b_length += map[i].bm_len;
	}

	atomic_set(&bp->b_pin_count, 0);
	init_waitqueue_head(&bp->b_waiters);

	XFS_STATS_INC(bp->b_mount, xb_create);
	trace_xfs_buf_init(bp, _RET_IP_);

	return bp;
}
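
/*
 *	Allocates a page array capable of holding a specified number
 *	of pages, and points the page buf at it.
 */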
STATIC int
_xfs_buf_get_pages(
	xfs_buf_t		*bp,
	int			page_count)
{
	/* Make sure that we have a page list */
	if (bp->b_pages == NULL) {
		bp->b_page_count = page_count;
		if (page_count <= XB_PAGES) {
			bp->b_pages = bp->b_page_array;
		} else {
			bp->b_pages = kmem_alloc(sizeof(struct page *) *
						 page_count, KM_NOFS);
			if (bp->b_pages == NULL)
				return -ENOMEM;
		}
		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}
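
/*
 *	Frees b_pages if it was allocated.
 */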
STATIC void
_xfs_buf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->b_pages != bp->b_page_array) {
		kmem_free(bp->b_pages);
		bp->b_pages = NULL;
	}
}
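
/*
 * Releases the specified buffer.
 *
 * The modification state of any associated pages is left unchanged.  The
 * buffer must not be on any hash - use xfs_buf_rele instead for hashed and
 * refcounted buffers.
 */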
void
xfs_buf_free(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_free(bp, _RET_IP_);

	ASSERT(list_empty(&bp->b_lru));

	if (bp->b_flags & _XBF_PAGES) {
		uint		i;

		if (xfs_buf_is_vmapped(bp))
			vm_unmap_ram(bp->b_addr - bp->b_offset,
					bp->b_page_count);

		for (i = 0; i < bp->b_page_count; i++) {
			struct page	*page = bp->b_pages[i];

			__free_page(page);
		}
	} else if (bp->b_flags & _XBF_KMEM)
		kmem_free(bp->b_addr);
	_xfs_buf_free_pages(bp);
	xfs_buf_free_maps(bp);
	kmem_zone_free(xfs_buf_zone, bp);
}
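
/*
 * Allocates all the pages for buffer in question and builds its page list.
 */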
STATIC int
xfs_buf_allocate_memory(
	xfs_buf_t		*bp,
	uint			flags)
{
	size_t			size;
	size_t			nbytes, offset;
	gfp_t			gfp_mask = xb_to_gfp(flags);
	unsigned short		page_count, i;
	xfs_off_t		start, end;
	int			error;

	/*
	 * for buffers that are contained within a single page, just allocate
	 * the memory from the heap - there's no need for the complexity of
	 * page arrays to keep straight here.
	 */
	size = BBTOB(bp->b_length);
	if (size < PAGE_SIZE) {
		bp->b_addr = kmem_alloc(size, KM_NOFS);
		if (!bp->b_addr) {
			/* low memory - use alloc_page loop instead */
			goto use_alloc_page;
		}

		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
			/* b_addr spans two pages - use alloc_page instead */
			kmem_free(bp->b_addr);
			bp->b_addr = NULL;
			goto use_alloc_page;
		}
		bp->b_offset = offset_in_page(bp->b_addr);
		bp->b_pages = bp->b_page_array;
		bp->b_pages[0] = virt_to_page(bp->b_addr);
		bp->b_page_count = 1;
		bp->b_flags |= _XBF_KMEM;
		return 0;
	}

use_alloc_page:
	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
								>> PAGE_SHIFT;
	page_count = end - start;
	error = _xfs_buf_get_pages(bp, page_count);
	if (unlikely(error))
		return error;

	offset = bp->b_offset;
	bp->b_flags |= _XBF_PAGES;

	for (i = 0; i < bp->b_page_count; i++) {
		struct page	*page;
		uint		retries = 0;
retry:
		page = alloc_page(gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & XBF_READ_AHEAD) {
				bp->b_page_count = i;
				error = -ENOMEM;
				goto out_free_pages;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				xfs_err(NULL,
		"%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
					current->comm, current->pid,
					__func__, gfp_mask);

			XFS_STATS_INC(bp->b_mount, xb_page_retries);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(bp->b_mount, xb_page_found);

		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
		size -= nbytes;
		bp->b_pages[i] = page;
		offset = 0;
	}
	return 0;

out_free_pages:
	for (i = 0; i < bp->b_page_count; i++)
		__free_page(bp->b_pages[i]);
	bp->b_flags &= ~_XBF_PAGES;
	return error;
}
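
/*
 *	Map buffer into kernel address-space if necessary.
 */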
STATIC int
_xfs_buf_map_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	ASSERT(bp->b_flags & _XBF_PAGES);
	if (bp->b_page_count == 1) {
		/* A single page buffer is always mappable */
		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
	} else if (flags & XBF_UNMAPPED) {
		bp->b_addr = NULL;
	} else {
		int retried = 0;
		unsigned nofs_flag;

		/*
		 * vm_map_ram() will allocate auxiliary structures (e.g.
		 * pagetables) with GFP_KERNEL, yet we are likely to be under
		 * GFP_NOFS context here. Hence we need to tell memory reclaim
		 * that we are in such a context via PF_MEMALLOC_NOFS to prevent
		 * memory reclaim re-entering the filesystem here and
		 * potentially deadlocking.
		 */
		nofs_flag = memalloc_nofs_save();
		do {
			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
						-1, PAGE_KERNEL);
			if (bp->b_addr)
				break;
			vm_unmap_aliases();
		} while (retried++ <= 1);
		memalloc_nofs_restore(nofs_flag);

		if (!bp->b_addr)
			return -ENOMEM;
		bp->b_addr += bp->b_offset;
	}

	return 0;
}
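
/*
 *	Finding and Reading Buffers
 */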
static int
_xfs_buf_obj_cmp(
	struct rhashtable_compare_arg	*arg,
	const void			*obj)
{
	const struct xfs_buf_map	*map = arg->key;
	const struct xfs_buf		*bp = obj;

	/*
	 * The key hashing in the lookup path depends on the key being the
	 * first element of the compare_arg, make sure to assert this.
	 */
	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);

	if (bp->b_bn != map->bm_bn)
		return 1;

	if (unlikely(bp->b_length != map->bm_len)) {
		/*
		 * found a block number match. If the range doesn't
		 * match, the only way this is allowed is if the buffer
		 * in the cache is stale and the transaction that made
		 * it stale has not yet committed. i.e. we are
		 * reallocating a busy extent. Skip this buffer and
		 * continue searching for an exact match.
		 */
		ASSERT(bp->b_flags & XBF_STALE);
		return 1;
	}
	return 0;
}

static const struct rhashtable_params xfs_buf_hash_params = {
	.min_size		= 32,	/* empty AGs have minimal footprint */
	.nelem_hint		= 16,
	.key_len		= sizeof(xfs_daddr_t),
	.key_offset		= offsetof(struct xfs_buf, b_bn),
	.head_offset		= offsetof(struct xfs_buf, b_rhash_head),
	.automatic_shrinking	= true,
	.obj_cmpfn		= _xfs_buf_obj_cmp,
};

int
xfs_buf_hash_init(
	struct xfs_perag	*pag)
{
	spin_lock_init(&pag->pag_buf_lock);
	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
}

void
xfs_buf_hash_destroy(
	struct xfs_perag	*pag)
{
	rhashtable_destroy(&pag->pag_buf_hash);
}
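
/*
 * Look up a buffer in the buffer cache and return it referenced and locked
 * in @found_bp.
 *
 * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
 * cache.
 *
 * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
 * -EAGAIN if we fail to lock it.
 *
 * Return values are:
 *	-EFSCORRUPTED if have been supplied with an invalid address
 *	-EAGAIN on trylock failure
 *	-ENOENT if we fail to find a match and @new_bp was NULL
 *	0, with @found_bp:
 *		- @new_bp if we inserted it into the cache
 *		- the buffer we found and locked.
 */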
static int
xfs_buf_find(
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	struct xfs_buf		*new_bp,
	struct xfs_buf		**found_bp)
{
	struct xfs_perag	*pag;
	xfs_buf_t		*bp;
	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
	xfs_daddr_t		eofs;
	int			i;

	*found_bp = NULL;

	for (i = 0; i < nmaps; i++)
		cmap.bm_len += map[i].bm_len;

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
	ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));

	/*
	 * Corrupted block numbers can get through to here, unfortunately, so
	 * we have to check that the buffer falls within the filesystem bounds.
	 */
	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
	if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
		xfs_alert(btp->bt_mount,
			  "%s: daddr 0x%llx out of range, EOFS 0x%llx",
			  __func__, cmap.bm_bn, eofs);
		WARN_ON(1);
		return -EFSCORRUPTED;
	}

	pag = xfs_perag_get(btp->bt_mount,
			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));

	spin_lock(&pag->pag_buf_lock);
	bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
				    xfs_buf_hash_params);
	if (bp) {
		atomic_inc(&bp->b_hold);
		goto found;
	}

	/* No match found */
	if (!new_bp) {
		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		return -ENOENT;
	}

	/* the buffer keeps the perag reference until it is freed */
	new_bp->b_pag = pag;
	rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
			       xfs_buf_hash_params);
	spin_unlock(&pag->pag_buf_lock);
	*found_bp = new_bp;
	return 0;

found:
	spin_unlock(&pag->pag_buf_lock);
	xfs_perag_put(pag);

	if (!xfs_buf_trylock(bp)) {
		if (flags & XBF_TRYLOCK) {
			xfs_buf_rele(bp);
			XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
			return -EAGAIN;
		}
		xfs_buf_lock(bp);
		XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
	}

	/*
	 * if the buffer is stale, clear all the external state associated with
	 * it. We need to keep flags such as how we allocated the buffer memory
	 * intact here.
	 */
	if (bp->b_flags & XBF_STALE) {
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		ASSERT(bp->b_iodone == NULL);
		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
		bp->b_ops = NULL;
	}

	trace_xfs_buf_find(bp, flags, _RET_IP_);
	XFS_STATS_INC(btp->bt_mount, xb_get_locked);
	*found_bp = bp;
	return 0;
}

struct xfs_buf *
xfs_buf_incore(
	struct xfs_buftarg	*target,
	xfs_daddr_t		blkno,
	size_t			numblks,
	xfs_buf_flags_t		flags)
{
	struct xfs_buf		*bp;
	int			error;
	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);

	error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
	if (error)
		return NULL;
	return bp;
}
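
/*
 * Assembles a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata intensive workloads see a lot of cache hits on
 * buffers they are frequently accessing.
 */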
struct xfs_buf *
xfs_buf_get_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags)
{
	struct xfs_buf		*bp;
	struct xfs_buf		*new_bp;
	int			error = 0;

	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);

	switch (error) {
	case 0:
		/* cache hit */
		goto found;
	case -EAGAIN:
		/* cache hit, trylock failure, caller handles failure */
		ASSERT(flags & XBF_TRYLOCK);
		return NULL;
	case -ENOENT:
		/* cache miss, go for insert */
		break;
	case -EFSCORRUPTED:
	default:
		/*
		 * None of the higher layers understand failure types
		 * yet, so return NULL to signal a fatal lookup error.
		 */
		return NULL;
	}

	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
	if (unlikely(!new_bp))
		return NULL;

	error = xfs_buf_allocate_memory(new_bp, flags);
	if (error) {
		xfs_buf_free(new_bp);
		return NULL;
	}

	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
	if (error) {
		xfs_buf_free(new_bp);
		return NULL;
	}

	if (bp != new_bp)
		xfs_buf_free(new_bp);

found:
	if (!bp->b_addr) {
		error = _xfs_buf_map_pages(bp, flags);
		if (unlikely(error)) {
			xfs_warn(target->bt_mount,
				"%s: failed to map pages", __func__);
			xfs_buf_relse(bp);
			return NULL;
		}
	}

	/*
	 * Clear b_error if this is a lookup from a caller that doesn't expect
	 * valid data to be found in the buffer.
	 */
	if (!(flags & XBF_READ))
		xfs_buf_ioerror(bp, 0);

	XFS_STATS_INC(target->bt_mount, xb_get);
	trace_xfs_buf_get(bp, flags, _RET_IP_);
	return bp;
}

STATIC int
_xfs_buf_read(
	xfs_buf_t		*bp,
	xfs_buf_flags_t		flags)
{
	ASSERT(!(flags & XBF_WRITE));
	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);

	return xfs_buf_submit(bp);
}
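
/*
 * Reverify a buffer found in cache without an attached ->b_ops.
 *
 * If the caller passed an ops structure and the buffer doesn't have ops
 * assigned, set the ops and use it to verify the contents. If verification
 * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
 * already in XBF_DONE state on entry.
 *
 * Under normal operations, every in-core buffer is verified on read I/O
 * completion. This function covers buffers that were read into the cache
 * without a verifier attached, so that they are checked before being handed
 * back to a caller that supplies one.
 */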
int
xfs_buf_reverify(
	struct xfs_buf		*bp,
	const struct xfs_buf_ops *ops)
{
	ASSERT(bp->b_flags & XBF_DONE);
	ASSERT(bp->b_error == 0);

	if (!ops || bp->b_ops)
		return 0;

	bp->b_ops = ops;
	bp->b_ops->verify_read(bp);
	if (bp->b_error)
		bp->b_flags &= ~XBF_DONE;
	return bp->b_error;
}

xfs_buf_t *
xfs_buf_read_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;

	flags |= XBF_READ;

	bp = xfs_buf_get_map(target, map, nmaps, flags);
	if (!bp)
		return NULL;

	trace_xfs_buf_read(bp, flags, _RET_IP_);

	if (!(bp->b_flags & XBF_DONE)) {
		XFS_STATS_INC(target->bt_mount, xb_get_read);
		bp->b_ops = ops;
		_xfs_buf_read(bp, flags);
		return bp;
	}

	xfs_buf_reverify(bp, ops);

	if (flags & XBF_ASYNC) {
		/*
		 * Read ahead call which is already satisfied,
		 * drop the buffer
		 */
		xfs_buf_relse(bp);
		return NULL;
	}

	/* We do not want read in the flags */
	bp->b_flags &= ~XBF_READ;
	ASSERT(bp->b_ops != NULL || ops == NULL);
	return bp;
}
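
/*
 *	If we are not low on memory then do the readahead in a deadlock
 *	safe manner.
 */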
void
xfs_buf_readahead_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	const struct xfs_buf_ops *ops)
{
	if (bdi_read_congested(target->bt_bdev->bd_bdi))
		return;

	xfs_buf_read_map(target, map, nmaps,
		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
}
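
/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */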
int
xfs_buf_read_uncached(
	struct xfs_buftarg	*target,
	xfs_daddr_t		daddr,
	size_t			numblks,
	int			flags,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;

	*bpp = NULL;

	bp = xfs_buf_get_uncached(target, numblks, flags);
	if (!bp)
		return -ENOMEM;

	/* set up the buffer for a read IO */
	ASSERT(bp->b_map_count == 1);
	bp->b_bn = XFS_BUF_DADDR_NULL;	/* always null for uncached buffers */
	bp->b_maps[0].bm_bn = daddr;
	bp->b_flags |= XBF_READ;
	bp->b_ops = ops;

	xfs_buf_submit(bp);
	if (bp->b_error) {
		int	error = bp->b_error;
		xfs_buf_relse(bp);
		return error;
	}

	*bpp = bp;
	return 0;
}

xfs_buf_t *
xfs_buf_get_uncached(
	struct xfs_buftarg	*target,
	size_t			numblks,
	int			flags)
{
	unsigned long		page_count;
	int			error, i;
	struct xfs_buf		*bp;
	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

	/* flags might contain irrelevant bits, pass only what we care about */
	bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
	if (unlikely(bp == NULL))
		goto fail;

	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
	error = _xfs_buf_get_pages(bp, page_count);
	if (error)
		goto fail_free_buf;

	for (i = 0; i < page_count; i++) {
		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
		if (!bp->b_pages[i])
			goto fail_free_mem;
	}
	bp->b_flags |= _XBF_PAGES;

	error = _xfs_buf_map_pages(bp, 0);
	if (unlikely(error)) {
		xfs_warn(target->bt_mount,
			"%s: failed to map pages", __func__);
		goto fail_free_mem;
	}

	trace_xfs_buf_get_uncached(bp, _RET_IP_);
	return bp;

 fail_free_mem:
	while (--i >= 0)
		__free_page(bp->b_pages[i]);
	_xfs_buf_free_pages(bp);
 fail_free_buf:
	xfs_buf_free_maps(bp);
	kmem_zone_free(xfs_buf_zone, bp);
 fail:
	return NULL;
}
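
/*
 *	Increment reference count on buffer, to hold the buffer concurrently
 *	with another thread which may release (free) the buffer asynchronously.
 *	Must hold the buffer already to call this function.
 */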
void
xfs_buf_hold(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_hold(bp, _RET_IP_);
	atomic_inc(&bp->b_hold);
}
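
/*
 *	Release a hold on the specified buffer. If the hold count is 1, the
 *	buffer is freed or placed on the LRU for later reclaim.
 */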
void
xfs_buf_rele(
	xfs_buf_t		*bp)
{
	struct xfs_perag	*pag = bp->b_pag;
	bool			release;
	bool			freebuf = false;

	trace_xfs_buf_rele(bp, _RET_IP_);

	if (!pag) {
		ASSERT(list_empty(&bp->b_lru));
		if (atomic_dec_and_test(&bp->b_hold)) {
			xfs_buf_ioacct_dec(bp);
			xfs_buf_free(bp);
		}
		return;
	}

	ASSERT(atomic_read(&bp->b_hold) > 0);

	/*
	 * We grab the buffer lock before releasing our hold - this prevents
	 * lookups from taking a new hold on the buffer while we decide its
	 * fate. atomic_dec_and_lock() only takes the pag_buf_lock when the
	 * hold count drops to zero, which is the only case where the buffer
	 * may need to be removed from the cache and freed.
	 */
	spin_lock(&bp->b_lock);
	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
	if (!release) {
		/*
		 * Drop the in-flight state if the buffer is already on the LRU
		 * and it holds the only reference. This is racy because we
		 * haven't acquired the pag lock, but the use of _XBF_DELWRI_Q
		 * ensures the decrement occurs only once per-buf.
		 */
		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
			__xfs_buf_ioacct_dec(bp);
		goto out_unlock;
	}

	/* the last reference has been dropped ... */
	__xfs_buf_ioacct_dec(bp);
	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
		/*
		 * If the buffer is added to the LRU take a new reference to the
		 * buffer for the LRU and clear the (now stale) dispose list
		 * state flag
		 */
		if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
			bp->b_state &= ~XFS_BSTATE_DISPOSE;
			atomic_inc(&bp->b_hold);
		}
		spin_unlock(&pag->pag_buf_lock);
	} else {
		/*
		 * most of the time buffers will already be removed from the
		 * LRU, so optimise that case by checking for the
		 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
		 * was on was the disposal list
		 */
		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
			list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
		} else {
			ASSERT(list_empty(&bp->b_lru));
		}

		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
				       xfs_buf_hash_params);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		freebuf = true;
	}

out_unlock:
	spin_unlock(&bp->b_lock);

	if (freebuf)
		xfs_buf_free(bp);
}
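
/*
 *	Lock a buffer object, if it is not already locked.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we are
 *	being asked to lock a buffer that has been reallocated. Because it is
 *	pinned, we know that the log has not been pushed to disk and hence it
 *	will still be locked.  Rather than continuing to have trylock attempts
 *	fail until someone else pushes the log, push it ourselves before
 *	returning.  This means that the flushing will start immediately.
 */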
int
xfs_buf_trylock(
	struct xfs_buf		*bp)
{
	int			locked;

	locked = down_trylock(&bp->b_sema) == 0;
	if (locked)
		trace_xfs_buf_trylock(bp, _RET_IP_);
	else
		trace_xfs_buf_trylock_fail(bp, _RET_IP_);
	return locked;
}
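
/*
 *	Lock a buffer object.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we
 *	are being asked to lock a buffer that has been reallocated. Because
 *	it is pinned, we know that the log has not been pushed to disk and
 *	hence it will still be locked.  Rather than sleeping until someone
 *	else pushes the log, push it ourselves before blocking on the
 *	semaphore.
 */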
void
xfs_buf_lock(
	struct xfs_buf		*bp)
{
	trace_xfs_buf_lock(bp, _RET_IP_);

	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
		xfs_log_force(bp->b_mount, 0);
	down(&bp->b_sema);

	trace_xfs_buf_lock_done(bp, _RET_IP_);
}

void
xfs_buf_unlock(
	struct xfs_buf		*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	up(&bp->b_sema);
	trace_xfs_buf_unlock(bp, _RET_IP_);
}

STATIC void
xfs_buf_wait_unpin(
	xfs_buf_t		*bp)
{
	DECLARE_WAITQUEUE	(wait, current);

	if (atomic_read(&bp->b_pin_count) == 0)
		return;

	add_wait_queue(&bp->b_waiters, &wait);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&bp->b_pin_count) == 0)
			break;
		io_schedule();
	}
	remove_wait_queue(&bp->b_waiters, &wait);
	set_current_state(TASK_RUNNING);
}
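
/*
 *	Buffer Utility Routines
 */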
void
xfs_buf_ioend(
	struct xfs_buf	*bp)
{
	bool		read = bp->b_flags & XBF_READ;

	trace_xfs_buf_iodone(bp, _RET_IP_);

	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);

	/*
	 * Pull in IO completion errors now. We are guaranteed to be running
	 * single threaded, so we don't need the lock to read b_io_error.
	 */
	if (!bp->b_error && bp->b_io_error)
		xfs_buf_ioerror(bp, bp->b_io_error);

	/* Only validate buffers that were read without errors */
	if (read && !bp->b_error && bp->b_ops) {
		ASSERT(!bp->b_iodone);
		bp->b_ops->verify_read(bp);
	}

	if (!bp->b_error)
		bp->b_flags |= XBF_DONE;

	if (bp->b_iodone)
		(*(bp->b_iodone))(bp);
	else if (bp->b_flags & XBF_ASYNC)
		xfs_buf_relse(bp);
	else
		complete(&bp->b_iowait);
}

static void
xfs_buf_ioend_work(
	struct work_struct	*work)
{
	struct xfs_buf		*bp =
		container_of(work, xfs_buf_t, b_ioend_work);

	xfs_buf_ioend(bp);
}

static void
xfs_buf_ioend_async(
	struct xfs_buf	*bp)
{
	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
	queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
}

void
__xfs_buf_ioerror(
	xfs_buf_t	*bp,
	int		error,
	xfs_failaddr_t	failaddr)
{
	ASSERT(error <= 0 && error >= -1000);
	bp->b_error = error;
	trace_xfs_buf_ioerror(bp, error, failaddr);
}

void
xfs_buf_ioerror_alert(
	struct xfs_buf		*bp,
	const char		*func)
{
	xfs_alert(bp->b_mount,
"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
			func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
			-bp->b_error);
}

int
xfs_bwrite(
	struct xfs_buf		*bp)
{
	int			error;

	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_WRITE;
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
			 XBF_WRITE_FAIL | XBF_DONE);

	error = xfs_buf_submit(bp);
	if (error)
		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
	return error;
}

static void
xfs_buf_bio_end_io(
	struct bio		*bio)
{
	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;

	/*
	 * don't overwrite existing errors - otherwise we can lose errors on
	 * buffers that require multiple bios to complete.
	 */
	if (bio->bi_status) {
		int error = blk_status_to_errno(bio->bi_status);

		cmpxchg(&bp->b_io_error, 0, error);
	}

	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
		xfs_buf_ioend_async(bp);
	bio_put(bio);
}

static void
xfs_buf_ioapply_map(
	struct xfs_buf	*bp,
	int		map,
	int		*buf_offset,
	int		*count,
	int		op,
	int		op_flags)
{
	int		page_index;
	int		total_nr_pages = bp->b_page_count;
	int		nr_pages;
	struct bio	*bio;
	sector_t	sector = bp->b_maps[map].bm_bn;
	int		size;
	int		offset;

	/* skip the pages in the buffer before the start offset */
	page_index = 0;
	offset = *buf_offset;
	while (offset >= PAGE_SIZE) {
		page_index++;
		offset -= PAGE_SIZE;
	}

	/*
	 * Limit the IO size to the length of the current vector, and update
	 * the remaining IO count for the next time around.
	 */
	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
	*count -= size;
	*buf_offset += size;

next_chunk:
	atomic_inc(&bp->b_io_remaining);
	nr_pages = min(total_nr_pages, BIO_MAX_PAGES);

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio_set_dev(bio, bp->b_target->bt_bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = xfs_buf_bio_end_io;
	bio->bi_private = bp;
	bio_set_op_attrs(bio, op, op_flags);

	for (; size && nr_pages; nr_pages--, page_index++) {
		int	rbytes, nbytes = PAGE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
				      offset);
		if (rbytes < nbytes)
			break;

		offset = 0;
		sector += BTOBB(nbytes);
		size -= nbytes;
		total_nr_pages--;
	}

	if (likely(bio->bi_iter.bi_size)) {
		if (xfs_buf_is_vmapped(bp)) {
			flush_kernel_vmap_range(bp->b_addr,
						xfs_buf_vmap_len(bp));
		}
		submit_bio(bio);
		if (size)
			goto next_chunk;
	} else {
		/*
		 * This is guaranteed not to be the last io reference count
		 * because the caller (xfs_buf_submit) holds a count itself.
		 */
		atomic_dec(&bp->b_io_remaining);
		xfs_buf_ioerror(bp, -EIO);
		bio_put(bio);
	}

}

STATIC void
_xfs_buf_ioapply(
	struct xfs_buf	*bp)
{
	struct blk_plug	plug;
	int		op;
	int		op_flags = 0;
	int		offset;
	int		size;
	int		i;

	/*
	 * Make sure we capture only current IO errors rather than stale errors
	 * left over from previous use of the buffer (e.g. failed readahead).
	 */
	bp->b_error = 0;

	if (bp->b_flags & XBF_WRITE) {
		op = REQ_OP_WRITE;

		/*
		 * Run the write verifier callback function if it exists. If
		 * this function fails it will mark the buffer with an error
		 * and the IO should not be dispatched.
		 */
		if (bp->b_ops) {
			bp->b_ops->verify_write(bp);
			if (bp->b_error) {
				xfs_force_shutdown(bp->b_mount,
						   SHUTDOWN_CORRUPT_INCORE);
				return;
			}
		} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
			struct xfs_mount *mp = bp->b_mount;

			/*
			 * non-crc filesystems don't attach verifiers during
			 * log recovery, so don't warn for such filesystems.
			 */
			if (xfs_sb_version_hascrc(&mp->m_sb)) {
				xfs_warn(mp,
					"%s: no buf ops on daddr 0x%llx len %d",
					__func__, bp->b_bn, bp->b_length);
				xfs_hex_dump(bp->b_addr,
						XFS_CORRUPTION_DUMP_LEN);
				dump_stack();
			}
		}
	} else if (bp->b_flags & XBF_READ_AHEAD) {
		op = REQ_OP_READ;
		op_flags = REQ_RAHEAD;
	} else {
		op = REQ_OP_READ;
	}

	/* we only use the buffer cache for meta-data */
	op_flags |= REQ_META;

	/*
	 * Walk all the vectors issuing IO on them. Set up the initial offset
	 * into the buffer and the desired IO size before we start -
	 * _xfs_buf_ioapply_map() will modify them appropriately for each IO.
	 */
	offset = bp->b_offset;
	size = BBTOB(bp->b_length);
	blk_start_plug(&plug);
	for (i = 0; i < bp->b_map_count; i++) {
		xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
		if (bp->b_error)
			break;
		if (size <= 0)
			break;	/* all done */
	}
	blk_finish_plug(&plug);
}
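
/*
 * Wait for I/O completion of a sync buffer and return the I/O error code.
 */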
static int
xfs_buf_iowait(
	struct xfs_buf	*bp)
{
	ASSERT(!(bp->b_flags & XBF_ASYNC));

	trace_xfs_buf_iowait(bp, _RET_IP_);
	wait_for_completion(&bp->b_iowait);
	trace_xfs_buf_iowait_done(bp, _RET_IP_);

	return bp->b_error;
}
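
/*
 * Buffer I/O submission path, read or write. Asynchronous submission transfers
 * the buffer lock ownership and the current reference to the IO. It is not
 * safe to reference the buffer after a call to this function unless the caller
 * holds an additional reference itself.
 */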
int
__xfs_buf_submit(
	struct xfs_buf	*bp,
	bool		wait)
{
	int		error = 0;

	trace_xfs_buf_submit(bp, _RET_IP_);

	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));

	/* on shutdown we stale and complete the buffer immediately */
	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
		xfs_buf_ioerror(bp, -EIO);
		bp->b_flags &= ~XBF_DONE;
		xfs_buf_stale(bp);
		xfs_buf_ioend(bp);
		return -EIO;
	}

	/*
	 * Grab a reference so the buffer does not go away underneath us. For
	 * async buffers, I/O completion drops the callers reference, which
	 * could occur before submission returns.
	 */
	xfs_buf_hold(bp);

	if (bp->b_flags & XBF_WRITE)
		xfs_buf_wait_unpin(bp);

	/* clear the internal error state to avoid spurious errors */
	bp->b_io_error = 0;

	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from
	 * calling xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	if (bp->b_flags & XBF_ASYNC)
		xfs_buf_ioacct_inc(bp);
	_xfs_buf_ioapply(bp);

	/*
	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
	 * reference we took above. If we drop it to zero, run completion
	 * processing synchronously so that we don't return to the caller with
	 * completion still pending.
	 */
	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
		if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
			xfs_buf_ioend(bp);
		else
			xfs_buf_ioend_async(bp);
	}

	if (wait)
		error = xfs_buf_iowait(bp);

	/*
	 * Release the hold that keeps the buffer referenced for the entire
	 * I/O. Note that if the buffer is async, it is not safe to reference
	 * after this release.
	 */
	xfs_buf_rele(bp);
	return error;
}

void *
xfs_buf_offset(
	struct xfs_buf		*bp,
	size_t			offset)
{
	struct page		*page;

	if (bp->b_addr)
		return bp->b_addr + offset;

	offset += bp->b_offset;
	page = bp->b_pages[offset >> PAGE_SHIFT];
	return page_address(page) + (offset & (PAGE_SIZE-1));
}

void
xfs_buf_zero(
	struct xfs_buf		*bp,
	size_t			boff,
	size_t			bsize)
{
	size_t			bend;

	bend = boff + bsize;
	while (boff < bend) {
		struct page	*page;
		int		page_index, page_offset, csize;

		page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
		page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
		page = bp->b_pages[page_index];
		csize = min_t(size_t, PAGE_SIZE - page_offset,
				      BBTOB(bp->b_length) - boff);

		ASSERT((csize + page_offset) <= PAGE_SIZE);

		memset(page_address(page) + page_offset, 0, csize);

		boff += csize;
	}
}
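
/*
 *	Handling of buffer targets (buftargs).
 */

/*
 * Wait for any bufs with callbacks that have been submitted but have not yet
 * returned. These buffers will have an elevated hold count, so wait on those
 * while freeing all the buffers only held by the LRU.
 */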
static enum lru_status
xfs_buftarg_wait_rele(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)

{
	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head	*dispose = arg;

	if (atomic_read(&bp->b_hold) > 1) {
		/* need to wait, so skip it this pass */
		trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
		return LRU_SKIP;
	}
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;

	/*
	 * clear the lru reference count so the buffer doesn't get
	 * ignored in xfs_buf_rele() and the buffer is freed.
	 */
	atomic_set(&bp->b_lru_ref, 0);
	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}

void
xfs_wait_buftarg(
	struct xfs_buftarg	*btp)
{
	LIST_HEAD(dispose);
	int			loop = 0;

	/*
	 * First wait on the buftarg I/O count for all in-flight buffers to be
	 * released. This is critical as new buffers do not make the LRU until
	 * they are released.
	 *
	 * Next, flush the workqueue to ensure all completion processing has
	 * finished. Just waiting on buffer locks is not sufficient for async
	 * IO as the reference count held over IO is not released until after
	 * the buffer lock drops. Hence we need to ensure here that all
	 * reference counts have been dropped before we start walking the LRU
	 * list.
	 */
	while (percpu_counter_sum(&btp->bt_io_count))
		delay(100);
	flush_workqueue(btp->bt_mount->m_buf_workqueue);

	/* loop until there is nothing left on the lru list. */
	while (list_lru_count(&btp->bt_lru)) {
		list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
			      &dispose, LONG_MAX);

		while (!list_empty(&dispose)) {
			struct xfs_buf *bp;
			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
			list_del_init(&bp->b_lru);
			if (bp->b_flags & XBF_WRITE_FAIL) {
				xfs_alert(btp->bt_mount,
"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
					(long long)bp->b_bn);
				xfs_alert(btp->bt_mount,
"Please run xfs_repair to determine the extent of the problem.");
			}
			xfs_buf_rele(bp);
		}
		if (loop++ != 0)
			delay(100);
	}
}

static enum lru_status
xfs_buftarg_isolate(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)
{
	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head	*dispose = arg;

	/*
	 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
	 * If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;
	/*
	 * Decrement the b_lru_ref count unless the value is already
	 * zero. If the value is already zero, we need to reclaim the
	 * buffer, otherwise it gets another trip around the LRU.
	 */
	if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
		spin_unlock(&bp->b_lock);
		return LRU_ROTATE;
	}

	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}

static unsigned long
xfs_buftarg_shrink_scan(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_buftarg	*btp = container_of(shrink,
					struct xfs_buftarg, bt_shrinker);
	LIST_HEAD(dispose);
	unsigned long		freed;

	freed = list_lru_shrink_walk(&btp->bt_lru, sc,
				     xfs_buftarg_isolate, &dispose);

	while (!list_empty(&dispose)) {
		struct xfs_buf *bp;
		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
		list_del_init(&bp->b_lru);
		xfs_buf_rele(bp);
	}

	return freed;
}

static unsigned long
xfs_buftarg_shrink_count(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_buftarg	*btp = container_of(shrink,
					struct xfs_buftarg, bt_shrinker);
	return list_lru_shrink_count(&btp->bt_lru, sc);
}

void
xfs_free_buftarg(
	struct xfs_buftarg	*btp)
{
	unregister_shrinker(&btp->bt_shrinker);
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
	percpu_counter_destroy(&btp->bt_io_count);
	list_lru_destroy(&btp->bt_lru);

	xfs_blkdev_issue_flush(btp);

	kmem_free(btp);
}

int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		sectorsize)
{
	/* Set up metadata sector size info */
	btp->bt_meta_sectorsize = sectorsize;
	btp->bt_meta_sectormask = sectorsize - 1;

	if (set_blocksize(btp->bt_bdev, sectorsize)) {
		xfs_warn(btp->bt_mount,
			"Cannot set_blocksize to %u on device %pg",
			sectorsize, btp->bt_bdev);
		return -EINVAL;
	}

	/* Set up device logical sector size mask */
	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;

	return 0;
}
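
/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */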
STATIC int
xfs_setsize_buftarg_early(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct xfs_mount	*mp,
	struct block_device	*bdev,
	struct dax_device	*dax_dev)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);

	btp->bt_mount = mp;
	btp->bt_dev = bdev->bd_dev;
	btp->bt_bdev = bdev;
	btp->bt_daxdev = dax_dev;

	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error_free;

	if (list_lru_init(&btp->bt_lru))
		goto error_free;

	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
		goto error_lru;

	btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
	btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
	btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
	if (register_shrinker(&btp->bt_shrinker))
		goto error_pcpu;
	return btp;

error_pcpu:
	percpu_counter_destroy(&btp->bt_io_count);
error_lru:
	list_lru_destroy(&btp->bt_lru);
error_free:
	kmem_free(btp);
	return NULL;
}
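
/*
 * Cancel a delayed write list.
 *
 * Remove each buffer from the list, clear the delwri queue flag and drop the
 * associated buffer reference.
 */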
void
xfs_buf_delwri_cancel(
	struct list_head	*list)
{
	struct xfs_buf		*bp;

	while (!list_empty(list)) {
		bp = list_first_entry(list, struct xfs_buf, b_list);

		xfs_buf_lock(bp);
		bp->b_flags &= ~_XBF_DELWRI_Q;
		list_del_init(&bp->b_list);
		xfs_buf_relse(bp);
	}
}
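
/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been.  Note that
 * neither this routine nor the buffer list submission functions perform any
 * internal synchronization.  It is expected that the lists are thread-local
 * to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */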
bool
xfs_buf_delwri_queue(
	struct xfs_buf		*bp,
	struct list_head	*list)
{
	ASSERT(xfs_buf_islocked(bp));
	ASSERT(!(bp->b_flags & XBF_READ));

	/*
	 * If the buffer is already marked delwri it already is queued up
	 * by someone else for immediate writeout.  Just ignore it in that
	 * case.
	 */
	if (bp->b_flags & _XBF_DELWRI_Q) {
		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
		return false;
	}

	trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	/*
	 * If a buffer gets written out synchronously or marked stale while it
	 * is on a delwri list we lazily remove it. To do this, the other party
	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
	 * It remains referenced and on the list.  In a rare corner case it
	 * might get readded to a delwri list after the synchronous writeout,
	 * in which case we just need to re-add the flag here.
	 */
	bp->b_flags |= _XBF_DELWRI_Q;
	if (list_empty(&bp->b_list)) {
		atomic_inc(&bp->b_hold);
		list_add_tail(&bp->b_list, list);
	}

	return true;
}
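
/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */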
static int
xfs_buf_cmp(
	void			*priv,
	struct list_head	*a,
	struct list_head	*b)
{
	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
	xfs_daddr_t		diff;

	diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}
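
/*
 * Submit buffers for write. If wait_list is specified, the buffers are
 * submitted using sync I/O and placed on the wait list such that the caller
 * can iowait each buffer. Otherwise async I/O is used and the buffers are
 * released on I/O completion.
 */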
static int
xfs_buf_delwri_submit_buffers(
	struct list_head	*buffer_list,
	struct list_head	*wait_list)
{
	struct xfs_buf		*bp, *n;
	int			pinned = 0;
	struct blk_plug		plug;

	list_sort(NULL, buffer_list, xfs_buf_cmp);

	blk_start_plug(&plug);
	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
		if (!wait_list) {
			if (xfs_buf_ispinned(bp)) {
				pinned++;
				continue;
			}
			if (!xfs_buf_trylock(bp))
				continue;
		} else {
			xfs_buf_lock(bp);
		}

		/*
		 * Someone else might have written the buffer synchronously or
		 * marked it stale in the meantime.  In that case only the
		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
		 * reference and remove it from the list here.
		 */
		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
			list_del_init(&bp->b_list);
			xfs_buf_relse(bp);
			continue;
		}

		trace_xfs_buf_delwri_split(bp, _RET_IP_);

		/*
		 * If we have a wait list, each buffer (and associated delwri
		 * queue reference) transfers to it and is submitted
		 * synchronously. Otherwise, drop the buffer from the delwri
		 * queue and submit async.
		 */
		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
		bp->b_flags |= XBF_WRITE;
		if (wait_list) {
			bp->b_flags &= ~XBF_ASYNC;
			list_move_tail(&bp->b_list, wait_list);
		} else {
			bp->b_flags |= XBF_ASYNC;
			list_del_init(&bp->b_list);
		}
		__xfs_buf_submit(bp, false);
	}
	blk_finish_plug(&plug);

	return pinned;
}
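
/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers.  This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing as the @buffer_list is consumed in this
 * function.
 *
 * Note: this function will skip buffers it would block on, and in doing so
 * leaves them on @buffer_list so they can be retried on a later pass. As such,
 * we should never have two iterations of this function running at the same
 * time and hence as long as we always pick up all buffers on the @buffer_list
 * that others have written back in the mean time, there should never be an
 * endless loop in here.
 */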
int
xfs_buf_delwri_submit_nowait(
	struct list_head	*buffer_list)
{
	return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
}
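
/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require such
 * functionality.
 */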
int
xfs_buf_delwri_submit(
	struct list_head	*buffer_list)
{
	LIST_HEAD		(wait_list);
	int			error = 0, error2;
	struct xfs_buf		*bp;

	xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);

	/* Wait for IO to complete. */
	while (!list_empty(&wait_list)) {
		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

		list_del_init(&bp->b_list);

		/*
		 * Wait on the locked buffer, check for errors and unlock and
		 * release the delwri queue reference.
		 */
		error2 = xfs_buf_iowait(bp);
		xfs_buf_relse(bp);
		if (!error)
			error = error2;
	}

	return error;
}
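
/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri queue
 * and return with the buffer still on the original queue. The waiting delwri
 * buffer submission infrastructure guarantees transfer of the delwri queue
 * buffer reference to a temporary wait list. We reuse this infrastructure to
 * transfer the buffer back to the original queue.
 *
 * Note the buffer transitions from the queued state, to the submitted and wait
 * listed state and back to the queued state during this call. The buffer
 * locking and queue management logic between _delwri_pushbuf() and
 * _delwri_queue() guarantees that the buffer cannot be queued to another list
 * in the interim.
 */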
int
xfs_buf_delwri_pushbuf(
	struct xfs_buf		*bp,
	struct list_head	*buffer_list)
{
	LIST_HEAD		(submit_list);
	int			error;

	ASSERT(bp->b_flags & _XBF_DELWRI_Q);

	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);

	/*
	 * Isolate the buffer to a new local list so we can submit it for I/O
	 * independently from the rest of the original list.
	 */
	xfs_buf_lock(bp);
	list_move(&bp->b_list, &submit_list);
	xfs_buf_unlock(bp);

	/*
	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
	 * the buffer on the wait list with the original reference. Rather than
	 * bounce the buffer from a local wait list back to the original list
	 * after I/O completion, reuse the original list as the wait list.
	 */
	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);

	/*
	 * The buffer is now locked, under I/O and wait listed on the original
	 * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
	 * return with the buffer unlocked and on the original queue.
	 */
	error = xfs_buf_iowait(bp);
	bp->b_flags |= _XBF_DELWRI_Q;
	xfs_buf_unlock(bp);

	return error;
}

int __init
xfs_buf_init(void)
{
	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
						KM_ZONE_HWALIGN, NULL);
	if (!xfs_buf_zone)
		goto out;

	return 0;

 out:
	return -ENOMEM;
}

void
xfs_buf_terminate(void)
{
	kmem_zone_destroy(xfs_buf_zone);
}

void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
	/*
	 * Set the lru reference count to 0 based on the error injection tag.
	 * This allows userspace to disrupt buffer caching for debug/testing
	 * purposes.
	 */
	if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
		lru_ref = 0;

	atomic_set(&bp->b_lru_ref, lru_ref);
}
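
/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */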
bool
xfs_verify_magic(
	struct xfs_buf		*bp,
	__be32			dmagic)
{
	struct xfs_mount	*mp = bp->b_mount;
	int			idx;

	idx = xfs_sb_version_hascrc(&mp->m_sb);
	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
		return false;
	return dmagic == bp->b_ops->magic[idx];
}
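
/*
 * Verify an on-disk magic value against the magic value specified in the
 * verifier structure. The verifier magic is in disk byte order so the caller
 * is expected to pass the value directly from disk.
 */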
bool
xfs_verify_magic16(
	struct xfs_buf		*bp,
	__be16			dmagic)
{
	struct xfs_mount	*mp = bp->b_mount;
	int			idx;

	idx = xfs_sb_version_hascrc(&mp->m_sb);
	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
		return false;
	return dmagic == bp->b_ops->magic16[idx];
}