// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>

#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_errortag.h"
#include "xfs_error.h"

static kmem_zone_t *xfs_buf_zone;

#ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp)       ((bp)->b_last_holder = current->pid)
# define XB_CLEAR_OWNER(bp)     ((bp)->b_last_holder = -1)
# define XB_GET_OWNER(bp)       ((bp)->b_last_holder)
#else
# define XB_SET_OWNER(bp)       do { } while (0)
# define XB_CLEAR_OWNER(bp)     do { } while (0)
# define XB_GET_OWNER(bp)       do { } while (0)
#endif

#define xb_to_gfp(flags) \
        ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)

51static inline int
52xfs_buf_is_vmapped(
53 struct xfs_buf *bp)
54{
        /*
         * Return true if the buffer is vmapped.
         *
         * b_addr is null if the buffer is not mapped, but the code is clever
         * enough to know it doesn't have to map a single page, so the check
         * has to be both for b_addr and bp->b_page_count > 1.
         */
62 return bp->b_addr && bp->b_page_count > 1;
63}
64
65static inline int
66xfs_buf_vmap_len(
67 struct xfs_buf *bp)
68{
69 return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
70}
71
/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. The count is incremented once per buffer (per hold cycle)
 * because the corresponding decrement is deferred to buffer release. Buffers
 * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
 * accounting is preferred over a transient state flag on every submission.
 *
 * The XFS_BSTATE_IN_FLIGHT flag (serialised by b_lock) records whether this
 * buffer has already been accounted, so bt_io_count is only ever bumped once
 * per buffer while it is in flight.
 */
85static inline void
86xfs_buf_ioacct_inc(
87 struct xfs_buf *bp)
88{
89 if (bp->b_flags & XBF_NO_IOACCT)
90 return;
91
92 ASSERT(bp->b_flags & XBF_ASYNC);
93 spin_lock(&bp->b_lock);
94 if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
95 bp->b_state |= XFS_BSTATE_IN_FLIGHT;
96 percpu_counter_inc(&bp->b_target->bt_io_count);
97 }
98 spin_unlock(&bp->b_lock);
99}

/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg.
 */
105static inline void
106__xfs_buf_ioacct_dec(
107 struct xfs_buf *bp)
108{
109 lockdep_assert_held(&bp->b_lock);
110
111 if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
112 bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
113 percpu_counter_dec(&bp->b_target->bt_io_count);
114 }
115}
116
117static inline void
118xfs_buf_ioacct_dec(
119 struct xfs_buf *bp)
120{
121 spin_lock(&bp->b_lock);
122 __xfs_buf_ioacct_dec(bp);
123 spin_unlock(&bp->b_lock);
124}
125
/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
 * reference count falls to zero. If the buffer is already on the LRU, we need
 * to remove the reference that LRU holds on the buffer.
 *
 * This prevents build-up of stale buffers on the LRU.
 */
134void
135xfs_buf_stale(
136 struct xfs_buf *bp)
137{
138 ASSERT(xfs_buf_islocked(bp));
139
140 bp->b_flags |= XBF_STALE;

        /*
         * Clear the delwri status so that a delwri queue walker will not
         * flush this buffer to disk now that it is stale. The delwri queue has
         * a reference to the buffer, so this is safe to do.
         */
147 bp->b_flags &= ~_XBF_DELWRI_Q;

        /*
         * Once the buffer is marked stale and unlocked, a subsequent lookup
         * could reset b_flags. There is no guarantee that the buffer is
         * unaccounted (released to the LRU) before that occurs. Clear the
         * in-flight state anyway to handle this race.
         */
155 spin_lock(&bp->b_lock);
156 __xfs_buf_ioacct_dec(bp);
157
158 atomic_set(&bp->b_lru_ref, 0);
159 if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
160 (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
161 atomic_dec(&bp->b_hold);
162
163 ASSERT(atomic_read(&bp->b_hold) >= 1);
164 spin_unlock(&bp->b_lock);
165}
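
/*
 * Usage sketch (illustrative, not lifted from a specific caller): a thread
 * that wants a cached buffer torn down as soon as the last hold goes away
 * would typically do:
 *
 *	xfs_buf_lock(bp);	// xfs_buf_stale() asserts the buffer is locked
 *	xfs_buf_stale(bp);	// drop the LRU reference, zero b_lru_ref
 *	xfs_buf_relse(bp);	// unlock and release; the buffer is freed once
 *				// b_hold reaches zero
 */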
166
167static int
168xfs_buf_get_maps(
169 struct xfs_buf *bp,
170 int map_count)
171{
172 ASSERT(bp->b_maps == NULL);
173 bp->b_map_count = map_count;
174
175 if (map_count == 1) {
176 bp->b_maps = &bp->__b_map;
177 return 0;
178 }
179
180 bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
181 KM_NOFS);
182 if (!bp->b_maps)
183 return -ENOMEM;
184 return 0;
185}
186
187
188
189
190static void
191xfs_buf_free_maps(
192 struct xfs_buf *bp)
193{
194 if (bp->b_maps != &bp->__b_map) {
195 kmem_free(bp->b_maps);
196 bp->b_maps = NULL;
197 }
198}
199
200struct xfs_buf *
201_xfs_buf_alloc(
202 struct xfs_buftarg *target,
203 struct xfs_buf_map *map,
204 int nmaps,
205 xfs_buf_flags_t flags)
206{
207 struct xfs_buf *bp;
208 int error;
209 int i;
210
211 bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
212 if (unlikely(!bp))
213 return NULL;

        /*
         * We don't want certain flags to appear in b_flags unless they are
         * specifically set by later operations on the buffer.
         */
219 flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
220
221 atomic_set(&bp->b_hold, 1);
222 atomic_set(&bp->b_lru_ref, 1);
223 init_completion(&bp->b_iowait);
224 INIT_LIST_HEAD(&bp->b_lru);
225 INIT_LIST_HEAD(&bp->b_list);
226 INIT_LIST_HEAD(&bp->b_li_list);
227 sema_init(&bp->b_sema, 0);
228 spin_lock_init(&bp->b_lock);
229 XB_SET_OWNER(bp);
230 bp->b_target = target;
231 bp->b_flags = flags;

        /*
         * Set length and io_length to the same value initially.
         * I/O routines should use io_length, which will be the same in
         * most cases but may be reset (e.g. XFS recovery).
         */
238 error = xfs_buf_get_maps(bp, nmaps);
239 if (error) {
240 kmem_zone_free(xfs_buf_zone, bp);
241 return NULL;
242 }
243
244 bp->b_bn = map[0].bm_bn;
245 bp->b_length = 0;
246 for (i = 0; i < nmaps; i++) {
247 bp->b_maps[i].bm_bn = map[i].bm_bn;
248 bp->b_maps[i].bm_len = map[i].bm_len;
249 bp->b_length += map[i].bm_len;
250 }
251 bp->b_io_length = bp->b_length;
252
253 atomic_set(&bp->b_pin_count, 0);
254 init_waitqueue_head(&bp->b_waiters);
255
256 XFS_STATS_INC(target->bt_mount, xb_create);
257 trace_xfs_buf_init(bp, _RET_IP_);
258
259 return bp;
260}
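
/*
 * Note on units (derived from the code above, values illustrative): bm_bn
 * and bm_len are expressed in 512-byte basic blocks, so a single-map buffer
 * of 8 basic blocks describes BBTOB(8) == 4096 bytes. b_length is the sum
 * of all the map lengths and b_io_length starts out equal to it.
 */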

/*
 * Allocate a page array capable of holding a specified number
 * of pages, and point the page buf at it.
 */
266STATIC int
267_xfs_buf_get_pages(
268 xfs_buf_t *bp,
269 int page_count)
270{
271
272 if (bp->b_pages == NULL) {
273 bp->b_page_count = page_count;
274 if (page_count <= XB_PAGES) {
275 bp->b_pages = bp->b_page_array;
276 } else {
277 bp->b_pages = kmem_alloc(sizeof(struct page *) *
278 page_count, KM_NOFS);
279 if (bp->b_pages == NULL)
280 return -ENOMEM;
281 }
282 memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
283 }
284 return 0;
285}
286
287
288
289
290STATIC void
291_xfs_buf_free_pages(
292 xfs_buf_t *bp)
293{
294 if (bp->b_pages != bp->b_page_array) {
295 kmem_free(bp->b_pages);
296 bp->b_pages = NULL;
297 }
298}

/*
 * Release the memory backing a buffer: the heap allocation or page array,
 * plus the map array, and finally return the xfs_buf itself to the zone.
 * The buffer must already have been removed from the LRU.
 */
307void
308xfs_buf_free(
309 xfs_buf_t *bp)
310{
311 trace_xfs_buf_free(bp, _RET_IP_);
312
313 ASSERT(list_empty(&bp->b_lru));
314
315 if (bp->b_flags & _XBF_PAGES) {
316 uint i;
317
318 if (xfs_buf_is_vmapped(bp))
319 vm_unmap_ram(bp->b_addr - bp->b_offset,
320 bp->b_page_count);
321
322 for (i = 0; i < bp->b_page_count; i++) {
323 struct page *page = bp->b_pages[i];
324
325 __free_page(page);
326 }
327 } else if (bp->b_flags & _XBF_KMEM)
328 kmem_free(bp->b_addr);
329 _xfs_buf_free_pages(bp);
330 xfs_buf_free_maps(bp);
331 kmem_zone_free(xfs_buf_zone, bp);
332}
333
334
335
336
337STATIC int
338xfs_buf_allocate_memory(
339 xfs_buf_t *bp,
340 uint flags)
341{
342 size_t size;
343 size_t nbytes, offset;
344 gfp_t gfp_mask = xb_to_gfp(flags);
345 unsigned short page_count, i;
346 xfs_off_t start, end;
347 int error;

        /*
         * For buffers that are contained within a single page, just allocate
         * the memory from the heap - there's no need for the complexity of
         * page arrays to keep allocation down to order 0.
         */
354 size = BBTOB(bp->b_length);
355 if (size < PAGE_SIZE) {
356 bp->b_addr = kmem_alloc(size, KM_NOFS);
357 if (!bp->b_addr) {
358
359 goto use_alloc_page;
360 }
361
362 if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
363 ((unsigned long)bp->b_addr & PAGE_MASK)) {
364
365 kmem_free(bp->b_addr);
366 bp->b_addr = NULL;
367 goto use_alloc_page;
368 }
369 bp->b_offset = offset_in_page(bp->b_addr);
370 bp->b_pages = bp->b_page_array;
371 bp->b_pages[0] = virt_to_page(bp->b_addr);
372 bp->b_page_count = 1;
373 bp->b_flags |= _XBF_KMEM;
374 return 0;
375 }
376
377use_alloc_page:
378 start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
379 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
380 >> PAGE_SHIFT;
381 page_count = end - start;
382 error = _xfs_buf_get_pages(bp, page_count);
383 if (unlikely(error))
384 return error;
385
386 offset = bp->b_offset;
387 bp->b_flags |= _XBF_PAGES;
388
389 for (i = 0; i < bp->b_page_count; i++) {
390 struct page *page;
391 uint retries = 0;
392retry:
393 page = alloc_page(gfp_mask);
394 if (unlikely(page == NULL)) {
395 if (flags & XBF_READ_AHEAD) {
396 bp->b_page_count = i;
397 error = -ENOMEM;
398 goto out_free_pages;
                        }

                        /*
                         * This could deadlock.
                         *
                         * But until all the XFS lowlevel code is revamped to
                         * handle buffer allocation failures we can't do much.
                         */
407 if (!(++retries % 100))
408 xfs_err(NULL,
409 "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
410 current->comm, current->pid,
411 __func__, gfp_mask);
412
413 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
414 congestion_wait(BLK_RW_ASYNC, HZ/50);
415 goto retry;
416 }
417
418 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
419
420 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
421 size -= nbytes;
422 bp->b_pages[i] = page;
423 offset = 0;
424 }
425 return 0;
426
427out_free_pages:
428 for (i = 0; i < bp->b_page_count; i++)
429 __free_page(bp->b_pages[i]);
430 bp->b_flags &= ~_XBF_PAGES;
431 return error;
432}
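
/*
 * Worked example for the page count calculation above (illustrative
 * numbers, 4k pages): a buffer of 8 basic blocks starting at basic block 7
 * covers disk bytes 3584..7679, i.e. start = 0 and end = 2, so page_count
 * is 2 even though the buffer itself is only one page in size.
 */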

/*
 * Map buffer into kernel address-space if necessary.
 */
437STATIC int
438_xfs_buf_map_pages(
439 xfs_buf_t *bp,
440 uint flags)
441{
442 ASSERT(bp->b_flags & _XBF_PAGES);
443 if (bp->b_page_count == 1) {
444
445 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
446 } else if (flags & XBF_UNMAPPED) {
447 bp->b_addr = NULL;
448 } else {
449 int retried = 0;
450 unsigned nofs_flag;

                /*
                 * vm_map_ram() will allocate auxiliary structures (e.g.
                 * pagetables) with GFP_KERNEL, yet we are likely to be under
                 * GFP_NOFS context here. Hence we need to tell memory reclaim
                 * that we are in such a context via PF_MEMALLOC_NOFS to
                 * prevent memory reclaim re-entering the filesystem here and
                 * potentially deadlocking.
                 */
460 nofs_flag = memalloc_nofs_save();
461 do {
462 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
463 -1, PAGE_KERNEL);
464 if (bp->b_addr)
465 break;
466 vm_unmap_aliases();
467 } while (retried++ <= 1);
468 memalloc_nofs_restore(nofs_flag);
469
470 if (!bp->b_addr)
471 return -ENOMEM;
472 bp->b_addr += bp->b_offset;
473 }
474
475 return 0;
476}

/*
 *	Finding and Reading Buffers
 */
481static int
482_xfs_buf_obj_cmp(
483 struct rhashtable_compare_arg *arg,
484 const void *obj)
485{
486 const struct xfs_buf_map *map = arg->key;
487 const struct xfs_buf *bp = obj;

        /*
         * The key hashing in the lookup path depends on the key being the
         * first element of the compare_arg, make sure to assert this.
         */
493 BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
494
495 if (bp->b_bn != map->bm_bn)
496 return 1;
497
498 if (unlikely(bp->b_length != map->bm_len)) {
                /*
                 * found a block number match. If the range doesn't
                 * match, the only way this is allowed is if the buffer
                 * in the cache is stale and the transaction that made
                 * it stale has not yet committed. i.e. we are
                 * reallocating a busy extent. Skip this buffer and
                 * continue searching for an exact match.
                 */
507 ASSERT(bp->b_flags & XBF_STALE);
508 return 1;
509 }
510 return 0;
511}
512
513static const struct rhashtable_params xfs_buf_hash_params = {
514 .min_size = 32,
515 .nelem_hint = 16,
516 .key_len = sizeof(xfs_daddr_t),
517 .key_offset = offsetof(struct xfs_buf, b_bn),
518 .head_offset = offsetof(struct xfs_buf, b_rhash_head),
519 .automatic_shrinking = true,
520 .obj_cmpfn = _xfs_buf_obj_cmp,
521};
522
523int
524xfs_buf_hash_init(
525 struct xfs_perag *pag)
526{
527 spin_lock_init(&pag->pag_buf_lock);
528 return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
529}
530
531void
532xfs_buf_hash_destroy(
533 struct xfs_perag *pag)
534{
535 rhashtable_destroy(&pag->pag_buf_hash);
536}
537
/*
 * Look up a buffer in the buffer cache and return it referenced and locked
 * in @found_bp.
 *
 * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
 * cache.
 *
 * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
 * -EAGAIN if we fail to lock it.
 *
 * Return values are:
 *	-EFSCORRUPTED if we have been supplied with an invalid address
 *	-EAGAIN on trylock failure
 *	-ENOENT if we fail to find a match and @new_bp was NULL
 *	0, with @found_bp:
 *		- @new_bp if we inserted it into the cache
 *		- the buffer we found and locked.
 */
556static int
557xfs_buf_find(
558 struct xfs_buftarg *btp,
559 struct xfs_buf_map *map,
560 int nmaps,
561 xfs_buf_flags_t flags,
562 struct xfs_buf *new_bp,
563 struct xfs_buf **found_bp)
564{
565 struct xfs_perag *pag;
566 xfs_buf_t *bp;
567 struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
568 xfs_daddr_t eofs;
569 int i;
570
571 *found_bp = NULL;
572
573 for (i = 0; i < nmaps; i++)
574 cmap.bm_len += map[i].bm_len;
575
576
577 ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
578 ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
579
        /*
         * Corrupted block numbers can get through to here, unfortunately, so
         * we have to check that the buffer falls within the filesystem bounds.
         */
584 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
585 if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
586 xfs_alert(btp->bt_mount,
587 "%s: daddr 0x%llx out of range, EOFS 0x%llx",
588 __func__, cmap.bm_bn, eofs);
589 WARN_ON(1);
590 return -EFSCORRUPTED;
591 }
592
593 pag = xfs_perag_get(btp->bt_mount,
594 xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
595
596 spin_lock(&pag->pag_buf_lock);
597 bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
598 xfs_buf_hash_params);
599 if (bp) {
600 atomic_inc(&bp->b_hold);
601 goto found;
602 }
603
604
605 if (!new_bp) {
606 XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
607 spin_unlock(&pag->pag_buf_lock);
608 xfs_perag_put(pag);
609 return -ENOENT;
610 }
611
612
613 new_bp->b_pag = pag;
614 rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
615 xfs_buf_hash_params);
616 spin_unlock(&pag->pag_buf_lock);
617 *found_bp = new_bp;
618 return 0;
619
620found:
621 spin_unlock(&pag->pag_buf_lock);
622 xfs_perag_put(pag);
623
624 if (!xfs_buf_trylock(bp)) {
625 if (flags & XBF_TRYLOCK) {
626 xfs_buf_rele(bp);
627 XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
628 return -EAGAIN;
629 }
630 xfs_buf_lock(bp);
631 XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
632 }

        /*
         * if the buffer is stale, clear all the external state associated with
         * it. We need to keep flags such as how we allocated the buffer memory
         * intact here.
         */
639 if (bp->b_flags & XBF_STALE) {
640 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
641 ASSERT(bp->b_iodone == NULL);
642 bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
643 bp->b_ops = NULL;
644 }
645
646 trace_xfs_buf_find(bp, flags, _RET_IP_);
647 XFS_STATS_INC(btp->bt_mount, xb_get_locked);
648 *found_bp = bp;
649 return 0;
650}
651
652struct xfs_buf *
653xfs_buf_incore(
654 struct xfs_buftarg *target,
655 xfs_daddr_t blkno,
656 size_t numblks,
657 xfs_buf_flags_t flags)
658{
659 struct xfs_buf *bp;
660 int error;
661 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
662
663 error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
664 if (error)
665 return NULL;
666 return bp;
667}
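
/*
 * Illustrative lookup pattern (sketch; the target pointer shown is just an
 * example): check whether a range is already cached without sleeping on a
 * contended buffer lock:
 *
 *	bp = xfs_buf_incore(mp->m_ddev_targp, daddr, numblks, XBF_TRYLOCK);
 *	if (bp) {
 *		... inspect the cached buffer ...
 *		xfs_buf_relse(bp);
 *	}
 *
 * With XBF_TRYLOCK the lookup returns NULL instead of blocking when the
 * buffer exists but is locked by someone else.
 */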

/*
 * Assembles a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata intensive workloads will see 3 orders of magnitude
 * more hits than misses.
 */
674struct xfs_buf *
675xfs_buf_get_map(
676 struct xfs_buftarg *target,
677 struct xfs_buf_map *map,
678 int nmaps,
679 xfs_buf_flags_t flags)
680{
681 struct xfs_buf *bp;
682 struct xfs_buf *new_bp;
683 int error = 0;
684
685 error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
686
687 switch (error) {
688 case 0:
689
690 goto found;
691 case -EAGAIN:
692
693 ASSERT(flags & XBF_TRYLOCK);
694 return NULL;
695 case -ENOENT:
696
697 break;
698 case -EFSCORRUPTED:
699 default:
                /*
                 * None of the higher layers understand failure types
                 * yet, so return NULL to signal a fatal lookup error.
                 */
704 return NULL;
705 }
706
707 new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
708 if (unlikely(!new_bp))
709 return NULL;
710
711 error = xfs_buf_allocate_memory(new_bp, flags);
712 if (error) {
713 xfs_buf_free(new_bp);
714 return NULL;
715 }
716
717 error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
718 if (error) {
719 xfs_buf_free(new_bp);
720 return NULL;
721 }
722
723 if (bp != new_bp)
724 xfs_buf_free(new_bp);
725
726found:
727 if (!bp->b_addr) {
728 error = _xfs_buf_map_pages(bp, flags);
729 if (unlikely(error)) {
730 xfs_warn(target->bt_mount,
                                "%s: failed to map pages\n", __func__);
732 xfs_buf_relse(bp);
733 return NULL;
734 }
735 }

        /*
         * Clear b_error if this is a lookup from a caller that doesn't expect
         * valid data to be found in the buffer.
         */
741 if (!(flags & XBF_READ))
742 xfs_buf_ioerror(bp, 0);
743
744 XFS_STATS_INC(target->bt_mount, xb_get);
745 trace_xfs_buf_get(bp, flags, _RET_IP_);
746 return bp;
747}
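
/*
 * Illustrative call sequence (sketch): most callers reach this through the
 * single-map wrappers in xfs_buf.h, which do roughly:
 *
 *	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
 *	bp = xfs_buf_get_map(target, &map, 1, flags);
 *
 * A NULL return may mean memory allocation failure, a trylock failure or a
 * rejected (out of range) address; callers treat all of these as "no buffer".
 */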
748
749STATIC int
750_xfs_buf_read(
751 xfs_buf_t *bp,
752 xfs_buf_flags_t flags)
753{
754 ASSERT(!(flags & XBF_WRITE));
755 ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
756
757 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
758 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
759
760 if (flags & XBF_ASYNC) {
761 xfs_buf_submit(bp);
762 return 0;
763 }
764 return xfs_buf_submit_wait(bp);
765}
766
767xfs_buf_t *
768xfs_buf_read_map(
769 struct xfs_buftarg *target,
770 struct xfs_buf_map *map,
771 int nmaps,
772 xfs_buf_flags_t flags,
773 const struct xfs_buf_ops *ops)
774{
775 struct xfs_buf *bp;
776
777 flags |= XBF_READ;
778
779 bp = xfs_buf_get_map(target, map, nmaps, flags);
780 if (bp) {
781 trace_xfs_buf_read(bp, flags, _RET_IP_);
782
783 if (!(bp->b_flags & XBF_DONE)) {
784 XFS_STATS_INC(target->bt_mount, xb_get_read);
785 bp->b_ops = ops;
786 _xfs_buf_read(bp, flags);
787 } else if (flags & XBF_ASYNC) {
                        /*
                         * Read ahead call which is already satisfied,
                         * drop the buffer
                         */
792 xfs_buf_relse(bp);
793 return NULL;
794 } else {
795
796 bp->b_flags &= ~XBF_READ;
797 }
798 }
799
800 return bp;
801}
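
/*
 * Note (summarising the logic above): b_ops is only attached on a cache
 * miss, i.e. when the buffer is not yet XBF_DONE. The read verifier in
 * ops->verify_read() then runs from xfs_buf_ioend() once the I/O completes,
 * so a buffer that was already valid in cache is returned without being
 * re-verified.
 */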
802
803
804
805
806
807void
808xfs_buf_readahead_map(
809 struct xfs_buftarg *target,
810 struct xfs_buf_map *map,
811 int nmaps,
812 const struct xfs_buf_ops *ops)
813{
814 if (bdi_read_congested(target->bt_bdev->bd_bdi))
815 return;
816
817 xfs_buf_read_map(target, map, nmaps,
818 XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
819}

/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */
825int
826xfs_buf_read_uncached(
827 struct xfs_buftarg *target,
828 xfs_daddr_t daddr,
829 size_t numblks,
830 int flags,
831 struct xfs_buf **bpp,
832 const struct xfs_buf_ops *ops)
833{
834 struct xfs_buf *bp;
835
836 *bpp = NULL;
837
838 bp = xfs_buf_get_uncached(target, numblks, flags);
839 if (!bp)
840 return -ENOMEM;
841
842
843 ASSERT(bp->b_map_count == 1);
844 bp->b_bn = XFS_BUF_DADDR_NULL;
845 bp->b_maps[0].bm_bn = daddr;
846 bp->b_flags |= XBF_READ;
847 bp->b_ops = ops;
848
849 xfs_buf_submit_wait(bp);
850 if (bp->b_error) {
851 int error = bp->b_error;
852 xfs_buf_relse(bp);
853 return error;
854 }
855
856 *bpp = bp;
857 return 0;
858}
859
860
861
862
863
864void
865xfs_buf_set_empty(
866 struct xfs_buf *bp,
867 size_t numblks)
868{
869 if (bp->b_pages)
870 _xfs_buf_free_pages(bp);
871
872 bp->b_pages = NULL;
873 bp->b_page_count = 0;
874 bp->b_addr = NULL;
875 bp->b_length = numblks;
876 bp->b_io_length = numblks;
877
878 ASSERT(bp->b_map_count == 1);
879 bp->b_bn = XFS_BUF_DADDR_NULL;
880 bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
881 bp->b_maps[0].bm_len = bp->b_length;
882}
883
884static inline struct page *
885mem_to_page(
886 void *addr)
887{
888 if ((!is_vmalloc_addr(addr))) {
889 return virt_to_page(addr);
890 } else {
891 return vmalloc_to_page(addr);
892 }
893}
894
895int
896xfs_buf_associate_memory(
897 xfs_buf_t *bp,
898 void *mem,
899 size_t len)
900{
901 int rval;
902 int i = 0;
903 unsigned long pageaddr;
904 unsigned long offset;
905 size_t buflen;
906 int page_count;
907
908 pageaddr = (unsigned long)mem & PAGE_MASK;
909 offset = (unsigned long)mem - pageaddr;
910 buflen = PAGE_ALIGN(len + offset);
911 page_count = buflen >> PAGE_SHIFT;
912
913
914 if (bp->b_pages)
915 _xfs_buf_free_pages(bp);
916
917 bp->b_pages = NULL;
918 bp->b_addr = mem;
919
920 rval = _xfs_buf_get_pages(bp, page_count);
921 if (rval)
922 return rval;
923
924 bp->b_offset = offset;
925
926 for (i = 0; i < bp->b_page_count; i++) {
927 bp->b_pages[i] = mem_to_page((void *)pageaddr);
928 pageaddr += PAGE_SIZE;
929 }
930
931 bp->b_io_length = BTOBB(len);
932 bp->b_length = BTOBB(buflen);
933
934 return 0;
935}
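
/*
 * Worked example for the page accounting above (illustrative, 4k pages):
 * associating len = 1024 bytes starting 512 bytes into a page gives
 * offset = 512, buflen = PAGE_ALIGN(1536) = 4096 and page_count = 1; the
 * same length starting 3584 bytes into a page gives buflen = 8192 and
 * page_count = 2 because the range straddles a page boundary.
 */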
936
937xfs_buf_t *
938xfs_buf_get_uncached(
939 struct xfs_buftarg *target,
940 size_t numblks,
941 int flags)
942{
943 unsigned long page_count;
944 int error, i;
945 struct xfs_buf *bp;
946 DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
947
948
949 bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
950 if (unlikely(bp == NULL))
951 goto fail;
952
953 page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
954 error = _xfs_buf_get_pages(bp, page_count);
955 if (error)
956 goto fail_free_buf;
957
958 for (i = 0; i < page_count; i++) {
959 bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
960 if (!bp->b_pages[i])
961 goto fail_free_mem;
962 }
963 bp->b_flags |= _XBF_PAGES;
964
965 error = _xfs_buf_map_pages(bp, 0);
966 if (unlikely(error)) {
967 xfs_warn(target->bt_mount,
968 "%s: failed to map pages", __func__);
969 goto fail_free_mem;
970 }
971
972 trace_xfs_buf_get_uncached(bp, _RET_IP_);
973 return bp;
974
975 fail_free_mem:
976 while (--i >= 0)
977 __free_page(bp->b_pages[i]);
978 _xfs_buf_free_pages(bp);
979 fail_free_buf:
980 xfs_buf_free_maps(bp);
981 kmem_zone_free(xfs_buf_zone, bp);
982 fail:
983 return NULL;
984}

/*
 * Increment reference count on buffer, to hold the buffer concurrently
 * with another thread which may release (free) the buffer asynchronously.
 * Must hold the buffer already to call this function.
 */
991void
992xfs_buf_hold(
993 xfs_buf_t *bp)
994{
995 trace_xfs_buf_hold(bp, _RET_IP_);
996 atomic_inc(&bp->b_hold);
997}

/*
 * Release a hold on the specified buffer. If the hold count is 1, the buffer
 * is dropped from the hash table and either placed on the LRU or freed,
 * depending on its b_lru_ref count and stale state.
 */
1003void
1004xfs_buf_rele(
1005 xfs_buf_t *bp)
1006{
1007 struct xfs_perag *pag = bp->b_pag;
1008 bool release;
1009 bool freebuf = false;
1010
1011 trace_xfs_buf_rele(bp, _RET_IP_);
1012
1013 if (!pag) {
1014 ASSERT(list_empty(&bp->b_lru));
1015 if (atomic_dec_and_test(&bp->b_hold)) {
1016 xfs_buf_ioacct_dec(bp);
1017 xfs_buf_free(bp);
1018 }
1019 return;
1020 }
1021
1022 ASSERT(atomic_read(&bp->b_hold) > 0);
1023
1024 release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
1025 spin_lock(&bp->b_lock);
1026 if (!release) {
                /*
                 * Drop the in-flight state if the buffer is already on the LRU
                 * and it holds the only reference. This is racy because we
                 * haven't acquired the pag lock, but the use of the in-flight
                 * state flag ensures the decrement occurs only once per-buf.
                 */
1033 if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
1034 __xfs_buf_ioacct_dec(bp);
1035 goto out_unlock;
1036 }
1037
1038
1039 __xfs_buf_ioacct_dec(bp);
1040 if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
1041
1042
1043
1044
1045
1046 if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
1047 bp->b_state &= ~XFS_BSTATE_DISPOSE;
1048 atomic_inc(&bp->b_hold);
1049 }
1050 spin_unlock(&pag->pag_buf_lock);
1051 } else {
1052
1053
1054
1055
1056
1057
1058 if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
1059 list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
1060 } else {
1061 ASSERT(list_empty(&bp->b_lru));
1062 }
1063
1064 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
1065 rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
1066 xfs_buf_hash_params);
1067 spin_unlock(&pag->pag_buf_lock);
1068 xfs_perag_put(pag);
1069 freebuf = true;
1070 }
1071
1072out_unlock:
1073 spin_unlock(&bp->b_lock);
1074
1075 if (freebuf)
1076 xfs_buf_free(bp);
1077}
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091int
1092xfs_buf_trylock(
1093 struct xfs_buf *bp)
1094{
1095 int locked;
1096
1097 locked = down_trylock(&bp->b_sema) == 0;
1098 if (locked) {
1099 XB_SET_OWNER(bp);
1100 trace_xfs_buf_trylock(bp, _RET_IP_);
1101 } else {
1102 trace_xfs_buf_trylock_fail(bp, _RET_IP_);
1103 }
1104 return locked;
1105}

/*
 *	Lock a buffer object.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we
 *	are being asked to lock a buffer that has been reallocated. Because
 *	it is pinned, we know that the log has not been pushed to disk and
 *	hence it will still be locked. Rather than sleeping until someone
 *	else pushes the log, push it ourselves before trying to get the lock.
 */
1116void
1117xfs_buf_lock(
1118 struct xfs_buf *bp)
1119{
1120 trace_xfs_buf_lock(bp, _RET_IP_);
1121
1122 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
1123 xfs_log_force(bp->b_target->bt_mount, 0);
1124 down(&bp->b_sema);
1125 XB_SET_OWNER(bp);
1126
1127 trace_xfs_buf_lock_done(bp, _RET_IP_);
1128}
1129
1130void
1131xfs_buf_unlock(
1132 struct xfs_buf *bp)
1133{
1134 ASSERT(xfs_buf_islocked(bp));
1135
1136 XB_CLEAR_OWNER(bp);
1137 up(&bp->b_sema);
1138
1139 trace_xfs_buf_unlock(bp, _RET_IP_);
1140}
1141
1142STATIC void
1143xfs_buf_wait_unpin(
1144 xfs_buf_t *bp)
1145{
1146 DECLARE_WAITQUEUE (wait, current);
1147
1148 if (atomic_read(&bp->b_pin_count) == 0)
1149 return;
1150
1151 add_wait_queue(&bp->b_waiters, &wait);
1152 for (;;) {
1153 set_current_state(TASK_UNINTERRUPTIBLE);
1154 if (atomic_read(&bp->b_pin_count) == 0)
1155 break;
1156 io_schedule();
1157 }
1158 remove_wait_queue(&bp->b_waiters, &wait);
1159 set_current_state(TASK_RUNNING);
1160}
1161
1162
1163
1164
1165
1166void
1167xfs_buf_ioend(
1168 struct xfs_buf *bp)
1169{
1170 bool read = bp->b_flags & XBF_READ;
1171
1172 trace_xfs_buf_iodone(bp, _RET_IP_);
1173
1174 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1175
1176
1177
1178
1179
1180 if (!bp->b_error && bp->b_io_error)
1181 xfs_buf_ioerror(bp, bp->b_io_error);
1182
1183
1184 if (read && !bp->b_error && bp->b_ops) {
1185 ASSERT(!bp->b_iodone);
1186 bp->b_ops->verify_read(bp);
1187 }
1188
1189 if (!bp->b_error)
1190 bp->b_flags |= XBF_DONE;
1191
1192 if (bp->b_iodone)
1193 (*(bp->b_iodone))(bp);
1194 else if (bp->b_flags & XBF_ASYNC)
1195 xfs_buf_relse(bp);
1196 else
1197 complete(&bp->b_iowait);
1198}
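
/*
 * Completion fan-out (summarising xfs_buf_ioend()): exactly one of three
 * things happens once I/O and verification are done - the b_iodone callback
 * is invoked (buffer log items), an async buffer is released, or a
 * synchronous waiter sleeping on b_iowait in xfs_buf_submit_wait() is woken.
 */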
1199
1200static void
1201xfs_buf_ioend_work(
1202 struct work_struct *work)
1203{
1204 struct xfs_buf *bp =
1205 container_of(work, xfs_buf_t, b_ioend_work);
1206
1207 xfs_buf_ioend(bp);
1208}
1209
1210static void
1211xfs_buf_ioend_async(
1212 struct xfs_buf *bp)
1213{
1214 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
1215 queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
1216}
1217
1218void
1219__xfs_buf_ioerror(
1220 xfs_buf_t *bp,
1221 int error,
1222 xfs_failaddr_t failaddr)
1223{
1224 ASSERT(error <= 0 && error >= -1000);
1225 bp->b_error = error;
1226 trace_xfs_buf_ioerror(bp, error, failaddr);
1227}
1228
1229void
1230xfs_buf_ioerror_alert(
1231 struct xfs_buf *bp,
1232 const char *func)
1233{
1234 xfs_alert(bp->b_target->bt_mount,
1235"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
1236 func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
1237 -bp->b_error);
1238}
1239
1240int
1241xfs_bwrite(
1242 struct xfs_buf *bp)
1243{
1244 int error;
1245
1246 ASSERT(xfs_buf_islocked(bp));
1247
1248 bp->b_flags |= XBF_WRITE;
1249 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
1250 XBF_WRITE_FAIL | XBF_DONE);
1251
1252 error = xfs_buf_submit_wait(bp);
1253 if (error) {
1254 xfs_force_shutdown(bp->b_target->bt_mount,
1255 SHUTDOWN_META_IO_ERROR);
1256 }
1257 return error;
1258}
1259
1260static void
1261xfs_buf_bio_end_io(
1262 struct bio *bio)
1263{
1264 struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
1265
1266
1267
1268
1269
1270 if (bio->bi_status) {
1271 int error = blk_status_to_errno(bio->bi_status);
1272
1273 cmpxchg(&bp->b_io_error, 0, error);
1274 }
1275
1276 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1277 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1278
1279 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1280 xfs_buf_ioend_async(bp);
1281 bio_put(bio);
1282}
1283
1284static void
1285xfs_buf_ioapply_map(
1286 struct xfs_buf *bp,
1287 int map,
1288 int *buf_offset,
1289 int *count,
1290 int op,
1291 int op_flags)
1292{
1293 int page_index;
1294 int total_nr_pages = bp->b_page_count;
1295 int nr_pages;
1296 struct bio *bio;
1297 sector_t sector = bp->b_maps[map].bm_bn;
1298 int size;
1299 int offset;
1300
1301
1302 page_index = 0;
1303 offset = *buf_offset;
1304 while (offset >= PAGE_SIZE) {
1305 page_index++;
1306 offset -= PAGE_SIZE;
1307 }
1308
1309
1310
1311
1312
1313 size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
1314 *count -= size;
1315 *buf_offset += size;
1316
1317next_chunk:
1318 atomic_inc(&bp->b_io_remaining);
1319 nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
1320
1321 bio = bio_alloc(GFP_NOIO, nr_pages);
1322 bio_set_dev(bio, bp->b_target->bt_bdev);
1323 bio->bi_iter.bi_sector = sector;
1324 bio->bi_end_io = xfs_buf_bio_end_io;
1325 bio->bi_private = bp;
1326 bio_set_op_attrs(bio, op, op_flags);
1327
1328 for (; size && nr_pages; nr_pages--, page_index++) {
1329 int rbytes, nbytes = PAGE_SIZE - offset;
1330
1331 if (nbytes > size)
1332 nbytes = size;
1333
1334 rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
1335 offset);
1336 if (rbytes < nbytes)
1337 break;
1338
1339 offset = 0;
1340 sector += BTOBB(nbytes);
1341 size -= nbytes;
1342 total_nr_pages--;
1343 }
1344
1345 if (likely(bio->bi_iter.bi_size)) {
1346 if (xfs_buf_is_vmapped(bp)) {
1347 flush_kernel_vmap_range(bp->b_addr,
1348 xfs_buf_vmap_len(bp));
1349 }
1350 submit_bio(bio);
1351 if (size)
1352 goto next_chunk;
1353 } else {
1354
1355
1356
1357
1358 atomic_dec(&bp->b_io_remaining);
1359 xfs_buf_ioerror(bp, -EIO);
1360 bio_put(bio);
1361 }
1362
1363}
1364
1365STATIC void
1366_xfs_buf_ioapply(
1367 struct xfs_buf *bp)
1368{
1369 struct blk_plug plug;
1370 int op;
1371 int op_flags = 0;
1372 int offset;
1373 int size;
1374 int i;
1375
1376
1377
1378
1379
1380 bp->b_error = 0;
1381
1382
1383
1384
1385
1386 if (!bp->b_ioend_wq)
1387 bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
1388
1389 if (bp->b_flags & XBF_WRITE) {
1390 op = REQ_OP_WRITE;
1391 if (bp->b_flags & XBF_SYNCIO)
1392 op_flags = REQ_SYNC;
1393 if (bp->b_flags & XBF_FUA)
1394 op_flags |= REQ_FUA;
1395 if (bp->b_flags & XBF_FLUSH)
1396 op_flags |= REQ_PREFLUSH;
1397
1398
1399
1400
1401
1402
1403 if (bp->b_ops) {
1404 bp->b_ops->verify_write(bp);
1405 if (bp->b_error) {
1406 xfs_force_shutdown(bp->b_target->bt_mount,
1407 SHUTDOWN_CORRUPT_INCORE);
1408 return;
1409 }
1410 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
1411 struct xfs_mount *mp = bp->b_target->bt_mount;
1412
1413
1414
1415
1416
1417 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1418 xfs_warn(mp,
1419 "%s: no buf ops on daddr 0x%llx len %d",
1420 __func__, bp->b_bn, bp->b_length);
1421 xfs_hex_dump(bp->b_addr,
1422 XFS_CORRUPTION_DUMP_LEN);
1423 dump_stack();
1424 }
1425 }
1426 } else if (bp->b_flags & XBF_READ_AHEAD) {
1427 op = REQ_OP_READ;
1428 op_flags = REQ_RAHEAD;
1429 } else {
1430 op = REQ_OP_READ;
1431 }
1432
1433
1434 op_flags |= REQ_META;
1435
1436
1437
1438
1439
1440
1441
1442 offset = bp->b_offset;
1443 size = BBTOB(bp->b_io_length);
1444 blk_start_plug(&plug);
1445 for (i = 0; i < bp->b_map_count; i++) {
1446 xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
1447 if (bp->b_error)
1448 break;
1449 if (size <= 0)
1450 break;
1451 }
1452 blk_finish_plug(&plug);
1453}
1454
1455
1456
1457
1458
1459
1460
1461void
1462xfs_buf_submit(
1463 struct xfs_buf *bp)
1464{
1465 trace_xfs_buf_submit(bp, _RET_IP_);
1466
1467 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
1468 ASSERT(bp->b_flags & XBF_ASYNC);
1469
1470
1471 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
1472 xfs_buf_ioerror(bp, -EIO);
1473 bp->b_flags &= ~XBF_DONE;
1474 xfs_buf_stale(bp);
1475 xfs_buf_ioend(bp);
1476 return;
1477 }
1478
1479 if (bp->b_flags & XBF_WRITE)
1480 xfs_buf_wait_unpin(bp);
1481
1482
1483 bp->b_io_error = 0;
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493 xfs_buf_hold(bp);
1494
1495
1496
1497
1498
1499
1500 atomic_set(&bp->b_io_remaining, 1);
1501 xfs_buf_ioacct_inc(bp);
1502 _xfs_buf_ioapply(bp);
1503
1504
1505
1506
1507
1508
1509 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1510 if (bp->b_error)
1511 xfs_buf_ioend(bp);
1512 else
1513 xfs_buf_ioend_async(bp);
1514 }
1515
1516 xfs_buf_rele(bp);
1517
1518}
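
/*
 * Reference note (summarising the code above): the extra hold taken before
 * _xfs_buf_ioapply() is what keeps the buffer alive until the final
 * atomic_dec_and_test() of b_io_remaining here, since completion of the
 * last bio could otherwise free an async buffer while this function is
 * still running.
 */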
1519
1520
1521
1522
1523int
1524xfs_buf_submit_wait(
1525 struct xfs_buf *bp)
1526{
1527 int error;
1528
1529 trace_xfs_buf_submit_wait(bp, _RET_IP_);
1530
1531 ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
1532
1533 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
1534 xfs_buf_ioerror(bp, -EIO);
1535 xfs_buf_stale(bp);
1536 bp->b_flags &= ~XBF_DONE;
1537 return -EIO;
1538 }
1539
1540 if (bp->b_flags & XBF_WRITE)
1541 xfs_buf_wait_unpin(bp);
1542
1543
1544 bp->b_io_error = 0;
1545
1546
1547
1548
1549
1550
1551
1552 xfs_buf_hold(bp);
1553
1554
1555
1556
1557
1558
1559 atomic_set(&bp->b_io_remaining, 1);
1560 _xfs_buf_ioapply(bp);
1561
1562
1563
1564
1565
1566 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1567 xfs_buf_ioend(bp);
1568
1569
1570 trace_xfs_buf_iowait(bp, _RET_IP_);
1571 wait_for_completion(&bp->b_iowait);
1572 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1573 error = bp->b_error;
1574
1575
1576
1577
1578
1579 xfs_buf_rele(bp);
1580 return error;
1581}
1582
1583void *
1584xfs_buf_offset(
1585 struct xfs_buf *bp,
1586 size_t offset)
1587{
1588 struct page *page;
1589
1590 if (bp->b_addr)
1591 return bp->b_addr + offset;
1592
1593 offset += bp->b_offset;
1594 page = bp->b_pages[offset >> PAGE_SHIFT];
1595 return page_address(page) + (offset & (PAGE_SIZE-1));
1596}
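
/*
 * Worked example (illustrative, 4k pages): for an unmapped multi-page
 * buffer with b_offset = 0, xfs_buf_offset(bp, 5000) selects page index 1
 * (5000 >> PAGE_SHIFT) and returns page_address(bp->b_pages[1]) + 904,
 * because 5000 & (PAGE_SIZE - 1) == 904.
 */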
1597
1598
1599
1600
1601void
1602xfs_buf_iomove(
1603 xfs_buf_t *bp,
1604 size_t boff,
1605 size_t bsize,
1606 void *data,
1607 xfs_buf_rw_t mode)
1608{
1609 size_t bend;
1610
1611 bend = boff + bsize;
1612 while (boff < bend) {
1613 struct page *page;
1614 int page_index, page_offset, csize;
1615
1616 page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
1617 page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
1618 page = bp->b_pages[page_index];
1619 csize = min_t(size_t, PAGE_SIZE - page_offset,
1620 BBTOB(bp->b_io_length) - boff);
1621
1622 ASSERT((csize + page_offset) <= PAGE_SIZE);
1623
1624 switch (mode) {
1625 case XBRW_ZERO:
1626 memset(page_address(page) + page_offset, 0, csize);
1627 break;
1628 case XBRW_READ:
1629 memcpy(data, page_address(page) + page_offset, csize);
1630 break;
1631 case XBRW_WRITE:
1632 memcpy(page_address(page) + page_offset, data, csize);
1633 }
1634
1635 boff += csize;
1636 data += csize;
1637 }
1638}
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649static enum lru_status
1650xfs_buftarg_wait_rele(
1651 struct list_head *item,
1652 struct list_lru_one *lru,
1653 spinlock_t *lru_lock,
1654 void *arg)
1655
1656{
1657 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1658 struct list_head *dispose = arg;
1659
1660 if (atomic_read(&bp->b_hold) > 1) {
1661
1662 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1663 return LRU_SKIP;
1664 }
1665 if (!spin_trylock(&bp->b_lock))
1666 return LRU_SKIP;
1667
1668
1669
1670
1671
1672 atomic_set(&bp->b_lru_ref, 0);
1673 bp->b_state |= XFS_BSTATE_DISPOSE;
1674 list_lru_isolate_move(lru, item, dispose);
1675 spin_unlock(&bp->b_lock);
1676 return LRU_REMOVED;
1677}
1678
1679void
1680xfs_wait_buftarg(
1681 struct xfs_buftarg *btp)
1682{
1683 LIST_HEAD(dispose);
1684 int loop = 0;
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698 while (percpu_counter_sum(&btp->bt_io_count))
1699 delay(100);
1700 flush_workqueue(btp->bt_mount->m_buf_workqueue);
1701
1702
1703 while (list_lru_count(&btp->bt_lru)) {
1704 list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
1705 &dispose, LONG_MAX);
1706
1707 while (!list_empty(&dispose)) {
1708 struct xfs_buf *bp;
1709 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1710 list_del_init(&bp->b_lru);
1711 if (bp->b_flags & XBF_WRITE_FAIL) {
1712 xfs_alert(btp->bt_mount,
1713"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
1714 (long long)bp->b_bn);
1715 xfs_alert(btp->bt_mount,
1716"Please run xfs_repair to determine the extent of the problem.");
1717 }
1718 xfs_buf_rele(bp);
1719 }
1720 if (loop++ != 0)
1721 delay(100);
1722 }
1723}
1724
1725static enum lru_status
1726xfs_buftarg_isolate(
1727 struct list_head *item,
1728 struct list_lru_one *lru,
1729 spinlock_t *lru_lock,
1730 void *arg)
1731{
1732 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1733 struct list_head *dispose = arg;
1734
1735
1736
1737
1738
1739 if (!spin_trylock(&bp->b_lock))
1740 return LRU_SKIP;
1741
1742
1743
1744
1745
1746 if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1747 spin_unlock(&bp->b_lock);
1748 return LRU_ROTATE;
1749 }
1750
1751 bp->b_state |= XFS_BSTATE_DISPOSE;
1752 list_lru_isolate_move(lru, item, dispose);
1753 spin_unlock(&bp->b_lock);
1754 return LRU_REMOVED;
1755}
1756
1757static unsigned long
1758xfs_buftarg_shrink_scan(
1759 struct shrinker *shrink,
1760 struct shrink_control *sc)
1761{
1762 struct xfs_buftarg *btp = container_of(shrink,
1763 struct xfs_buftarg, bt_shrinker);
1764 LIST_HEAD(dispose);
1765 unsigned long freed;
1766
1767 freed = list_lru_shrink_walk(&btp->bt_lru, sc,
1768 xfs_buftarg_isolate, &dispose);
1769
1770 while (!list_empty(&dispose)) {
1771 struct xfs_buf *bp;
1772 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1773 list_del_init(&bp->b_lru);
1774 xfs_buf_rele(bp);
1775 }
1776
1777 return freed;
1778}
1779
1780static unsigned long
1781xfs_buftarg_shrink_count(
1782 struct shrinker *shrink,
1783 struct shrink_control *sc)
1784{
1785 struct xfs_buftarg *btp = container_of(shrink,
1786 struct xfs_buftarg, bt_shrinker);
1787 return list_lru_shrink_count(&btp->bt_lru, sc);
1788}
1789
1790void
1791xfs_free_buftarg(
1792 struct xfs_buftarg *btp)
1793{
1794 unregister_shrinker(&btp->bt_shrinker);
1795 ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
1796 percpu_counter_destroy(&btp->bt_io_count);
1797 list_lru_destroy(&btp->bt_lru);
1798
1799 xfs_blkdev_issue_flush(btp);
1800
1801 kmem_free(btp);
1802}
1803
1804int
1805xfs_setsize_buftarg(
1806 xfs_buftarg_t *btp,
1807 unsigned int sectorsize)
1808{
1809
1810 btp->bt_meta_sectorsize = sectorsize;
1811 btp->bt_meta_sectormask = sectorsize - 1;
1812
1813 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1814 xfs_warn(btp->bt_mount,
1815 "Cannot set_blocksize to %u on device %pg",
1816 sectorsize, btp->bt_bdev);
1817 return -EINVAL;
1818 }
1819
1820
1821 btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
1822 btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
1823
1824 return 0;
1825}
1826
1827
1828
1829
1830
1831
1832STATIC int
1833xfs_setsize_buftarg_early(
1834 xfs_buftarg_t *btp,
1835 struct block_device *bdev)
1836{
1837 return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
1838}
1839
1840xfs_buftarg_t *
1841xfs_alloc_buftarg(
1842 struct xfs_mount *mp,
1843 struct block_device *bdev,
1844 struct dax_device *dax_dev)
1845{
1846 xfs_buftarg_t *btp;
1847
1848 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);
1849
1850 btp->bt_mount = mp;
1851 btp->bt_dev = bdev->bd_dev;
1852 btp->bt_bdev = bdev;
1853 btp->bt_daxdev = dax_dev;
1854
1855 if (xfs_setsize_buftarg_early(btp, bdev))
1856 goto error_free;
1857
1858 if (list_lru_init(&btp->bt_lru))
1859 goto error_free;
1860
1861 if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
1862 goto error_lru;
1863
1864 btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
1865 btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
1866 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1867 btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
1868 if (register_shrinker(&btp->bt_shrinker))
1869 goto error_pcpu;
1870 return btp;
1871
1872error_pcpu:
1873 percpu_counter_destroy(&btp->bt_io_count);
1874error_lru:
1875 list_lru_destroy(&btp->bt_lru);
1876error_free:
1877 kmem_free(btp);
1878 return NULL;
1879}
1880
1881
1882
1883
1884
1885
1886
1887void
1888xfs_buf_delwri_cancel(
1889 struct list_head *list)
1890{
1891 struct xfs_buf *bp;
1892
1893 while (!list_empty(list)) {
1894 bp = list_first_entry(list, struct xfs_buf, b_list);
1895
1896 xfs_buf_lock(bp);
1897 bp->b_flags &= ~_XBF_DELWRI_Q;
1898 list_del_init(&bp->b_list);
1899 xfs_buf_relse(bp);
1900 }
1901}
1902
/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been.  Note that
 * neither this routine nor the buffer list submission functions perform
 * any internal synchronization.  It is expected that the lists are
 * thread-local to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */
1914bool
1915xfs_buf_delwri_queue(
1916 struct xfs_buf *bp,
1917 struct list_head *list)
1918{
1919 ASSERT(xfs_buf_islocked(bp));
1920 ASSERT(!(bp->b_flags & XBF_READ));
1921
1922
1923
1924
1925
1926
1927 if (bp->b_flags & _XBF_DELWRI_Q) {
1928 trace_xfs_buf_delwri_queued(bp, _RET_IP_);
1929 return false;
1930 }
1931
1932 trace_xfs_buf_delwri_queue(bp, _RET_IP_);
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942 bp->b_flags |= _XBF_DELWRI_Q;
1943 if (list_empty(&bp->b_list)) {
1944 atomic_inc(&bp->b_hold);
1945 list_add_tail(&bp->b_list, list);
1946 }
1947
1948 return true;
1949}
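
/*
 * Illustrative delwri pattern (sketch, assuming the caller owns the list
 * head): queue locked buffers onto a local list, drop the lock, then push
 * everything out in one sorted, plugged batch:
 *
 *	LIST_HEAD(buffer_list);
 *
 *	xfs_buf_delwri_queue(bp, &buffer_list);	// bp is locked here
 *	xfs_buf_relse(bp);			// the queue holds its own hold
 *	...
 *	error = xfs_buf_delwri_submit(&buffer_list);
 */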
1950
1951
1952
1953
1954
1955
1956static int
1957xfs_buf_cmp(
1958 void *priv,
1959 struct list_head *a,
1960 struct list_head *b)
1961{
1962 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1963 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1964 xfs_daddr_t diff;
1965
1966 diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
1967 if (diff < 0)
1968 return -1;
1969 if (diff > 0)
1970 return 1;
1971 return 0;
1972}
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986static int
1987xfs_buf_delwri_submit_buffers(
1988 struct list_head *buffer_list,
1989 struct list_head *wait_list)
1990{
1991 struct xfs_buf *bp, *n;
1992 LIST_HEAD (submit_list);
1993 int pinned = 0;
1994 struct blk_plug plug;
1995
1996 list_sort(NULL, buffer_list, xfs_buf_cmp);
1997
1998 blk_start_plug(&plug);
1999 list_for_each_entry_safe(bp, n, buffer_list, b_list) {
2000 if (!wait_list) {
2001 if (xfs_buf_ispinned(bp)) {
2002 pinned++;
2003 continue;
2004 }
2005 if (!xfs_buf_trylock(bp))
2006 continue;
2007 } else {
2008 xfs_buf_lock(bp);
2009 }
2010
2011
2012
2013
2014
2015
2016
2017 if (!(bp->b_flags & _XBF_DELWRI_Q)) {
2018 list_del_init(&bp->b_list);
2019 xfs_buf_relse(bp);
2020 continue;
2021 }
2022
2023 trace_xfs_buf_delwri_split(bp, _RET_IP_);
2024
2025
2026
2027
2028
2029
2030
2031
2032 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
2033 bp->b_flags |= XBF_WRITE | XBF_ASYNC;
2034 if (wait_list) {
2035 xfs_buf_hold(bp);
2036 list_move_tail(&bp->b_list, wait_list);
2037 } else
2038 list_del_init(&bp->b_list);
2039
2040 xfs_buf_submit(bp);
2041 }
2042 blk_finish_plug(&plug);
2043
2044 return pinned;
2045}

/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers.  This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing as the @buffer_list is consumed in this
 * function.
 */
2056int
2057xfs_buf_delwri_submit_nowait(
2058 struct list_head *buffer_list)
2059{
2060 return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
2061}

/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require
 * such functionality.
 */
2071int
2072xfs_buf_delwri_submit(
2073 struct list_head *buffer_list)
2074{
2075 LIST_HEAD (wait_list);
2076 int error = 0, error2;
2077 struct xfs_buf *bp;
2078
2079 xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);
2080
2081
2082 while (!list_empty(&wait_list)) {
2083 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2084
2085 list_del_init(&bp->b_list);
2086
2087
2088 xfs_buf_lock(bp);
2089 error2 = bp->b_error;
2090 xfs_buf_relse(bp);
2091 if (!error)
2092 error = error2;
2093 }
2094
2095 return error;
2096}

/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri queue
 * and return with the buffer still on the original queue. The waiting delwri
 * buffer submission infrastructure guarantees transfer of the delwri queue
 * buffer reference to a temporary wait list. We reuse this infrastructure to
 * transfer the buffer back to the original queue.
 *
 * Note the buffer transitions from the queued state, to the submitted and
 * wait listed state and back to the queued state during this call. The buffer
 * locking and queue management logic between _delwri_pushbuf() and
 * _delwri_queue() guarantee that the buffer cannot be queued to another list
 * in the interim.
 */
2113int
2114xfs_buf_delwri_pushbuf(
2115 struct xfs_buf *bp,
2116 struct list_head *buffer_list)
2117{
2118 LIST_HEAD (submit_list);
2119 int error;
2120
2121 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
2122
2123 trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
2124
2125
2126
2127
2128
2129 xfs_buf_lock(bp);
2130 list_move(&bp->b_list, &submit_list);
2131 xfs_buf_unlock(bp);
2132
2133
2134
2135
2136
2137
2138
2139 xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150 xfs_buf_lock(bp);
2151 error = bp->b_error;
2152 bp->b_flags |= _XBF_DELWRI_Q;
2153 xfs_buf_unlock(bp);
2154
2155 return error;
2156}
2157
2158int __init
2159xfs_buf_init(void)
2160{
2161 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
2162 KM_ZONE_HWALIGN, NULL);
2163 if (!xfs_buf_zone)
2164 goto out;
2165
2166 return 0;
2167
2168 out:
2169 return -ENOMEM;
2170}
2171
2172void
2173xfs_buf_terminate(void)
2174{
2175 kmem_zone_destroy(xfs_buf_zone);
2176}
2177
2178void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
2179{
2180
2181
2182
2183
2184
2185 if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
2186 XFS_ERRTAG_BUF_LRU_REF))
2187 lru_ref = 0;
2188
2189 atomic_set(&bp->b_lru_ref, lru_ref);
2190}
2191