/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/sched/mm.h>

#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_log.h"

static kmem_zone_t *xfs_buf_zone;

#ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
#else
# define XB_SET_OWNER(bp)	do { } while (0)
# define XB_CLEAR_OWNER(bp)	do { } while (0)
# define XB_GET_OWNER(bp)	do { } while (0)
#endif

#define xb_to_gfp(flags) \
	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)


static inline int
xfs_buf_is_vmapped(
	struct xfs_buf	*bp)
{
	/*
	 * Return true if the buffer is vmapped.
	 *
	 * b_addr is null if the buffer is not mapped, but the code is clever
	 * enough to know it doesn't have to map a single page, so the check has
	 * to be both for b_addr and bp->b_page_count > 1.
	 */
	return bp->b_addr && bp->b_page_count > 1;
}

static inline int
xfs_buf_vmap_len(
	struct xfs_buf	*bp)
{
	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
}

/*
 * Bump the I/O in flight count on the buftarg if we haven't yet done so for
 * this buffer. The count is incremented once per buffer (per hold cycle)
 * because the corresponding decrement is deferred to buffer release. Buffers
 * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
 * accounting isn't fully accurate as a result, but it is good enough for its
 * purpose: the count lets us wait for all in-flight I/O to complete before
 * the buftarg is torn down (see xfs_wait_buftarg()).
 *
 * Buffers that are never released (e.g., superblock or log iclog buffers)
 * must set XBF_NO_IOACCT to be excluded from the in-flight accounting.
 */
static inline void
xfs_buf_ioacct_inc(
	struct xfs_buf	*bp)
{
	if (bp->b_flags & XBF_NO_IOACCT)
		return;

	ASSERT(bp->b_flags & XBF_ASYNC);
	spin_lock(&bp->b_lock);
	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
		percpu_counter_inc(&bp->b_target->bt_io_count);
	}
	spin_unlock(&bp->b_lock);
}

/*
 * Clear the in-flight state on a buffer about to be released to the LRU or
 * freed and unaccount from the buftarg. The caller must hold b_lock.
 */
static inline void
__xfs_buf_ioacct_dec(
	struct xfs_buf	*bp)
{
	lockdep_assert_held(&bp->b_lock);

	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
		percpu_counter_dec(&bp->b_target->bt_io_count);
	}
}

static inline void
xfs_buf_ioacct_dec(
	struct xfs_buf	*bp)
{
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);
	spin_unlock(&bp->b_lock);
}

/*
 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
 * b_lru_ref count so that the buffer is freed immediately when the buffer
 * reference count falls to zero. If the buffer is already on the LRU, we need
 * to remove the reference that the LRU holds on the buffer.
 *
 * This prevents build-up of stale buffers on the LRU.
 */
void
xfs_buf_stale(
	struct xfs_buf	*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_STALE;

	/*
	 * Clear the delwri status so that a delwri queue walker will not
	 * flush this buffer to disk now that it is stale. The delwri queue has
	 * a reference to the buffer, so this is safe to do.
	 */
	bp->b_flags &= ~_XBF_DELWRI_Q;

	/*
	 * Once the buffer is marked stale and unlocked, a subsequent lookup
	 * could reset b_flags. There is no guarantee that the buffer is
	 * unaccounted (released to LRU) before that occurs. Drop in-flight
	 * status now to preserve accounting consistency.
	 */
	spin_lock(&bp->b_lock);
	__xfs_buf_ioacct_dec(bp);

	atomic_set(&bp->b_lru_ref, 0);
	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
		atomic_dec(&bp->b_hold);

	ASSERT(atomic_read(&bp->b_hold) >= 1);
	spin_unlock(&bp->b_lock);
}

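/*
 * Set up the buffer map array. A single map is stored inline in the buffer
 * itself; multi-segment (discontiguous) buffers get a separately allocated
 * array.
 */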
static int
xfs_buf_get_maps(
	struct xfs_buf		*bp,
	int			map_count)
{
	ASSERT(bp->b_maps == NULL);
	bp->b_map_count = map_count;

	if (map_count == 1) {
		bp->b_maps = &bp->__b_map;
		return 0;
	}

	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
				KM_NOFS);
	if (!bp->b_maps)
		return -ENOMEM;
	return 0;
}

/*
 *	Frees b_maps if it was allocated separately from the embedded map.
 */
static void
xfs_buf_free_maps(
	struct xfs_buf	*bp)
{
	if (bp->b_maps != &bp->__b_map) {
		kmem_free(bp->b_maps);
		bp->b_maps = NULL;
	}
}

struct xfs_buf *
_xfs_buf_alloc(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags)
{
	struct xfs_buf		*bp;
	int			error;
	int			i;

	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
	if (unlikely(!bp))
		return NULL;

	/*
	 * We don't want certain flags to appear in b_flags unless they are
	 * specifically set by later operations on the buffer.
	 */
	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);

	atomic_set(&bp->b_hold, 1);
	atomic_set(&bp->b_lru_ref, 1);
	init_completion(&bp->b_iowait);
	INIT_LIST_HEAD(&bp->b_lru);
	INIT_LIST_HEAD(&bp->b_list);
	sema_init(&bp->b_sema, 0); /* held, no waiters */
	spin_lock_init(&bp->b_lock);
	XB_SET_OWNER(bp);
	bp->b_target = target;
	bp->b_flags = flags;

	/*
	 * Set length and io_length to the same value initially.
	 * I/O routines should use io_length, which will be the same in
	 * most cases but may be reset to a different value later.
	 */
	error = xfs_buf_get_maps(bp, nmaps);
	if (error) {
		kmem_zone_free(xfs_buf_zone, bp);
		return NULL;
	}

	bp->b_bn = map[0].bm_bn;
	bp->b_length = 0;
	for (i = 0; i < nmaps; i++) {
		bp->b_maps[i].bm_bn = map[i].bm_bn;
		bp->b_maps[i].bm_len = map[i].bm_len;
		bp->b_length += map[i].bm_len;
	}
	bp->b_io_length = bp->b_length;

	atomic_set(&bp->b_pin_count, 0);
	init_waitqueue_head(&bp->b_waiters);

	XFS_STATS_INC(target->bt_mount, xb_create);
	trace_xfs_buf_init(bp, _RET_IP_);

	return bp;
}

/*
 *	Allocate a page array capable of holding a specified number
 *	of pages, and point the page buf at it.
 */
STATIC int
_xfs_buf_get_pages(
	xfs_buf_t		*bp,
	int			page_count)
{
	/* Make sure that we have a page list */
	if (bp->b_pages == NULL) {
		bp->b_page_count = page_count;
		if (page_count <= XB_PAGES) {
			bp->b_pages = bp->b_page_array;
		} else {
			bp->b_pages = kmem_alloc(sizeof(struct page *) *
						 page_count, KM_NOFS);
			if (bp->b_pages == NULL)
				return -ENOMEM;
		}
		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
	}
	return 0;
}

/*
 *	Frees b_pages if it was allocated.
 */
STATIC void
_xfs_buf_free_pages(
	xfs_buf_t	*bp)
{
	if (bp->b_pages != bp->b_page_array) {
		kmem_free(bp->b_pages);
		bp->b_pages = NULL;
	}
}

/*
 *	Releases the specified buffer.
 *
 *	The buffer must not be on any LRU by this point. This frees all the
 *	memory backing the buffer (pages or a heap allocation), the map array
 *	and finally the buffer structure itself.
 */
void
xfs_buf_free(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_free(bp, _RET_IP_);

	ASSERT(list_empty(&bp->b_lru));

	if (bp->b_flags & _XBF_PAGES) {
		uint		i;

		if (xfs_buf_is_vmapped(bp))
			vm_unmap_ram(bp->b_addr - bp->b_offset,
					bp->b_page_count);

		for (i = 0; i < bp->b_page_count; i++) {
			struct page	*page = bp->b_pages[i];

			__free_page(page);
		}
	} else if (bp->b_flags & _XBF_KMEM)
		kmem_free(bp->b_addr);
	_xfs_buf_free_pages(bp);
	xfs_buf_free_maps(bp);
	kmem_zone_free(xfs_buf_zone, bp);
}

/*
 * Allocates all the pages for the buffer in question and builds its page list.
 */
STATIC int
xfs_buf_allocate_memory(
	xfs_buf_t		*bp,
	uint			flags)
{
	size_t			size;
	size_t			nbytes, offset;
	gfp_t			gfp_mask = xb_to_gfp(flags);
	unsigned short		page_count, i;
	xfs_off_t		start, end;
	int			error;

	/*
	 * for buffers that are contained within a single page, just allocate
	 * the memory from the heap - there's no need for the complexity of
	 * page arrays to keep allocation down to order 0.
	 */
	size = BBTOB(bp->b_length);
	if (size < PAGE_SIZE) {
		bp->b_addr = kmem_alloc(size, KM_NOFS);
		if (!bp->b_addr) {
			/* low memory - use alloc_page loop instead */
			goto use_alloc_page;
		}

		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
			/* b_addr spans two pages - use alloc_page instead */
			kmem_free(bp->b_addr);
			bp->b_addr = NULL;
			goto use_alloc_page;
		}
		bp->b_offset = offset_in_page(bp->b_addr);
		bp->b_pages = bp->b_page_array;
		bp->b_pages[0] = virt_to_page(bp->b_addr);
		bp->b_page_count = 1;
		bp->b_flags |= _XBF_KMEM;
		return 0;
	}

use_alloc_page:
	start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
								>> PAGE_SHIFT;
	page_count = end - start;
	error = _xfs_buf_get_pages(bp, page_count);
	if (unlikely(error))
		return error;

	offset = bp->b_offset;
	bp->b_flags |= _XBF_PAGES;

	for (i = 0; i < bp->b_page_count; i++) {
		struct page	*page;
		uint		retries = 0;
retry:
		page = alloc_page(gfp_mask);
		if (unlikely(page == NULL)) {
			if (flags & XBF_READ_AHEAD) {
				bp->b_page_count = i;
				error = -ENOMEM;
				goto out_free_pages;
			}

			/*
			 * This could deadlock.
			 *
			 * But until all the XFS lowlevel code is revamped to
			 * handle buffer allocation failures we can't do much.
			 */
			if (!(++retries % 100))
				xfs_err(NULL,
		"%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
					current->comm, current->pid,
					__func__, gfp_mask);

			XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry;
		}

		XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);

		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
		size -= nbytes;
		bp->b_pages[i] = page;
		offset = 0;
	}
	return 0;

out_free_pages:
	for (i = 0; i < bp->b_page_count; i++)
		__free_page(bp->b_pages[i]);
	bp->b_flags &= ~_XBF_PAGES;
	return error;
}

/*
 *	Map buffer into kernel address-space if necessary.
 */
STATIC int
_xfs_buf_map_pages(
	xfs_buf_t		*bp,
	uint			flags)
{
	ASSERT(bp->b_flags & _XBF_PAGES);
	if (bp->b_page_count == 1) {
		/* A single page buffer is always mappable */
		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
	} else if (flags & XBF_UNMAPPED) {
		bp->b_addr = NULL;
	} else {
		int retried = 0;
		unsigned nofs_flag;

		/*
		 * vm_map_ram() will allocate auxiliary structures (e.g.
		 * pagetables) with GFP_KERNEL, yet we are likely to be under
		 * GFP_NOFS context here. Hence we need to tell memory reclaim
		 * that we are in such a context via PF_MEMALLOC_NOFS to prevent
		 * memory reclaim re-entering the filesystem here and
		 * potentially deadlocking.
		 */
		nofs_flag = memalloc_nofs_save();
		do {
			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
						-1, PAGE_KERNEL);
			if (bp->b_addr)
				break;
			vm_unmap_aliases();
		} while (retried++ <= 1);
		memalloc_nofs_restore(nofs_flag);

		if (!bp->b_addr)
			return -ENOMEM;
		bp->b_addr += bp->b_offset;
	}

	return 0;
}

/*
 *	Finding and Reading Buffers
 */
static int
_xfs_buf_obj_cmp(
	struct rhashtable_compare_arg	*arg,
	const void			*obj)
{
	const struct xfs_buf_map	*map = arg->key;
	const struct xfs_buf		*bp = obj;

	/*
	 * The key hashing in the lookup path depends on the key being the
	 * first element of the compare_arg, make sure to assert this.
	 */
	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);

	if (bp->b_bn != map->bm_bn)
		return 1;

	if (unlikely(bp->b_length != map->bm_len)) {
		/*
		 * found a block number match. If the range doesn't
		 * match, the only way this is allowed is if the buffer
		 * in the cache is stale and the transaction that made
		 * it stale has not yet committed. i.e. we are
		 * reallocating a busy extent. Skip this buffer and
		 * continue searching for an exact match.
		 */
		ASSERT(bp->b_flags & XBF_STALE);
		return 1;
	}
	return 0;
}

static const struct rhashtable_params xfs_buf_hash_params = {
	.min_size		= 32,	/* empty AGs have minimal footprint */
	.nelem_hint		= 16,
	.key_len		= sizeof(xfs_daddr_t),
	.key_offset		= offsetof(struct xfs_buf, b_bn),
	.head_offset		= offsetof(struct xfs_buf, b_rhash_head),
	.automatic_shrinking	= true,
	.obj_cmpfn		= _xfs_buf_obj_cmp,
};

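/*
 * Each AG carries its own rhashtable of cached buffers. pag_buf_lock
 * serialises lookup/insert/remove against the table; set it up and tear it
 * down along with the table here.
 */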
int
xfs_buf_hash_init(
	struct xfs_perag	*pag)
{
	spin_lock_init(&pag->pag_buf_lock);
	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
}

void
xfs_buf_hash_destroy(
	struct xfs_perag	*pag)
{
	rhashtable_destroy(&pag->pag_buf_hash);
}

/*
 * Look up, and create if absent, a lockable buffer for a given range of
 * the block device. The buffer is returned locked. No I/O is implied by
 * this call.
 */
xfs_buf_t *
_xfs_buf_find(
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	xfs_buf_t		*new_bp)
{
	struct xfs_perag	*pag;
	xfs_buf_t		*bp;
	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
	xfs_daddr_t		eofs;
	int			i;

	for (i = 0; i < nmaps; i++)
		cmap.bm_len += map[i].bm_len;

	/* Check for IOs smaller than the sector size / not sector aligned */
	ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
	ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));

	/*
	 * Corrupted block numbers can get through to here, unfortunately, so we
	 * have to check that the buffer falls within the filesystem bounds.
	 */
	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
	if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
		/*
		 * XXX (dgc): we should really be returning -EFSCORRUPTED here,
		 * but none of the higher level infrastructure supports
		 * returning a specific error on buffer lookup failures.
		 */
		xfs_alert(btp->bt_mount,
			  "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
			  __func__, cmap.bm_bn, eofs);
		WARN_ON(1);
		return NULL;
	}

	pag = xfs_perag_get(btp->bt_mount,
			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));

	spin_lock(&pag->pag_buf_lock);
	bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
				    xfs_buf_hash_params);
	if (bp) {
		atomic_inc(&bp->b_hold);
		goto found;
	}

	/* No match found */
	if (new_bp) {
		/* the buffer keeps the perag reference until it is freed */
		new_bp->b_pag = pag;
		rhashtable_insert_fast(&pag->pag_buf_hash,
				       &new_bp->b_rhash_head,
				       xfs_buf_hash_params);
		spin_unlock(&pag->pag_buf_lock);
	} else {
		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
	}
	return new_bp;

found:
	spin_unlock(&pag->pag_buf_lock);
	xfs_perag_put(pag);

	if (!xfs_buf_trylock(bp)) {
		if (flags & XBF_TRYLOCK) {
			xfs_buf_rele(bp);
			XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
			return NULL;
		}
		xfs_buf_lock(bp);
		XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
	}

	/*
	 * if the buffer is stale, clear all the external state associated with
	 * it. We need to keep flags such as how we allocated the buffer memory
	 * intact here.
	 */
	if (bp->b_flags & XBF_STALE) {
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		ASSERT(bp->b_iodone == NULL);
		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
		bp->b_ops = NULL;
	}

	trace_xfs_buf_find(bp, flags, _RET_IP_);
	XFS_STATS_INC(btp->bt_mount, xb_get_locked);
	return bp;
}

/*
 * Assembles a buffer covering the specified range. The code is optimised for
 * cache hits, as metadata intensive workloads will see 3 orders of magnitude
 * more hits than misses.
 */
struct xfs_buf *
xfs_buf_get_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags)
{
	struct xfs_buf		*bp;
	struct xfs_buf		*new_bp;
	int			error = 0;

	bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
	if (likely(bp))
		goto found;

	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
	if (unlikely(!new_bp))
		return NULL;

	error = xfs_buf_allocate_memory(new_bp, flags);
	if (error) {
		xfs_buf_free(new_bp);
		return NULL;
	}

	bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
	if (!bp) {
		xfs_buf_free(new_bp);
		return NULL;
	}

	if (bp != new_bp)
		xfs_buf_free(new_bp);

found:
	if (!bp->b_addr) {
		error = _xfs_buf_map_pages(bp, flags);
		if (unlikely(error)) {
			xfs_warn(target->bt_mount,
				"%s: failed to map pages", __func__);
			xfs_buf_relse(bp);
			return NULL;
		}
	}

	/*
	 * Clear b_error if this is a lookup from a caller that doesn't expect
	 * valid data to be found in the buffer.
	 */
	if (!(flags & XBF_READ))
		xfs_buf_ioerror(bp, 0);

	XFS_STATS_INC(target->bt_mount, xb_get);
	trace_xfs_buf_get(bp, flags, _RET_IP_);
	return bp;
}

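/*
 * Kick off a read against a buffer that has just been looked up. Async
 * reads return immediately after submission; sync reads wait for completion
 * and return the buffer's error state.
 */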
STATIC int
_xfs_buf_read(
	xfs_buf_t		*bp,
	xfs_buf_flags_t		flags)
{
	ASSERT(!(flags & XBF_WRITE));
	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);

	if (flags & XBF_ASYNC) {
		xfs_buf_submit(bp);
		return 0;
	}
	return xfs_buf_submit_wait(bp);
}

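/*
 * Get and read in a buffer. If the buffer is already cached and valid
 * (XBF_DONE), no I/O is issued; otherwise the supplied verifier ops are
 * attached and a read is started.
 */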
xfs_buf_t *
xfs_buf_read_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;

	flags |= XBF_READ;

	bp = xfs_buf_get_map(target, map, nmaps, flags);
	if (bp) {
		trace_xfs_buf_read(bp, flags, _RET_IP_);

		if (!(bp->b_flags & XBF_DONE)) {
			XFS_STATS_INC(target->bt_mount, xb_get_read);
			bp->b_ops = ops;
			_xfs_buf_read(bp, flags);
		} else if (flags & XBF_ASYNC) {
			/*
			 * Read ahead call which is already satisfied,
			 * drop the buffer
			 */
			xfs_buf_relse(bp);
			return NULL;
		} else {
			/* We do not want read in the flags */
			bp->b_flags &= ~XBF_READ;
		}
	}

	return bp;
}

/*
 *	If we are not low on memory then do the readahead in a deadlock
 *	safe manner.
 */
void
xfs_buf_readahead_map(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	int			nmaps,
	const struct xfs_buf_ops *ops)
{
	if (bdi_read_congested(target->bt_bdev->bd_bdi))
		return;

	xfs_buf_read_map(target, map, nmaps,
			 XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
}

/*
 * Read an uncached buffer from disk. Allocates and returns a locked
 * buffer containing the disk contents or nothing.
 */
int
xfs_buf_read_uncached(
	struct xfs_buftarg	*target,
	xfs_daddr_t		daddr,
	size_t			numblks,
	int			flags,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;

	*bpp = NULL;

	bp = xfs_buf_get_uncached(target, numblks, flags);
	if (!bp)
		return -ENOMEM;

	/* set up the buffer for a read IO */
	ASSERT(bp->b_map_count == 1);
	bp->b_bn = XFS_BUF_DADDR_NULL;	/* always null for uncached buffers */
	bp->b_maps[0].bm_bn = daddr;
	bp->b_flags |= XBF_READ;
	bp->b_ops = ops;

	xfs_buf_submit_wait(bp);
	if (bp->b_error) {
		int	error = bp->b_error;
		xfs_buf_relse(bp);
		return error;
	}

	*bpp = bp;
	return 0;
}

/*
 * Return a buffer allocated as an empty buffer and associated to external
 * memory via xfs_buf_associate_memory() back to its empty state.
 */
void
xfs_buf_set_empty(
	struct xfs_buf		*bp,
	size_t			numblks)
{
	if (bp->b_pages)
		_xfs_buf_free_pages(bp);

	bp->b_pages = NULL;
	bp->b_page_count = 0;
	bp->b_addr = NULL;
	bp->b_length = numblks;
	bp->b_io_length = numblks;

	ASSERT(bp->b_map_count == 1);
	bp->b_bn = XFS_BUF_DADDR_NULL;
	bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
	bp->b_maps[0].bm_len = bp->b_length;
}

static inline struct page *
mem_to_page(
	void			*addr)
{
	if (!is_vmalloc_addr(addr)) {
		return virt_to_page(addr);
	} else {
		return vmalloc_to_page(addr);
	}
}

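/*
 * Associate external memory with the buffer: build the page array covering
 * the supplied region and set the buffer lengths accordingly. The memory
 * must remain valid for the life of the buffer.
 */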
int
xfs_buf_associate_memory(
	xfs_buf_t		*bp,
	void			*mem,
	size_t			len)
{
	int			rval;
	int			i = 0;
	unsigned long		pageaddr;
	unsigned long		offset;
	size_t			buflen;
	int			page_count;

	pageaddr = (unsigned long)mem & PAGE_MASK;
	offset = (unsigned long)mem - pageaddr;
	buflen = PAGE_ALIGN(len + offset);
	page_count = buflen >> PAGE_SHIFT;

	/* Free any previous set of page pointers */
	if (bp->b_pages)
		_xfs_buf_free_pages(bp);

	bp->b_pages = NULL;
	bp->b_addr = mem;

	rval = _xfs_buf_get_pages(bp, page_count);
	if (rval)
		return rval;

	bp->b_offset = offset;

	for (i = 0; i < bp->b_page_count; i++) {
		bp->b_pages[i] = mem_to_page((void *)pageaddr);
		pageaddr += PAGE_SIZE;
	}

	bp->b_io_length = BTOBB(len);
	bp->b_length = BTOBB(buflen);

	return 0;
}

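/*
 * Allocate an uncached buffer: one that is not inserted into the per-AG
 * cache and is backed by freshly allocated pages. The buffer is returned
 * locked with no disk address assigned.
 */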
xfs_buf_t *
xfs_buf_get_uncached(
	struct xfs_buftarg	*target,
	size_t			numblks,
	int			flags)
{
	unsigned long		page_count;
	int			error, i;
	struct xfs_buf		*bp;
	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

	/* flags might contain irrelevant bits, pass only what we care about */
	bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
	if (unlikely(bp == NULL))
		goto fail;

	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
	error = _xfs_buf_get_pages(bp, page_count);
	if (error)
		goto fail_free_buf;

	for (i = 0; i < page_count; i++) {
		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
		if (!bp->b_pages[i])
			goto fail_free_mem;
	}
	bp->b_flags |= _XBF_PAGES;

	error = _xfs_buf_map_pages(bp, 0);
	if (unlikely(error)) {
		xfs_warn(target->bt_mount,
			"%s: failed to map pages", __func__);
		goto fail_free_mem;
	}

	trace_xfs_buf_get_uncached(bp, _RET_IP_);
	return bp;

 fail_free_mem:
	while (--i >= 0)
		__free_page(bp->b_pages[i]);
	_xfs_buf_free_pages(bp);
 fail_free_buf:
	xfs_buf_free_maps(bp);
	kmem_zone_free(xfs_buf_zone, bp);
 fail:
	return NULL;
}

/*
 *	Increment reference count on buffer, to hold the buffer concurrently
 *	with another thread which may release (free) the buffer asynchronously.
 *	Must hold the buffer already to call this function.
 */
void
xfs_buf_hold(
	xfs_buf_t		*bp)
{
	trace_xfs_buf_hold(bp, _RET_IP_);
	atomic_inc(&bp->b_hold);
}

/*
 *	Release a hold on the specified buffer. If the hold count reaches
 *	zero, the buffer is freed or put on the LRU for later reuse.
 */
void
xfs_buf_rele(
	xfs_buf_t		*bp)
{
	struct xfs_perag	*pag = bp->b_pag;
	bool			release;
	bool			freebuf = false;

	trace_xfs_buf_rele(bp, _RET_IP_);

	if (!pag) {
		ASSERT(list_empty(&bp->b_lru));
		if (atomic_dec_and_test(&bp->b_hold)) {
			xfs_buf_ioacct_dec(bp);
			xfs_buf_free(bp);
		}
		return;
	}

	ASSERT(atomic_read(&bp->b_hold) > 0);

	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
	spin_lock(&bp->b_lock);
	if (!release) {
		/*
		 * Drop the in-flight state if the buffer is already on the LRU
		 * and it holds the only reference. This is racy because we
		 * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT
		 * ensures the decrement occurs only once per-buf.
		 */
		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
			__xfs_buf_ioacct_dec(bp);
		goto out_unlock;
	}

	/* the last reference has been dropped ... */
	__xfs_buf_ioacct_dec(bp);
	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
		/*
		 * If the buffer is added to the LRU take a new reference to the
		 * buffer for the LRU and clear the (now stale) dispose list
		 * state flag
		 */
		if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
			bp->b_state &= ~XFS_BSTATE_DISPOSE;
			atomic_inc(&bp->b_hold);
		}
		spin_unlock(&pag->pag_buf_lock);
	} else {
		/*
		 * most of the time buffers will already be removed from the
		 * LRU, so optimise that case by checking for the
		 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
		 * was on was the disposal list
		 */
		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
			list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
		} else {
			ASSERT(list_empty(&bp->b_lru));
		}

		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
				       xfs_buf_hash_params);
		spin_unlock(&pag->pag_buf_lock);
		xfs_perag_put(pag);
		freebuf = true;
	}

out_unlock:
	spin_unlock(&bp->b_lock);

	if (freebuf)
		xfs_buf_free(bp);
}

/*
 *	Lock a buffer object, if it is not already locked.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we
 *	are being asked to lock a buffer that has been reallocated. Because
 *	it is pinned, we know that the log has not been pushed to disk and
 *	hence it will still be locked.  Rather than continuing to have trylock
 *	attempts fail until someone else pushes the log, push it ourselves
 *	before returning.  This means that the flushing will iterate the log
 *	and will not wait for the buffer lock to clear.
 */
int
xfs_buf_trylock(
	struct xfs_buf		*bp)
{
	int			locked;

	locked = down_trylock(&bp->b_sema) == 0;
	if (locked) {
		XB_SET_OWNER(bp);
		trace_xfs_buf_trylock(bp, _RET_IP_);
	} else {
		trace_xfs_buf_trylock_fail(bp, _RET_IP_);
	}
	return locked;
}

/*
 *	Lock a buffer object.
 *
 *	If we come across a stale, pinned, locked buffer, we know that we are
 *	being asked to lock a buffer that has been reallocated. Because it is
 *	pinned, we know that the log has not been pushed to disk and hence it
 *	will still be locked.  Rather than sleeping until someone else pushes
 *	the log, push it ourselves before trying to get the lock.
 */
void
xfs_buf_lock(
	struct xfs_buf		*bp)
{
	trace_xfs_buf_lock(bp, _RET_IP_);

	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
		xfs_log_force(bp->b_target->bt_mount, 0);
	down(&bp->b_sema);
	XB_SET_OWNER(bp);

	trace_xfs_buf_lock_done(bp, _RET_IP_);
}

void
xfs_buf_unlock(
	struct xfs_buf		*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	XB_CLEAR_OWNER(bp);
	up(&bp->b_sema);

	trace_xfs_buf_unlock(bp, _RET_IP_);
}

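/*
 * Wait for the buffer's pin count to drop to zero before a write is
 * submitted. A buffer stays pinned while the log holds modifications to it
 * that have not yet reached stable storage.
 */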
STATIC void
xfs_buf_wait_unpin(
	xfs_buf_t		*bp)
{
	DECLARE_WAITQUEUE	(wait, current);

	if (atomic_read(&bp->b_pin_count) == 0)
		return;

	add_wait_queue(&bp->b_waiters, &wait);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&bp->b_pin_count) == 0)
			break;
		io_schedule();
	}
	remove_wait_queue(&bp->b_waiters, &wait);
	set_current_state(TASK_RUNNING);
}

/*
 *	Buffer Utility Routines
 */
void
xfs_buf_ioend(
	struct xfs_buf	*bp)
{
	bool		read = bp->b_flags & XBF_READ;

	trace_xfs_buf_iodone(bp, _RET_IP_);

	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);

	/*
	 * Pull in IO completion errors now. We are guaranteed to be running
	 * single threaded, so we don't need the lock to read b_io_error.
	 */
	if (!bp->b_error && bp->b_io_error)
		xfs_buf_ioerror(bp, bp->b_io_error);

	/* Only validate buffers that were read without errors */
	if (read && !bp->b_error && bp->b_ops) {
		ASSERT(!bp->b_iodone);
		bp->b_ops->verify_read(bp);
	}

	if (!bp->b_error)
		bp->b_flags |= XBF_DONE;

	if (bp->b_iodone)
		(*(bp->b_iodone))(bp);
	else if (bp->b_flags & XBF_ASYNC)
		xfs_buf_relse(bp);
	else
		complete(&bp->b_iowait);
}

static void
xfs_buf_ioend_work(
	struct work_struct	*work)
{
	struct xfs_buf		*bp =
		container_of(work, xfs_buf_t, b_ioend_work);

	xfs_buf_ioend(bp);
}

static void
xfs_buf_ioend_async(
	struct xfs_buf	*bp)
{
	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
	queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
}

void
xfs_buf_ioerror(
	xfs_buf_t		*bp,
	int			error)
{
	ASSERT(error <= 0 && error >= -1000);
	bp->b_error = error;
	trace_xfs_buf_ioerror(bp, error, _RET_IP_);
}

void
xfs_buf_ioerror_alert(
	struct xfs_buf		*bp,
	const char		*func)
{
	xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
		(uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
}

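/*
 * Synchronously write a locked buffer and wait for completion. On error the
 * filesystem is shut down, as a failed metadata write leaves the filesystem
 * in an inconsistent state.
 */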
int
xfs_bwrite(
	struct xfs_buf		*bp)
{
	int			error;

	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_WRITE;
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
			 XBF_WRITE_FAIL | XBF_DONE);

	error = xfs_buf_submit_wait(bp);
	if (error) {
		xfs_force_shutdown(bp->b_target->bt_mount,
				   SHUTDOWN_META_IO_ERROR);
	}
	return error;
}

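/*
 * Per-bio completion handler. Record the first I/O error seen and, once the
 * last outstanding bio for the buffer completes, punt final completion
 * processing to a workqueue.
 */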
static void
xfs_buf_bio_end_io(
	struct bio		*bio)
{
	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;

	/*
	 * don't overwrite existing errors - otherwise we can lose errors on
	 * buffers that require multiple bios to complete.
	 */
	if (bio->bi_status) {
		int error = blk_status_to_errno(bio->bi_status);

		cmpxchg(&bp->b_io_error, 0, error);
	}

	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
		xfs_buf_ioend_async(bp);
	bio_put(bio);
}

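/*
 * Issue the bios for a single map of the buffer: walk the buffer pages that
 * back this map, packing as many of them as possible into each bio, and
 * chain additional bios as needed for large maps.
 */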
static void
xfs_buf_ioapply_map(
	struct xfs_buf	*bp,
	int		map,
	int		*buf_offset,
	int		*count,
	int		op,
	int		op_flags)
{
	int		page_index;
	int		total_nr_pages = bp->b_page_count;
	int		nr_pages;
	struct bio	*bio;
	sector_t	sector = bp->b_maps[map].bm_bn;
	int		size;
	int		offset;

	total_nr_pages = bp->b_page_count;

	/* skip the pages in the buffer before the start offset */
	page_index = 0;
	offset = *buf_offset;
	while (offset >= PAGE_SIZE) {
		page_index++;
		offset -= PAGE_SIZE;
	}

	/*
	 * Limit the IO size to the length of the current vector, and update the
	 * remaining IO count for the next time around.
	 */
	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
	*count -= size;
	*buf_offset += size;

next_chunk:
	atomic_inc(&bp->b_io_remaining);
	nr_pages = min(total_nr_pages, BIO_MAX_PAGES);

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_bdev = bp->b_target->bt_bdev;
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = xfs_buf_bio_end_io;
	bio->bi_private = bp;
	bio_set_op_attrs(bio, op, op_flags);

	for (; size && nr_pages; nr_pages--, page_index++) {
		int	rbytes, nbytes = PAGE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
				      offset);
		if (rbytes < nbytes)
			break;

		offset = 0;
		sector += BTOBB(nbytes);
		size -= nbytes;
		total_nr_pages--;
	}

	if (likely(bio->bi_iter.bi_size)) {
		if (xfs_buf_is_vmapped(bp)) {
			flush_kernel_vmap_range(bp->b_addr,
						xfs_buf_vmap_len(bp));
		}
		submit_bio(bio);
		if (size)
			goto next_chunk;
	} else {
		/*
		 * This is guaranteed not to be the last io reference count
		 * because the caller (xfs_buf_submit) holds a count itself.
		 */
		atomic_dec(&bp->b_io_remaining);
		xfs_buf_ioerror(bp, -EIO);
		bio_put(bio);
	}

}

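/*
 * Map the buffer I/O request into bios and submit them. Write verifiers run
 * here before the data is handed to the block layer; the request op and
 * flags are derived from the buffer flags (sync, FUA, flush, readahead).
 */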
STATIC void
_xfs_buf_ioapply(
	struct xfs_buf	*bp)
{
	struct blk_plug	plug;
	int		op;
	int		op_flags = 0;
	int		offset;
	int		size;
	int		i;

	/*
	 * Make sure we capture only current IO errors rather than stale errors
	 * left over from previous use of the buffer (e.g. failed readahead).
	 */
	bp->b_error = 0;

	/*
	 * Initialize the I/O completion workqueue if we haven't yet or the
	 * submitter has not opted to specify a custom one.
	 */
	if (!bp->b_ioend_wq)
		bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;

	if (bp->b_flags & XBF_WRITE) {
		op = REQ_OP_WRITE;
		if (bp->b_flags & XBF_SYNCIO)
			op_flags = REQ_SYNC;
		if (bp->b_flags & XBF_FUA)
			op_flags |= REQ_FUA;
		if (bp->b_flags & XBF_FLUSH)
			op_flags |= REQ_PREFLUSH;

		/*
		 * Run the write verifier callback function if it exists. If
		 * this function fails it will mark the buffer with an error and
		 * the IO should not be dispatched.
		 */
		if (bp->b_ops) {
			bp->b_ops->verify_write(bp);
			if (bp->b_error) {
				xfs_force_shutdown(bp->b_target->bt_mount,
						   SHUTDOWN_CORRUPT_INCORE);
				return;
			}
		} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
			struct xfs_mount *mp = bp->b_target->bt_mount;

			/*
			 * non-crc filesystems don't attach verifiers during
			 * log recovery, so don't warn for such filesystems.
			 */
			if (xfs_sb_version_hascrc(&mp->m_sb)) {
				xfs_warn(mp,
					"%s: no ops on block 0x%llx/0x%x",
					__func__, bp->b_bn, bp->b_length);
				xfs_hex_dump(bp->b_addr, 64);
				dump_stack();
			}
		}
	} else if (bp->b_flags & XBF_READ_AHEAD) {
		op = REQ_OP_READ;
		op_flags = REQ_RAHEAD;
	} else {
		op = REQ_OP_READ;
	}

	/* we only use the buffer cache for meta-data */
	op_flags |= REQ_META;

	/*
	 * Walk all the vectors issuing IO on them. Set up the initial offset
	 * into the buffer and the desired IO size before we start -
	 * xfs_buf_ioapply_map() will modify them appropriately for each
	 * subsequent call.
	 */
	offset = bp->b_offset;
	size = BBTOB(bp->b_io_length);
	blk_start_plug(&plug);
	for (i = 0; i < bp->b_map_count; i++) {
		xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
		if (bp->b_error)
			break;
		if (size <= 0)
			break;	/* all done */
	}
	blk_finish_plug(&plug);
}

/*
 * Asynchronous IO submission path. This transfers the buffer lock ownership and
 * the current reference to the IO. It is not safe to reference the buffer after
 * a call to this function unless the caller holds an additional reference
 * itself.
 */
void
xfs_buf_submit(
	struct xfs_buf	*bp)
{
	trace_xfs_buf_submit(bp, _RET_IP_);

	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
	ASSERT(bp->b_flags & XBF_ASYNC);

	/* on shutdown we stale and complete the buffer immediately */
	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
		xfs_buf_ioerror(bp, -EIO);
		bp->b_flags &= ~XBF_DONE;
		xfs_buf_stale(bp);
		xfs_buf_ioend(bp);
		return;
	}

	if (bp->b_flags & XBF_WRITE)
		xfs_buf_wait_unpin(bp);

	/* clear the internal error state to avoid spurious errors */
	bp->b_io_error = 0;

	/*
	 * The caller's reference is released during I/O completion.
	 * This occurs some time after the last b_io_remaining reference is
	 * released, so after we drop our IO reference we have to have some
	 * other reference to ensure the buffer doesn't go away from underneath
	 * us. Take a direct reference to ensure we have safe access to the
	 * buffer until we are finished with it.
	 */
	xfs_buf_hold(bp);

	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from calling
	 * xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	xfs_buf_ioacct_inc(bp);
	_xfs_buf_ioapply(bp);

	/*
	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
	 * reference we took above. If we drop it to zero, run completion so
	 * that we don't return to the caller with completion still pending.
	 */
	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
		if (bp->b_error)
			xfs_buf_ioend(bp);
		else
			xfs_buf_ioend_async(bp);
	}

	xfs_buf_rele(bp);
	/* Note: it is not safe to reference bp now we've dropped our ref */
}

/*
 * Synchronous buffer IO submission path, read or write.
 */
int
xfs_buf_submit_wait(
	struct xfs_buf	*bp)
{
	int		error;

	trace_xfs_buf_submit_wait(bp, _RET_IP_);

	ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));

	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
		xfs_buf_ioerror(bp, -EIO);
		xfs_buf_stale(bp);
		bp->b_flags &= ~XBF_DONE;
		return -EIO;
	}

	if (bp->b_flags & XBF_WRITE)
		xfs_buf_wait_unpin(bp);

	/* clear the internal error state to avoid spurious errors */
	bp->b_io_error = 0;

	/*
	 * For synchronous IO, the IO does not inherit the submitters reference
	 * count, nor the buffer lock. Hence we cannot release the reference we
	 * are about to take until we've waited for all IO completion to occur,
	 * including any xfs_buf_ioend_async() work that may be pending.
	 */
	xfs_buf_hold(bp);

	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from calling
	 * xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	_xfs_buf_ioapply(bp);

	/*
	 * make sure we run completion synchronously if it raced with us and is
	 * already complete.
	 */
	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
		xfs_buf_ioend(bp);

	/* wait for completion before gathering the error from the buffer */
	trace_xfs_buf_iowait(bp, _RET_IP_);
	wait_for_completion(&bp->b_iowait);
	trace_xfs_buf_iowait_done(bp, _RET_IP_);
	error = bp->b_error;

	/*
	 * all done now, we can release the hold that keeps the buffer
	 * referenced for the entire IO.
	 */
	xfs_buf_rele(bp);
	return error;
}

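/*
 * Return a pointer to the byte at the given offset into the buffer data,
 * walking the page array if the buffer is not kernel mapped.
 */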
void *
xfs_buf_offset(
	struct xfs_buf		*bp,
	size_t			offset)
{
	struct page		*page;

	if (bp->b_addr)
		return bp->b_addr + offset;

	offset += bp->b_offset;
	page = bp->b_pages[offset >> PAGE_SHIFT];
	return page_address(page) + (offset & (PAGE_SIZE-1));
}

/*
 *	Move data into or out of a buffer.
 */
void
xfs_buf_iomove(
	xfs_buf_t		*bp,	/* buffer to process		*/
	size_t			boff,	/* starting buffer offset	*/
	size_t			bsize,	/* length to copy		*/
	void			*data,	/* data address			*/
	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
{
	size_t			bend;

	bend = boff + bsize;
	while (boff < bend) {
		struct page	*page;
		int		page_index, page_offset, csize;

		page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
		page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
		page = bp->b_pages[page_index];
		csize = min_t(size_t, PAGE_SIZE - page_offset,
				      BBTOB(bp->b_io_length) - boff);

		ASSERT((csize + page_offset) <= PAGE_SIZE);

		switch (mode) {
		case XBRW_ZERO:
			memset(page_address(page) + page_offset, 0, csize);
			break;
		case XBRW_READ:
			memcpy(data, page_address(page) + page_offset, csize);
			break;
		case XBRW_WRITE:
			memcpy(page_address(page) + page_offset, data, csize);
		}

		boff += csize;
		data += csize;
	}
}

/*
 *	Handling of buffer targets (buftargs).
 */

/*
 * Wait for any bufs with callbacks that have been submitted but have not yet
 * completed and unwind the list of pending lru bufs.
 */
static enum lru_status
xfs_buftarg_wait_rele(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)

{
	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
	struct list_head	*dispose = arg;

	if (atomic_read(&bp->b_hold) > 1) {
		/* need to wait, so skip it this pass */
		trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
		return LRU_SKIP;
	}
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;

	/*
	 * clear the LRU reference count so the buffer doesn't get
	 * ignored in xfs_buf_rele().
	 */
	atomic_set(&bp->b_lru_ref, 0);
	bp->b_state |= XFS_BSTATE_DISPOSE;
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}

void
xfs_wait_buftarg(
	struct xfs_buftarg	*btp)
{
	LIST_HEAD(dispose);
	int loop = 0;

	/*
	 * First wait on the buftarg I/O count for all in-flight buffers to be
	 * released. This is critical as new buffers do not make the LRU until
	 * they are released.
	 *
	 * Next, flush the buffer workqueue to ensure all completion processing
	 * has finished. Just waiting on buffer locks is not sufficient for
	 * async IO as the reference count held over IO is not released until
	 * after the buffer lock is dropped. Hence we need to ensure here that
	 * all reference counts have been dropped before we start walking the
	 * LRU list.
	 */
	while (percpu_counter_sum(&btp->bt_io_count))
		delay(100);
	flush_workqueue(btp->bt_mount->m_buf_workqueue);

	/* loop until there is nothing left on the lru list. */
	while (list_lru_count(&btp->bt_lru)) {
		list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
			      &dispose, LONG_MAX);

		while (!list_empty(&dispose)) {
			struct xfs_buf *bp;
			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
			list_del_init(&bp->b_lru);
			if (bp->b_flags & XBF_WRITE_FAIL) {
				xfs_alert(btp->bt_mount,
"Corruption Alert: Buffer at block 0x%llx had permanent write failures!",
					(long long)bp->b_bn);
				xfs_alert(btp->bt_mount,
"Please run xfs_repair to determine the extent of the problem.");
			}
			xfs_buf_rele(bp);
		}
		if (loop++ != 0)
			delay(100);
	}
}
1685
1686static enum lru_status
1687xfs_buftarg_isolate(
1688 struct list_head *item,
1689 struct list_lru_one *lru,
1690 spinlock_t *lru_lock,
1691 void *arg)
1692{
1693 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1694 struct list_head *dispose = arg;
1695
1696
1697
1698
1699
1700 if (!spin_trylock(&bp->b_lock))
1701 return LRU_SKIP;
1702
1703
1704
1705
1706
1707 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1708 spin_unlock(&bp->b_lock);
1709 return LRU_ROTATE;
1710 }
1711
1712 bp->b_state |= XFS_BSTATE_DISPOSE;
1713 list_lru_isolate_move(lru, item, dispose);
1714 spin_unlock(&bp->b_lock);
1715 return LRU_REMOVED;
1716}
1717
1718static unsigned long
1719xfs_buftarg_shrink_scan(
1720 struct shrinker *shrink,
1721 struct shrink_control *sc)
1722{
1723 struct xfs_buftarg *btp = container_of(shrink,
1724 struct xfs_buftarg, bt_shrinker);
1725 LIST_HEAD(dispose);
1726 unsigned long freed;
1727
1728 freed = list_lru_shrink_walk(&btp->bt_lru, sc,
1729 xfs_buftarg_isolate, &dispose);
1730
1731 while (!list_empty(&dispose)) {
1732 struct xfs_buf *bp;
1733 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1734 list_del_init(&bp->b_lru);
1735 xfs_buf_rele(bp);
1736 }
1737
1738 return freed;
1739}
1740
1741static unsigned long
1742xfs_buftarg_shrink_count(
1743 struct shrinker *shrink,
1744 struct shrink_control *sc)
1745{
1746 struct xfs_buftarg *btp = container_of(shrink,
1747 struct xfs_buftarg, bt_shrinker);
1748 return list_lru_shrink_count(&btp->bt_lru, sc);
1749}
1750
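/*
 * Tear down a buffer target: unregister the shrinker, release the LRU and
 * I/O accounting structures, and flush the underlying block device before
 * freeing the buftarg itself.
 */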
void
xfs_free_buftarg(
	struct xfs_mount	*mp,
	struct xfs_buftarg	*btp)
{
	unregister_shrinker(&btp->bt_shrinker);
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
	percpu_counter_destroy(&btp->bt_io_count);
	list_lru_destroy(&btp->bt_lru);

	xfs_blkdev_issue_flush(btp);

	kmem_free(btp);
}

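/*
 * Configure the metadata sector size for the buffer target and propagate it
 * to the underlying block device.
 */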
int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		sectorsize)
{
	/* Set up metadata sector size info */
	btp->bt_meta_sectorsize = sectorsize;
	btp->bt_meta_sectormask = sectorsize - 1;

	if (set_blocksize(btp->bt_bdev, sectorsize)) {
		xfs_warn(btp->bt_mount,
			"Cannot set_blocksize to %u on device %pg",
			sectorsize, btp->bt_bdev);
		return -EINVAL;
	}

	/* Set up device logical sector size mask */
	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;

	return 0;
}

/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */
STATIC int
xfs_setsize_buftarg_early(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct xfs_mount	*mp,
	struct block_device	*bdev)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);

	btp->bt_mount = mp;
	btp->bt_dev = bdev->bd_dev;
	btp->bt_bdev = bdev;

	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error;

	if (list_lru_init(&btp->bt_lru))
		goto error;

	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
		goto error;

	btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
	btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
	btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
	register_shrinker(&btp->bt_shrinker);
	return btp;

error:
	kmem_free(btp);
	return NULL;
}

/*
 * Cancel a delayed write list.
 *
 * Remove each buffer from the list, clear the delwri queue flag and drop the
 * associated buffer reference.
 */
void
xfs_buf_delwri_cancel(
	struct list_head	*list)
{
	struct xfs_buf		*bp;

	while (!list_empty(list)) {
		bp = list_first_entry(list, struct xfs_buf, b_list);

		xfs_buf_lock(bp);
		bp->b_flags &= ~_XBF_DELWRI_Q;
		list_del_init(&bp->b_list);
		xfs_buf_relse(bp);
	}
}

/*
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been.  Note that
 * neither this routine nor the buffer list submission functions perform any
 * internal synchronization.  It is expected that the lists are thread-local
 * to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */
bool
xfs_buf_delwri_queue(
	struct xfs_buf		*bp,
	struct list_head	*list)
{
	ASSERT(xfs_buf_islocked(bp));
	ASSERT(!(bp->b_flags & XBF_READ));

	/*
	 * If the buffer is already marked delwri it is already on a delwri
	 * queue, and we leave it there. There is nothing more to do.
	 */
	if (bp->b_flags & _XBF_DELWRI_Q) {
		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
		return false;
	}

	trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	/*
	 * If a buffer gets written out synchronously or marked stale while it
	 * is on a delwri list we lazily remove it. To do this, the other party
	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer on the
	 * list. Hence we have to be careful to only take a new reference and
	 * add the buffer to the list if it is not already on it.
	 */
	bp->b_flags |= _XBF_DELWRI_Q;
	if (list_empty(&bp->b_list)) {
		atomic_inc(&bp->b_hold);
		list_add_tail(&bp->b_list, list);
	}

	return true;
}

/*
 * Compare function is more complex than it needs to be because
 * the return value is only 32 bits and we are doing comparisons
 * on 64 bit values
 */
static int
xfs_buf_cmp(
	void		*priv,
	struct list_head *a,
	struct list_head *b)
{
	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
	xfs_daddr_t		diff;

	diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}

/*
 * Submit buffers for write.
 *
 * The buffers are sorted by disk address and submitted asynchronously. If a
 * wait_list is specified, an extra reference is taken on each buffer and the
 * buffer is moved to the wait list so the caller can wait for and check each
 * one; otherwise pinned or already-locked buffers are skipped, and the number
 * of pinned buffers encountered is returned.
 */
static int
xfs_buf_delwri_submit_buffers(
	struct list_head	*buffer_list,
	struct list_head	*wait_list)
{
	struct xfs_buf		*bp, *n;
	LIST_HEAD		(submit_list);
	int			pinned = 0;
	struct blk_plug		plug;

	list_sort(NULL, buffer_list, xfs_buf_cmp);

	blk_start_plug(&plug);
	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
		if (!wait_list) {
			if (xfs_buf_ispinned(bp)) {
				pinned++;
				continue;
			}
			if (!xfs_buf_trylock(bp))
				continue;
		} else {
			xfs_buf_lock(bp);
		}

		/*
		 * Someone else might have written the buffer synchronously or
		 * marked it stale in the meantime.  In that case only the
		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
		 * buffer and remove it from the list, so we just skip it here.
		 */
		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
			list_del_init(&bp->b_list);
			xfs_buf_relse(bp);
			continue;
		}

		trace_xfs_buf_delwri_split(bp, _RET_IP_);

		/*
		 * We do all IO submission async. This means if we need
		 * to wait for IO completion we need to take an extra
		 * reference so the buffer is still valid on the other
		 * side. We need to move the buffer onto the io_list
		 * at this point so the caller can still access it.
		 */
		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
		if (wait_list) {
			xfs_buf_hold(bp);
			list_move_tail(&bp->b_list, wait_list);
		} else
			list_del_init(&bp->b_list);

		xfs_buf_submit(bp);
	}
	blk_finish_plug(&plug);

	return pinned;
}

/*
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers.  This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing as the @buffer_list is consumed in this
 * function.
 */
int
xfs_buf_delwri_submit_nowait(
	struct list_head	*buffer_list)
{
	return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
}

/*
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require such
 * functionality.
 */
int
xfs_buf_delwri_submit(
	struct list_head	*buffer_list)
{
	LIST_HEAD		(wait_list);
	int			error = 0, error2;
	struct xfs_buf		*bp;

	xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);

	/* Wait for IO to complete. */
	while (!list_empty(&wait_list)) {
		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

		list_del_init(&bp->b_list);

		/* locking the buffer will wait for async IO completion. */
		xfs_buf_lock(bp);
		error2 = bp->b_error;
		xfs_buf_relse(bp);
		if (!error)
			error = error2;
	}

	return error;
}

/*
 * Push a single buffer on a delwri queue.
 *
 * The purpose of this function is to submit a single buffer of a delwri queue
 * and return with the buffer still on the original queue. The waiting delwri
 * buffer submission infrastructure guarantees transfer of the delwri queue
 * buffer reference to a temporary wait list. We reuse this infrastructure to
 * transfer the buffer back to the original queue.
 *
 * Note the buffer transitions from the queued state, to the submitted and wait
 * listed state and back to the queued state during this call. The buffer
 * locking and queue management logic between _delwri_pushbuf() and
 * _delwri_queue() guarantee that the buffer cannot be queued to another list
 * in the process.
 */
int
xfs_buf_delwri_pushbuf(
	struct xfs_buf		*bp,
	struct list_head	*buffer_list)
{
	LIST_HEAD		(submit_list);
	int			error;

	ASSERT(bp->b_flags & _XBF_DELWRI_Q);

	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);

	/*
	 * Isolate the buffer to a new local list so we can submit it for I/O
	 * independently from the rest of the original list.
	 */
	xfs_buf_lock(bp);
	list_move(&bp->b_list, &submit_list);
	xfs_buf_unlock(bp);

	/*
	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
	 * the buffer on the wait list with an associated reference. Rather than
	 * bounce the buffer from a local wait list back to the original list
	 * after I/O completion, reuse the original list as the wait list.
	 */
	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);

	/*
	 * The buffer is now under I/O and wait listed as during typical delwri
	 * submission. Lock the buffer to wait for I/O completion. Rather than
	 * remove the buffer from the wait list and release the reference, we
	 * want to return with the buffer queued to the original list. The
	 * buffer already sits on the original list with a wait list reference,
	 * however. If we let the queue inherit the wait list reference, all we
	 * need to do is reset the DELWRI_Q flag.
	 */
	xfs_buf_lock(bp);
	error = bp->b_error;
	bp->b_flags |= _XBF_DELWRI_Q;
	xfs_buf_unlock(bp);

	return error;
}

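/*
 * Set up the global buffer cache infrastructure: the slab zone that backs
 * all xfs_buf allocations.
 */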
int __init
xfs_buf_init(void)
{
	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
						KM_ZONE_HWALIGN, NULL);
	if (!xfs_buf_zone)
		goto out;

	return 0;

 out:
	return -ENOMEM;
}

void
xfs_buf_terminate(void)
{
	kmem_zone_destroy(xfs_buf_zone);
}