1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/slab.h>
22#include <linux/delay.h>
23#include <linux/blkdev.h>
24#include <linux/module.h>
25#include <linux/seq_file.h>
26#include <linux/ratelimit.h>
27#include <linux/kthread.h>
28#include "md.h"
29#include "raid10.h"
30#include "raid0.h"
31#include "bitmap.h"
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the number of r10bio objects reserved in the r10bio mempool - confirm at
 * the place where that pool is created.
 */
#define NR_RAID10_BIOS 256
78
79
80
81
82
83
/*
 * Sentinel values that may be stored in r10bio->devs[].bio or
 * r10bio->devs[].repl_bio in place of a real bio pointer.
 *
 * IO_BLOCKED: read_balance() skips every slot whose bio is IO_BLOCKED.
 */
#define IO_BLOCKED ((struct bio *)1)

/*
 * IO_MADE_GOOD: stored by raid10_end_write_request() when a write succeeded
 * on a range that is_badblock() still reports as bad.
 */
#define IO_MADE_GOOD ((struct bio *)2)

/*
 * True when @bio is NULL or one of the sentinels above, i.e. when it must
 * not be passed to bio_put().  The argument is parenthesised so that any
 * pointer expression (for example a conditional expression) can be passed
 * without being re-associated by the cast.
 */
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)
92
93
94
95
96
/*
 * Threshold for conf->pending_count: raid10_congested() reports congestion
 * and __make_request() makes writers wait once at least this many writes
 * are queued.
 */
static int max_queued_requests = 1024;
98
/* Forward declarations of helpers that are used before they are defined. */
static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf);
static int _enough(struct r10conf *conf, int previous, int ignore);
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
				int *skipped);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
static void end_reshape_write(struct bio *bio);
static void end_reshape(struct r10conf *conf);
107
108static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
109{
110 struct r10conf *conf = data;
111 int size = offsetof(struct r10bio, devs[conf->copies]);
112
113
114
115 return kzalloc(size, gfp_flags);
116}
117
/*
 * Release callback matching r10bio_pool_alloc(): frees @r10_bio with
 * kfree().  @data is unused.
 */
static void r10bio_pool_free(void *r10_bio, void *data)
{
	kfree(r10_bio);
}
122
123
/* Size in bytes of one resync buffer (64 KiB). */
#define RESYNC_BLOCK_SIZE (64*1024)
/* Pages per resync bio: RESYNC_BLOCK_SIZE rounded up to whole pages. */
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
/* NOTE(review): 1 MiB; not used in the visible part of this file. */
#define RESYNC_WINDOW (1024*1024)
/*
 * Upper limit for conf->barrier in raise_barrier(): 32 MiB worth of
 * RESYNC_BLOCK_SIZE buffers.
 */
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
130
131
132
133
134
135
136
137
/*
 * Allocation callback that builds an r10bio together with the bios and
 * pages it needs.
 *
 * @gfp_flags: allocation flags used for every allocation made here.
 * @data:      the owning struct r10conf.
 *
 * One bio of RESYNC_PAGES pages is allocated per slot (plus a second bio per
 * slot when conf->have_replacement is set).  conf->copies slots are filled
 * while MD_RECOVERY_SYNC or MD_RECOVERY_RESHAPE is set, otherwise two.
 *
 * Returns the populated r10bio, or NULL after undoing all partial work.
 */
static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
{
	struct r10conf *conf = data;
	struct page *page;
	struct r10bio *r10_bio;
	struct bio *bio;
	int i, j;
	int nalloc;

	r10_bio = r10bio_pool_alloc(gfp_flags, conf);
	if (!r10_bio)
		return NULL;

	if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) ||
	    test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery))
		nalloc = conf->copies;
	else
		nalloc = 2;

	/*
	 * Allocate the bios, highest slot first.  On failure, slots above j
	 * are fully allocated and slots below j are still NULL (the r10bio
	 * was zeroed), which is what out_free_bio relies on.
	 */
	for (j = nalloc ; j-- ; ) {
		bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
		if (!bio)
			goto out_free_bio;
		r10_bio->devs[j].bio = bio;
		if (!conf->have_replacement)
			continue;
		bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
		if (!bio)
			goto out_free_bio;
		r10_bio->devs[j].repl_bio = bio;
	}

	/*
	 * Attach pages.  Slot 0 always gets freshly allocated pages; so does
	 * every other slot when MD_RECOVERY_SYNC is set.  Otherwise slots
	 * j > 0 reuse slot 0's pages and take an extra reference on each.
	 */
	for (j = 0 ; j < nalloc; j++) {
		struct bio *rbio = r10_bio->devs[j].repl_bio;
		bio = r10_bio->devs[j].bio;
		for (i = 0; i < RESYNC_PAGES; i++) {
			if (j > 0 && !test_bit(MD_RECOVERY_SYNC,
					       &conf->mddev->recovery)) {
				/*
				 * NOTE: this inner 'rbio' shadows the outer
				 * one and refers to slot 0's primary bio.
				 */
				struct bio *rbio = r10_bio->devs[0].bio;
				page = rbio->bi_io_vec[i].bv_page;
				get_page(page);
			} else
				page = alloc_page(gfp_flags);
			if (unlikely(!page))
				goto out_free_pages;

			bio->bi_io_vec[i].bv_page = page;
			/* the replacement bio shares the page, no extra ref */
			if (rbio)
				rbio->bi_io_vec[i].bv_page = page;
		}
	}

	return r10_bio;

out_free_pages:
	/* drop the pages already attached to the slot that failed ... */
	for ( ; i > 0 ; i--)
		safe_put_page(bio->bi_io_vec[i-1].bv_page);
	/* ... and every page of the slots completed before it */
	while (j--)
		for (i = 0; i < RESYNC_PAGES ; i++)
			safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
	/* all bios were allocated on this path, so free from slot 0 */
	j = 0;
out_free_bio:
	for ( ; j < nalloc; j++) {
		if (r10_bio->devs[j].bio)
			bio_put(r10_bio->devs[j].bio);
		if (r10_bio->devs[j].repl_bio)
			bio_put(r10_bio->devs[j].repl_bio);
	}
	r10bio_pool_free(r10_bio, conf);
	return NULL;
}
217
218static void r10buf_pool_free(void *__r10_bio, void *data)
219{
220 int i;
221 struct r10conf *conf = data;
222 struct r10bio *r10bio = __r10_bio;
223 int j;
224
225 for (j=0; j < conf->copies; j++) {
226 struct bio *bio = r10bio->devs[j].bio;
227 if (bio) {
228 for (i = 0; i < RESYNC_PAGES; i++) {
229 safe_put_page(bio->bi_io_vec[i].bv_page);
230 bio->bi_io_vec[i].bv_page = NULL;
231 }
232 bio_put(bio);
233 }
234 bio = r10bio->devs[j].repl_bio;
235 if (bio)
236 bio_put(bio);
237 }
238 r10bio_pool_free(r10bio, conf);
239}
240
241static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
242{
243 int i;
244
245 for (i = 0; i < conf->copies; i++) {
246 struct bio **bio = & r10_bio->devs[i].bio;
247 if (!BIO_SPECIAL(*bio))
248 bio_put(*bio);
249 *bio = NULL;
250 bio = &r10_bio->devs[i].repl_bio;
251 if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio))
252 bio_put(*bio);
253 *bio = NULL;
254 }
255}
256
/*
 * Release @r10_bio: put all of its per-slot bios, then return it to
 * conf->r10bio_pool.
 */
static void free_r10bio(struct r10bio *r10_bio)
{
	struct r10conf *conf = r10_bio->mddev->private;

	put_all_bios(conf, r10_bio);
	mempool_free(r10_bio, conf->r10bio_pool);
}
264
/*
 * Return @r10_bio to conf->r10buf_pool and then lower the barrier by one.
 * conf is fetched before the r10bio is handed back to the pool.
 */
static void put_buf(struct r10bio *r10_bio)
{
	struct r10conf *conf = r10_bio->mddev->private;

	mempool_free(r10_bio, conf->r10buf_pool);

	lower_barrier(conf);
}
273
/*
 * Queue @r10_bio on conf->retry_list and wake the md thread to process it.
 *
 * The list insertion and the conf->nr_queued increment are done under
 * conf->device_lock.
 */
static void reschedule_retry(struct r10bio *r10_bio)
{
	unsigned long flags;
	struct mddev *mddev = r10_bio->mddev;
	struct r10conf *conf = mddev->private;

	spin_lock_irqsave(&conf->device_lock, flags);
	list_add(&r10_bio->retry_list, &conf->retry_list);
	conf->nr_queued ++;
	spin_unlock_irqrestore(&conf->device_lock, flags);

	/* nr_queued changed: freeze_array() waits on it via wait_barrier */
	wake_up(&conf->wait_barrier);

	md_wakeup_thread(mddev->thread);
}
290
291
292
293
294
295
296static void raid_end_bio_io(struct r10bio *r10_bio)
297{
298 struct bio *bio = r10_bio->master_bio;
299 int done;
300 struct r10conf *conf = r10_bio->mddev->private;
301
302 if (bio->bi_phys_segments) {
303 unsigned long flags;
304 spin_lock_irqsave(&conf->device_lock, flags);
305 bio->bi_phys_segments--;
306 done = (bio->bi_phys_segments == 0);
307 spin_unlock_irqrestore(&conf->device_lock, flags);
308 } else
309 done = 1;
310 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
311 bio->bi_error = -EIO;
312 if (done) {
313 bio_endio(bio);
314
315
316
317
318 allow_barrier(conf);
319 }
320 free_r10bio(r10_bio);
321}
322
323
324
325
326static inline void update_head_pos(int slot, struct r10bio *r10_bio)
327{
328 struct r10conf *conf = r10_bio->mddev->private;
329
330 conf->mirrors[r10_bio->devs[slot].devnum].head_position =
331 r10_bio->devs[slot].addr + (r10_bio->sectors);
332}
333
334
335
336
337static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
338 struct bio *bio, int *slotp, int *replp)
339{
340 int slot;
341 int repl = 0;
342
343 for (slot = 0; slot < conf->copies; slot++) {
344 if (r10_bio->devs[slot].bio == bio)
345 break;
346 if (r10_bio->devs[slot].repl_bio == bio) {
347 repl = 1;
348 break;
349 }
350 }
351
352 BUG_ON(slot == conf->copies);
353 update_head_pos(slot, r10_bio);
354
355 if (slotp)
356 *slotp = slot;
357 if (replp)
358 *replp = repl;
359 return r10_bio->devs[slot].devnum;
360}
361
362static void raid10_end_read_request(struct bio *bio)
363{
364 int uptodate = !bio->bi_error;
365 struct r10bio *r10_bio = bio->bi_private;
366 int slot, dev;
367 struct md_rdev *rdev;
368 struct r10conf *conf = r10_bio->mddev->private;
369
370 slot = r10_bio->read_slot;
371 dev = r10_bio->devs[slot].devnum;
372 rdev = r10_bio->devs[slot].rdev;
373
374
375
376 update_head_pos(slot, r10_bio);
377
378 if (uptodate) {
379
380
381
382
383
384
385
386
387
388 set_bit(R10BIO_Uptodate, &r10_bio->state);
389 } else {
390
391
392
393
394
395 if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
396 rdev->raid_disk))
397 uptodate = 1;
398 }
399 if (uptodate) {
400 raid_end_bio_io(r10_bio);
401 rdev_dec_pending(rdev, conf->mddev);
402 } else {
403
404
405
406 char b[BDEVNAME_SIZE];
407 printk_ratelimited(KERN_ERR
408 "md/raid10:%s: %s: rescheduling sector %llu\n",
409 mdname(conf->mddev),
410 bdevname(rdev->bdev, b),
411 (unsigned long long)r10_bio->sector);
412 set_bit(R10BIO_ReadError, &r10_bio->state);
413 reschedule_retry(r10_bio);
414 }
415}
416
417static void close_write(struct r10bio *r10_bio)
418{
419
420 bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
421 r10_bio->sectors,
422 !test_bit(R10BIO_Degraded, &r10_bio->state),
423 0);
424 md_write_end(r10_bio->mddev);
425}
426
427static void one_write_done(struct r10bio *r10_bio)
428{
429 if (atomic_dec_and_test(&r10_bio->remaining)) {
430 if (test_bit(R10BIO_WriteError, &r10_bio->state))
431 reschedule_retry(r10_bio);
432 else {
433 close_write(r10_bio);
434 if (test_bit(R10BIO_MadeGood, &r10_bio->state))
435 reschedule_retry(r10_bio);
436 else
437 raid_end_bio_io(r10_bio);
438 }
439 }
440}
441
/*
 * Completion handler for the per-device write bios built in
 * __make_request().
 *
 * @bio: the finished write; bio->bi_private is the owning r10bio.
 *
 * Records write errors on the device, marks the r10bio Uptodate on a
 * successful write to an in-sync, non-faulty device, flags writes that
 * succeeded over a known-bad range (IO_MADE_GOOD), and finally drops this
 * bio's reference on the r10bio.
 */
static void raid10_end_write_request(struct bio *bio)
{
	struct r10bio *r10_bio = bio->bi_private;
	int dev;
	/* cleared when the rdev reference must outlive this function */
	int dec_rdev = 1;
	struct r10conf *conf = r10_bio->mddev->private;
	int slot, repl;
	struct md_rdev *rdev = NULL;

	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);

	if (repl)
		rdev = conf->mirrors[dev].replacement;
	if (!rdev) {
		/*
		 * Not a replacement write, or the replacement pointer is now
		 * NULL: use the primary rdev and treat the bio as a primary
		 * write from here on.
		 */
		smp_rmb();
		repl = 0;
		rdev = conf->mirrors[dev].rdev;
	}

	if (bio->bi_error) {
		if (repl)
			/* a failed write to a replacement fails the device */
			md_error(rdev->mddev, rdev);
		else {
			set_bit(WriteErrorSeen,	&rdev->flags);
			if (!test_and_set_bit(WantReplacement, &rdev->flags))
				set_bit(MD_RECOVERY_NEEDED,
					&rdev->mddev->recovery);
			set_bit(R10BIO_WriteError, &r10_bio->state);
			/* keep the rdev reference for the error handling */
			dec_rdev = 0;
		}
	} else {
		sector_t first_bad;
		int bad_sectors;

		/*
		 * Only a write that reached an in-sync, non-faulty device
		 * makes the whole request count as successful.
		 */
		if (test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags))
			set_bit(R10BIO_Uptodate, &r10_bio->state);

		/* the write succeeded over a range still recorded as bad */
		if (is_badblock(rdev,
				r10_bio->devs[slot].addr,
				r10_bio->sectors,
				&first_bad, &bad_sectors)) {
			bio_put(bio);
			if (repl)
				r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
			else
				r10_bio->devs[slot].bio = IO_MADE_GOOD;
			dec_rdev = 0;
			set_bit(R10BIO_MadeGood, &r10_bio->state);
		}
	}

	/* may complete the master bio or queue the r10bio for the md thread */
	one_write_done(r10_bio);
	if (dec_rdev)
		rdev_dec_pending(rdev, conf->mddev);
}
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
/*
 * Map the array sector r10bio->sector to device/sector pairs under the
 * layout described by @geo.
 *
 * Fills r10bio->devs[slot].devnum and .addr for consecutive slots: for each
 * of the geo->near_copies near copies, one entry for the near copy itself
 * followed by geo->far_copies - 1 entries for its far copies.
 */
static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
{
	int n,f;
	sector_t sector;
	sector_t chunk;
	sector_t stripe;
	int dev;
	int slot = 0;
	int last_far_set_start, last_far_set_size;

	/*
	 * When raid_disks is not a multiple of far_set_size, the last far
	 * set absorbs the remainder and is therefore larger than the others.
	 */
	last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1;
	last_far_set_start *= geo->far_set_size;

	last_far_set_size = geo->far_set_size;
	last_far_set_size += (geo->raid_disks % geo->far_set_size);

	/* split the array sector into chunk number and offset within chunk */
	chunk = r10bio->sector >> geo->chunk_shift;
	sector = r10bio->sector & geo->chunk_mask;

	/* each logical chunk occupies near_copies device chunks */
	chunk *= geo->near_copies;
	stripe = chunk;
	/* sector_div() leaves the quotient in 'stripe', returns remainder */
	dev = sector_div(stripe, geo->raid_disks);
	if (geo->far_offset)
		stripe *= geo->far_copies;

	sector += stripe << geo->chunk_shift;

	/* one iteration per near copy */
	for (n = 0; n < geo->near_copies; n++) {
		int d = dev;
		int set;
		sector_t s = sector;
		r10bio->devs[slot].devnum = d;
		r10bio->devs[slot].addr = s;
		slot++;

		/* far copies: shift device within its far set, add stride */
		for (f = 1; f < geo->far_copies; f++) {
			set = d / geo->far_set_size;
			d += geo->near_copies;

			if ((geo->raid_disks % geo->far_set_size) &&
			    (d > last_far_set_start)) {
				/* wrap inside the (larger) last far set */
				d -= last_far_set_start;
				d %= last_far_set_size;
				d += last_far_set_start;
			} else {
				/* wrap inside the far set 'set' */
				d %= geo->far_set_size;
				d += geo->far_set_size * set;
			}
			s += geo->stride;
			r10bio->devs[slot].devnum = d;
			r10bio->devs[slot].addr = s;
			slot++;
		}
		/* next near copy lives on the next device, wrapping a row */
		dev++;
		if (dev >= geo->raid_disks) {
			dev = 0;
			sector += (geo->chunk_mask + 1);
		}
	}
}
614
615static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio)
616{
617 struct geom *geo = &conf->geo;
618
619 if (conf->reshape_progress != MaxSector &&
620 ((r10bio->sector >= conf->reshape_progress) !=
621 conf->mddev->reshape_backwards)) {
622 set_bit(R10BIO_Previous, &r10bio->state);
623 geo = &conf->prev;
624 } else
625 clear_bit(R10BIO_Previous, &r10bio->state);
626
627 __raid10_find_phys(geo, r10bio);
628}
629
/*
 * Inverse of the physical mapping: translate device sector @sector on
 * device @dev into an array sector, always using the current geometry
 * (conf->geo).
 */
static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
{
	sector_t offset, chunk, vchunk;

	struct geom *geo = &conf->geo;
	/* first device and size of the far set that contains @dev */
	int far_set_start = (dev / geo->far_set_size) * geo->far_set_size;
	int far_set_size = geo->far_set_size;
	int last_far_set_start;

	if (geo->raid_disks % geo->far_set_size) {
		/* the last far set is enlarged by the remainder devices */
		last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1;
		last_far_set_start *= geo->far_set_size;

		if (dev >= last_far_set_start) {
			far_set_size = geo->far_set_size;
			far_set_size += (geo->raid_disks % geo->far_set_size);
			far_set_start = last_far_set_start;
		}
	}

	offset = sector & geo->chunk_mask;
	if (geo->far_offset) {
		int fc;
		chunk = sector >> geo->chunk_shift;
		/* fc = which far copy this is; chunk becomes the stripe */
		fc = sector_div(chunk, geo->far_copies);
		/* undo the per-copy device shift, wrapping in the far set */
		dev -= fc * geo->near_copies;
		if (dev < far_set_start)
			dev += far_set_size;
	} else {
		/* peel off one stride per far copy, undoing the dev shift */
		while (sector >= geo->stride) {
			sector -= geo->stride;
			if (dev < (geo->near_copies + far_set_start))
				dev += far_set_size - geo->near_copies;
			else
				dev -= geo->near_copies;
		}
		chunk = sector >> geo->chunk_shift;
	}
	vchunk = chunk * geo->raid_disks + dev;
	/* near copies of a chunk all map back to the same array chunk */
	sector_div(vchunk, geo->near_copies);
	return (vchunk << geo->chunk_shift) + offset;
}
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
/*
 * Choose the device to read @r10_bio from.
 *
 * @conf:        the array configuration.
 * @r10_bio:     the request; its devs[] mapping is computed here and, on
 *               success, r10_bio->read_slot is set to the chosen slot.
 * @max_sectors: receives the number of sectors that can be read from the
 *               chosen device starting at the request's first sector.
 *
 * Returns the chosen rdev with its nr_pending count incremented, or NULL if
 * no usable device was found.  The search runs under rcu_read_lock().
 */
static struct md_rdev *read_balance(struct r10conf *conf,
				    struct r10bio *r10_bio,
				    int *max_sectors)
{
	const sector_t this_sector = r10_bio->sector;
	int disk, slot;
	int sectors = r10_bio->sectors;
	int best_good_sectors;
	sector_t new_distance, best_dist;
	struct md_rdev *best_rdev, *rdev = NULL;
	int do_balance;
	int best_slot;
	struct geom *geo = &conf->geo;

	raid10_find_phys(conf, r10_bio);
	rcu_read_lock();
retry:
	sectors = r10_bio->sectors;
	best_slot = -1;
	best_rdev = NULL;
	best_dist = MaxSector;
	best_good_sectors = 0;
	do_balance = 1;

	/*
	 * Do not balance when the array still has an unfinished resync
	 * (recovery_cp < MaxSector) and the request reaches at or beyond
	 * conf->next_resync: the first usable slot is taken instead.
	 */
	if (conf->mddev->recovery_cp < MaxSector
	    && (this_sector + sectors >= conf->next_resync))
		do_balance = 0;

	for (slot = 0; slot < conf->copies ; slot++) {
		sector_t first_bad;
		int bad_sectors;
		sector_t dev_sector;

		if (r10_bio->devs[slot].bio == IO_BLOCKED)
			continue;
		disk = r10_bio->devs[slot].devnum;
		/*
		 * Prefer the replacement device, but fall back to the
		 * primary if the replacement is missing, faulty, or not yet
		 * recovered up to the end of this request.
		 */
		rdev = rcu_dereference(conf->mirrors[disk].replacement);
		if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
		    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
			rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (rdev == NULL ||
		    test_bit(Faulty, &rdev->flags))
			continue;
		if (!test_bit(In_sync, &rdev->flags) &&
		    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
			continue;

		dev_sector = r10_bio->devs[slot].addr;
		if (is_badblock(rdev, dev_sector, sectors,
				&first_bad, &bad_sectors)) {
			if (best_dist < MaxSector)
				/* a slot without bad blocks was already found */
				continue;
			if (first_bad <= dev_sector) {
				/*
				 * The request starts inside the bad range, so
				 * nothing can be read from this device here.
				 * bad_sectors becomes the length of the bad
				 * range counted from the request start.
				 */
				bad_sectors -= (dev_sector - first_bad);
				if (!do_balance && sectors > bad_sectors)
					sectors = bad_sectors;
				if (best_good_sectors > sectors)
					best_good_sectors = sectors;
			} else {
				/* a good prefix exists before the bad range */
				sector_t good_sectors =
					first_bad - dev_sector;
				if (good_sectors > best_good_sectors) {
					best_good_sectors = good_sectors;
					best_slot = slot;
					best_rdev = rdev;
				}
				if (!do_balance)
					/* not balancing: take this slot */
					break;
			}
			continue;
		} else
			best_good_sectors = sectors;

		if (!do_balance)
			break;

		/* with near copies, an idle device is taken immediately */
		if (geo->near_copies > 1 && !atomic_read(&rdev->nr_pending))
			break;

		/*
		 * With far copies the lowest device address wins; otherwise
		 * the device whose recorded head position is closest.
		 */
		if (geo->far_copies > 1)
			new_distance = r10_bio->devs[slot].addr;
		else
			new_distance = abs(r10_bio->devs[slot].addr -
					   conf->mirrors[disk].head_position);
		if (new_distance < best_dist) {
			best_dist = new_distance;
			best_slot = slot;
			best_rdev = rdev;
		}
	}
	/* loop ran to completion (no break): use the best candidate seen */
	if (slot >= conf->copies) {
		slot = best_slot;
		rdev = best_rdev;
	}

	if (slot >= 0) {
		atomic_inc(&rdev->nr_pending);
		if (test_bit(Faulty, &rdev->flags)) {
			/*
			 * The device was marked Faulty after it was checked
			 * in the loop: drop the reference and search again.
			 */
			rdev_dec_pending(rdev, conf->mddev);
			goto retry;
		}
		r10_bio->read_slot = slot;
	} else
		rdev = NULL;
	rcu_read_unlock();
	*max_sectors = best_good_sectors;

	return rdev;
}
822
823static int raid10_congested(struct mddev *mddev, int bits)
824{
825 struct r10conf *conf = mddev->private;
826 int i, ret = 0;
827
828 if ((bits & (1 << WB_async_congested)) &&
829 conf->pending_count >= max_queued_requests)
830 return 1;
831
832 rcu_read_lock();
833 for (i = 0;
834 (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
835 && ret == 0;
836 i++) {
837 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
838 if (rdev && !test_bit(Faulty, &rdev->flags)) {
839 struct request_queue *q = bdev_get_queue(rdev->bdev);
840
841 ret |= bdi_congested(&q->backing_dev_info, bits);
842 }
843 }
844 rcu_read_unlock();
845 return ret;
846}
847
848static void flush_pending_writes(struct r10conf *conf)
849{
850
851
852
853 spin_lock_irq(&conf->device_lock);
854
855 if (conf->pending_bio_list.head) {
856 struct bio *bio;
857 bio = bio_list_get(&conf->pending_bio_list);
858 conf->pending_count = 0;
859 spin_unlock_irq(&conf->device_lock);
860
861
862 bitmap_unplug(conf->mddev->bitmap);
863 wake_up(&conf->wait_barrier);
864
865 while (bio) {
866 struct bio *next = bio->bi_next;
867 bio->bi_next = NULL;
868 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
869 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
870
871 bio_endio(bio);
872 else
873 generic_make_request(bio);
874 bio = next;
875 }
876 } else
877 spin_unlock_irq(&conf->device_lock);
878}
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
/*
 * Raise the barrier by one and wait until no requests are pending.
 *
 * @force: when non-zero, do not wait for conf->nr_waiting to drain first;
 *         this is only legal while a barrier is already raised (BUG
 *         otherwise).
 *
 * All waiting is done on conf->wait_barrier with conf->resync_lock held
 * around the condition checks.
 */
static void raise_barrier(struct r10conf *conf, int force)
{
	BUG_ON(force && !conf->barrier);
	spin_lock_irq(&conf->resync_lock);

	/* unless forced, wait until nobody is waiting in wait_barrier() */
	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
			    conf->resync_lock);

	/* from now on wait_barrier() callers block */
	conf->barrier++;

	/* wait for pending requests to drain and for depth to allow us in */
	wait_event_lock_irq(conf->wait_barrier,
			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
			    conf->resync_lock);

	spin_unlock_irq(&conf->resync_lock);
}
921
/*
 * Lower the barrier by one (under conf->resync_lock) and wake everyone
 * sleeping on conf->wait_barrier.
 */
static void lower_barrier(struct r10conf *conf)
{
	unsigned long flags;
	spin_lock_irqsave(&conf->resync_lock, flags);
	conf->barrier--;
	spin_unlock_irqrestore(&conf->resync_lock, flags);
	wake_up(&conf->wait_barrier);
}
930
/*
 * Register a new pending request, first waiting for any raised barrier.
 *
 * While conf->barrier is non-zero the caller is counted in conf->nr_waiting
 * and sleeps on conf->wait_barrier.  conf->nr_pending is incremented before
 * returning; allow_barrier() undoes that.
 */
static void wait_barrier(struct r10conf *conf)
{
	spin_lock_irq(&conf->resync_lock);
	if (conf->barrier) {
		conf->nr_waiting++;
		/*
		 * Besides the barrier dropping to zero, the wait also ends
		 * when requests are already pending and this task has bios
		 * queued on current->bio_list.
		 * NOTE(review): presumably this avoids a deadlock where the
		 * pending requests depend on those queued bios - confirm.
		 */
		wait_event_lock_irq(conf->wait_barrier,
				    !conf->barrier ||
				    (conf->nr_pending &&
				     current->bio_list &&
				     !bio_list_empty(current->bio_list)),
				    conf->resync_lock);
		conf->nr_waiting--;
	}
	conf->nr_pending++;
	spin_unlock_irq(&conf->resync_lock);
}
956
/*
 * Counterpart of wait_barrier(): drop one pending request (under
 * conf->resync_lock) and wake everyone sleeping on conf->wait_barrier.
 */
static void allow_barrier(struct r10conf *conf)
{
	unsigned long flags;
	spin_lock_irqsave(&conf->resync_lock, flags);
	conf->nr_pending--;
	spin_unlock_irqrestore(&conf->resync_lock, flags);
	wake_up(&conf->wait_barrier);
}
965
/*
 * Stop new I/O and wait until all outstanding I/O is accounted for.
 *
 * @extra: number of pending requests, beyond those counted in
 *         conf->nr_queued, that are allowed to remain outstanding.
 *
 * Raises both conf->barrier and conf->nr_waiting, then waits until
 * conf->nr_pending equals conf->nr_queued + @extra.  flush_pending_writes()
 * is supplied as the command of wait_event_lock_irq_cmd().
 * NOTE(review): presumably the command runs with resync_lock released
 * before each sleep - confirm against the macro definition.
 * Undone by unfreeze_array().
 */
static void freeze_array(struct r10conf *conf, int extra)
{
	spin_lock_irq(&conf->resync_lock);
	conf->barrier++;
	conf->nr_waiting++;
	wait_event_lock_irq_cmd(conf->wait_barrier,
				conf->nr_pending == conf->nr_queued+extra,
				conf->resync_lock,
				flush_pending_writes(conf));

	spin_unlock_irq(&conf->resync_lock);
}
990
/*
 * Reverse freeze_array(): drop the barrier and nr_waiting counts taken
 * there and wake everyone sleeping on conf->wait_barrier.
 */
static void unfreeze_array(struct r10conf *conf)
{
	spin_lock_irq(&conf->resync_lock);
	conf->barrier--;
	conf->nr_waiting--;
	wake_up(&conf->wait_barrier);
	spin_unlock_irq(&conf->resync_lock);
}
1000
1001static sector_t choose_data_offset(struct r10bio *r10_bio,
1002 struct md_rdev *rdev)
1003{
1004 if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) ||
1005 test_bit(R10BIO_Previous, &r10_bio->state))
1006 return rdev->data_offset;
1007 else
1008 return rdev->new_data_offset;
1009}
1010
/*
 * Per-plug state: writes that __make_request() collects while a block plug
 * is active, released by raid10_unplug().
 */
struct raid10_plug_cb {
	struct blk_plug_cb	cb;		/* embedded plug callback; raid10_unplug() recovers us from it */
	struct bio_list		pending;	/* write bios waiting for the unplug */
	int			pending_cnt;	/* number of bios on 'pending' */
};
1016
/*
 * Unplug callback: release the writes collected in a raid10_plug_cb.
 *
 * @cb:            the callback embedded in the raid10_plug_cb.
 * @from_schedule: passed by the caller of the unplug callback.
 *
 * When @from_schedule is set or the current task has a bio_list, the
 * writes are moved to conf->pending_bio_list and left to the md thread.
 * Otherwise they are submitted right here.  The plug structure is freed in
 * both cases.
 */
static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
						   cb);
	struct mddev *mddev = plug->cb.data;
	struct r10conf *conf = mddev->private;
	struct bio *bio;

	if (from_schedule || current->bio_list) {
		/* hand the writes over to the md thread */
		spin_lock_irq(&conf->device_lock);
		bio_list_merge(&conf->pending_bio_list, &plug->pending);
		conf->pending_count += plug->pending_cnt;
		spin_unlock_irq(&conf->device_lock);
		wake_up(&conf->wait_barrier);
		md_wakeup_thread(mddev->thread);
		kfree(plug);
		return;
	}

	/* submit directly: flush the bitmap first, then issue each write */
	bio = bio_list_get(&plug->pending);
	bitmap_unplug(mddev->bitmap);
	wake_up(&conf->wait_barrier);

	while (bio) { /* submit pending writes */
		struct bio *next = bio->bi_next;
		bio->bi_next = NULL;
		if (unlikely((bio->bi_rw & REQ_DISCARD) &&
		    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
			/* device cannot discard: just end the bio */
			bio_endio(bio);
		else
			generic_make_request(bio);
		bio = next;
	}
	kfree(plug);
}
1054
/*
 * Handle one bio that has already been split by raid10_make_request().
 *
 * @mddev: the array.
 * @bio:   the master bio to service.
 *
 * Reads are sent to a single device chosen by read_balance().  Writes are
 * cloned once per usable device (and once more per usable replacement) and
 * queued for submission.  If bad blocks limit how much can be handled in
 * one go, the master bio is covered by several r10bios;
 * bio->bi_phys_segments then counts the r10bios outstanding for it (see
 * raid_end_bio_io()).
 */
static void __make_request(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
	struct r10bio *r10_bio;
	struct bio *read_bio;
	int i;
	const int rw = bio_data_dir(bio);
	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
	const unsigned long do_discard = (bio->bi_rw
					  & (REQ_DISCARD | REQ_SECURE));
	const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME);
	unsigned long flags;
	struct md_rdev *blocked_rdev;
	struct blk_plug_cb *cb;
	struct raid10_plug_cb *plug = NULL;
	int sectors_handled;
	int max_sectors;
	int sectors;

	/* register as pending I/O; blocks while a barrier is raised */
	wait_barrier(conf);

	sectors = bio_sectors(bio);
	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    bio->bi_iter.bi_sector < conf->reshape_progress &&
	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
		/*
		 * The request straddles the reshape position: step out of
		 * the pending count and wait until reshape_progress has
		 * moved entirely to one side of the request.
		 */
		allow_barrier(conf);
		wait_event(conf->wait_barrier,
			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
			   conf->reshape_progress >= bio->bi_iter.bi_sector +
			   sectors);
		wait_barrier(conf);
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    bio_data_dir(bio) == WRITE &&
	    (mddev->reshape_backwards
	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
		bio->bi_iter.bi_sector < conf->reshape_progress))) {
		/*
		 * A write into the region between reshape_safe and
		 * reshape_progress: record the current reshape position and
		 * wait for MD_CHANGE_PENDING to clear before continuing.
		 */
		mddev->reshape_position = conf->reshape_progress;
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		set_bit(MD_CHANGE_PENDING, &mddev->flags);
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));

		conf->reshape_safe = mddev->reshape_position;
	}

	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

	r10_bio->master_bio = bio;
	r10_bio->sectors = sectors;

	r10_bio->mddev = mddev;
	r10_bio->sector = bio->bi_iter.bi_sector;
	r10_bio->state = 0;

	/*
	 * bi_phys_segments is reused as the count of r10bios outstanding for
	 * this master bio; 0 means "not split".  BIO_SEG_VALID is cleared
	 * because the field no longer holds a segment count.
	 */
	bio->bi_phys_segments = 0;
	bio_clear_flag(bio, BIO_SEG_VALID);

	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
		struct md_rdev *rdev;
		int slot;

read_again:
		rdev = read_balance(conf, r10_bio, &max_sectors);
		if (!rdev) {
			/* no usable device: completes with -EIO */
			raid_end_bio_io(r10_bio);
			return;
		}
		slot = r10_bio->read_slot;

		/* clone covering only what this r10bio will read */
		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
			 max_sectors);

		r10_bio->devs[slot].bio = read_bio;
		r10_bio->devs[slot].rdev = rdev;

		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
			choose_data_offset(r10_bio, rdev);
		read_bio->bi_bdev = rdev->bdev;
		read_bio->bi_end_io = raid10_end_read_request;
		read_bio->bi_rw = READ | do_sync;
		read_bio->bi_private = r10_bio;

		if (max_sectors < r10_bio->sectors) {
			/*
			 * Only part of the request can be read from this
			 * device, so another r10bio is needed for the rest.
			 */
			sectors_handled = (r10_bio->sector + max_sectors
					   - bio->bi_iter.bi_sector);
			r10_bio->sectors = max_sectors;
			spin_lock_irq(&conf->device_lock);
			if (bio->bi_phys_segments == 0)
				bio->bi_phys_segments = 2;
			else
				bio->bi_phys_segments++;
			spin_unlock_irq(&conf->device_lock);
			/*
			 * read_bio is not submitted here; the r10bio (with
			 * read_bio attached) is queued for the md thread.
			 * NOTE(review): presumably to avoid blocking in the
			 * mempool_alloc() below while the bio sits on this
			 * task's bio_list - confirm.
			 */
			reschedule_retry(r10_bio);

			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

			r10_bio->master_bio = bio;
			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
			r10_bio->state = 0;
			r10_bio->mddev = mddev;
			r10_bio->sector = bio->bi_iter.bi_sector +
				sectors_handled;
			goto read_again;
		} else
			generic_make_request(read_bio);
		return;
	}

	/*
	 * WRITE:
	 */
	/* throttle: wait while too many writes are already queued */
	if (conf->pending_count >= max_queued_requests) {
		md_wakeup_thread(mddev->thread);
		wait_event(conf->wait_barrier,
			   conf->pending_count < max_queued_requests);
	}

	/*
	 * First pass: decide which devices get a write and take a reference
	 * on each (nr_pending).  devs[i].bio / .repl_bio are set to the
	 * master bio as a "write wanted" marker; the second pass replaces
	 * the markers with real clones.  A negative read_slot marks the
	 * r10bio as a write (see put_all_bios()).
	 */
	r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
	raid10_find_phys(conf, r10_bio);
retry_write:
	blocked_rdev = NULL;
	rcu_read_lock();
	max_sectors = r10_bio->sectors;

	for (i = 0;  i < conf->copies; i++) {
		int d = r10_bio->devs[i].devnum;
		struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
		struct md_rdev *rrdev = rcu_dereference(
			conf->mirrors[d].replacement);
		if (rdev == rrdev)
			rrdev = NULL;
		/* a Blocked device stops the scan; we wait for it below */
		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
			atomic_inc(&rdev->nr_pending);
			blocked_rdev = rdev;
			break;
		}
		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
			atomic_inc(&rrdev->nr_pending);
			blocked_rdev = rrdev;
			break;
		}
		if (rdev && (test_bit(Faulty, &rdev->flags)))
			rdev = NULL;
		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
			rrdev = NULL;

		r10_bio->devs[i].bio = NULL;
		r10_bio->devs[i].repl_bio = NULL;

		if (!rdev && !rrdev) {
			/* no device at all for this copy */
			set_bit(R10BIO_Degraded, &r10_bio->state);
			continue;
		}
		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
			sector_t first_bad;
			sector_t dev_sector = r10_bio->devs[i].addr;
			int bad_sectors;
			int is_bad;

			is_bad = is_badblock(rdev, dev_sector,
					     max_sectors,
					     &first_bad, &bad_sectors);
			if (is_bad < 0) {
				/*
				 * Negative result: treat the device as
				 * blocked (BlockedBadBlocks) and wait for it.
				 */
				atomic_inc(&rdev->nr_pending);
				set_bit(BlockedBadBlocks, &rdev->flags);
				blocked_rdev = rdev;
				break;
			}
			if (is_bad && first_bad <= dev_sector) {
				/*
				 * The write starts inside a bad range: skip
				 * this device (and its replacement) for this
				 * r10bio, and shorten the r10bio so it ends
				 * where the bad range ends.  R10BIO_Degraded
				 * is deliberately not set here.
				 */
				bad_sectors -= (dev_sector - first_bad);
				if (bad_sectors < max_sectors)
					max_sectors = bad_sectors;
				continue;
			}
			if (is_bad) {
				/* write only the good prefix for now */
				int good_sectors = first_bad - dev_sector;
				if (good_sectors < max_sectors)
					max_sectors = good_sectors;
			}
		}
		if (rdev) {
			r10_bio->devs[i].bio = bio;
			atomic_inc(&rdev->nr_pending);
		}
		if (rrdev) {
			r10_bio->devs[i].repl_bio = bio;
			atomic_inc(&rrdev->nr_pending);
		}
	}
	rcu_read_unlock();

	if (unlikely(blocked_rdev)) {
		/* Have to wait for this device to get unblocked, then retry */
		int j;
		int d;

		/* release the references taken on slots before the stop */
		for (j = 0; j < i; j++) {
			if (r10_bio->devs[j].bio) {
				d = r10_bio->devs[j].devnum;
				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
			}
			if (r10_bio->devs[j].repl_bio) {
				struct md_rdev *rdev;
				d = r10_bio->devs[j].devnum;
				rdev = conf->mirrors[d].replacement;
				if (!rdev) {
					/* replacement pointer is now NULL: use the primary slot */
					smp_mb();
					rdev = conf->mirrors[d].rdev;
				}
				rdev_dec_pending(rdev, mddev);
			}
		}
		/* leave the pending count while sleeping on the device */
		allow_barrier(conf);
		md_wait_for_blocked_rdev(blocked_rdev, mddev);
		wait_barrier(conf);
		goto retry_write;
	}

	if (max_sectors < r10_bio->sectors) {
		/*
		 * Bad blocks limit this r10bio to max_sectors; account for
		 * the additional r10bio that will cover the remainder.
		 */
		r10_bio->sectors = max_sectors;
		spin_lock_irq(&conf->device_lock);
		if (bio->bi_phys_segments == 0)
			bio->bi_phys_segments = 2;
		else
			bio->bi_phys_segments++;
		spin_unlock_irq(&conf->device_lock);
	}
	sectors_handled = r10_bio->sector + max_sectors -
		bio->bi_iter.bi_sector;

	/* one reference held by this function until all clones are queued */
	atomic_set(&r10_bio->remaining, 1);
	bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);

	/* second pass: build and queue one clone per marked device */
	for (i = 0; i < conf->copies; i++) {
		struct bio *mbio;
		int d = r10_bio->devs[i].devnum;
		if (r10_bio->devs[i].bio) {
			struct md_rdev *rdev = conf->mirrors[d].rdev;
			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
				 max_sectors);
			r10_bio->devs[i].bio = mbio;

			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
					   choose_data_offset(r10_bio,
							      rdev));
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io	= raid10_end_write_request;
			mbio->bi_rw =
				WRITE | do_sync | do_fua | do_discard | do_same;
			mbio->bi_private = r10_bio;

			atomic_inc(&r10_bio->remaining);

			/*
			 * Queue on the current plug if there is one,
			 * otherwise on the array-wide pending list.
			 */
			cb = blk_check_plugged(raid10_unplug, mddev,
					       sizeof(*plug));
			if (cb)
				plug = container_of(cb, struct raid10_plug_cb,
						    cb);
			else
				plug = NULL;
			spin_lock_irqsave(&conf->device_lock, flags);
			if (plug) {
				bio_list_add(&plug->pending, mbio);
				plug->pending_cnt++;
			} else {
				bio_list_add(&conf->pending_bio_list, mbio);
				conf->pending_count++;
			}
			spin_unlock_irqrestore(&conf->device_lock, flags);
			if (!plug)
				md_wakeup_thread(mddev->thread);
		}

		if (r10_bio->devs[i].repl_bio) {
			struct md_rdev *rdev = conf->mirrors[d].replacement;
			if (rdev == NULL) {
				/* replacement pointer is now NULL: use the primary slot */
				smp_mb();
				rdev = conf->mirrors[d].rdev;
			}
			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
				 max_sectors);
			r10_bio->devs[i].repl_bio = mbio;

			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
					   choose_data_offset(
						   r10_bio, rdev));
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io	= raid10_end_write_request;
			mbio->bi_rw =
				WRITE | do_sync | do_fua | do_discard | do_same;
			mbio->bi_private = r10_bio;

			atomic_inc(&r10_bio->remaining);
			/* replacement writes always use the array-wide list */
			spin_lock_irqsave(&conf->device_lock, flags);
			bio_list_add(&conf->pending_bio_list, mbio);
			conf->pending_count++;
			spin_unlock_irqrestore(&conf->device_lock, flags);
			if (!mddev_check_plugged(mddev))
				md_wakeup_thread(mddev->thread);
		}
	}

	/*
	 * The r10_bio must not be touched after one_write_done(): it may be
	 * completed and freed as soon as our reference is dropped.
	 */
	if (sectors_handled < bio_sectors(bio)) {
		one_write_done(r10_bio);
		/*
		 * More of the master bio remains: start a fresh r10bio for
		 * it.  bi_phys_segments was already incremented above.
		 */
		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

		r10_bio->master_bio = bio;
		r10_bio->sectors = bio_sectors(bio) - sectors_handled;

		r10_bio->mddev = mddev;
		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
		r10_bio->state = 0;
		goto retry_write;
	}
	one_write_done(r10_bio);
}
1444
/*
 * Entry point for bios submitted to the array.
 *
 * Flush requests are passed to md_flush_request().  Any other bio is cut at
 * chunk boundaries (using the combined chunk mask of the current and
 * previous geometry) and each piece is passed to __make_request().
 */
static void raid10_make_request(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
	int chunk_sects = chunk_mask + 1;

	struct bio *split;

	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
		md_flush_request(mddev, bio);
		return;
	}

	md_write_start(mddev, bio);

	do {

		/*
		 * Split only when the bio crosses a chunk boundary AND the
		 * layout is not a plain mirror over all devices
		 * (near_copies < raid_disks in either geometry).
		 */
		if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
			     bio_sectors(bio) > chunk_sects
			     && (conf->geo.near_copies < conf->geo.raid_disks
				 || conf->prev.near_copies <
				 conf->prev.raid_disks))) {
			/* front piece runs up to the end of the chunk */
			split = bio_split(bio, chunk_sects -
					  (bio->bi_iter.bi_sector &
					   (chunk_sects - 1)),
					  GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);
		} else {
			split = bio;
		}

		__make_request(mddev, split);
	} while (split != bio);

	/* In case raid10d snuck in to freeze_array */
	wake_up(&conf->wait_barrier);
}
1486
1487static void raid10_status(struct seq_file *seq, struct mddev *mddev)
1488{
1489 struct r10conf *conf = mddev->private;
1490 int i;
1491
1492 if (conf->geo.near_copies < conf->geo.raid_disks)
1493 seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
1494 if (conf->geo.near_copies > 1)
1495 seq_printf(seq, " %d near-copies", conf->geo.near_copies);
1496 if (conf->geo.far_copies > 1) {
1497 if (conf->geo.far_offset)
1498 seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
1499 else
1500 seq_printf(seq, " %d far-copies", conf->geo.far_copies);
1501 if (conf->geo.far_set_size != conf->geo.raid_disks)
1502 seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
1503 }
1504 seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
1505 conf->geo.raid_disks - mddev->degraded);
1506 for (i = 0; i < conf->geo.raid_disks; i++)
1507 seq_printf(seq, "%s",
1508 conf->mirrors[i].rdev &&
1509 test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
1510 seq_printf(seq, "]");
1511}
1512
1513
1514
1515
1516
1517
1518static int _enough(struct r10conf *conf, int previous, int ignore)
1519{
1520 int first = 0;
1521 int has_enough = 0;
1522 int disks, ncopies;
1523 if (previous) {
1524 disks = conf->prev.raid_disks;
1525 ncopies = conf->prev.near_copies;
1526 } else {
1527 disks = conf->geo.raid_disks;
1528 ncopies = conf->geo.near_copies;
1529 }
1530
1531 rcu_read_lock();
1532 do {
1533 int n = conf->copies;
1534 int cnt = 0;
1535 int this = first;
1536 while (n--) {
1537 struct md_rdev *rdev;
1538 if (this != ignore &&
1539 (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
1540 test_bit(In_sync, &rdev->flags))
1541 cnt++;
1542 this = (this+1) % disks;
1543 }
1544 if (cnt == 0)
1545 goto out;
1546 first = (first + ncopies) % disks;
1547 } while (first != 0);
1548 has_enough = 1;
1549out:
1550 rcu_read_unlock();
1551 return has_enough;
1552}
1553
/*
 * enough - _enough() evaluated for both the current and the previous
 * geometry.
 * @conf:   array configuration
 * @ignore: slot to treat as absent, passed through to _enough()
 *
 * Returns 1 only when both geometries still have enough devices, else 0.
 * The previous geometry is not examined when the current one already fails.
 */
static int enough(struct r10conf *conf, int ignore)
{
	if (!_enough(conf, 0, ignore))
		return 0;

	return _enough(conf, 1, ignore);
}
1564
1565static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
1566{
1567 char b[BDEVNAME_SIZE];
1568 struct r10conf *conf = mddev->private;
1569 unsigned long flags;
1570
1571
1572
1573
1574
1575
1576
1577 spin_lock_irqsave(&conf->device_lock, flags);
1578 if (test_bit(In_sync, &rdev->flags)
1579 && !enough(conf, rdev->raid_disk)) {
1580
1581
1582
1583 spin_unlock_irqrestore(&conf->device_lock, flags);
1584 return;
1585 }
1586 if (test_and_clear_bit(In_sync, &rdev->flags))
1587 mddev->degraded++;
1588
1589
1590
1591 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1592 set_bit(Blocked, &rdev->flags);
1593 set_bit(Faulty, &rdev->flags);
1594 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1595 set_bit(MD_CHANGE_PENDING, &mddev->flags);
1596 spin_unlock_irqrestore(&conf->device_lock, flags);
1597 printk(KERN_ALERT
1598 "md/raid10:%s: Disk failure on %s, disabling device.\n"
1599 "md/raid10:%s: Operation continuing on %d devices.\n",
1600 mdname(mddev), bdevname(rdev->bdev, b),
1601 mdname(mddev), conf->geo.raid_disks - mddev->degraded);
1602}
1603
1604static void print_conf(struct r10conf *conf)
1605{
1606 int i;
1607 struct raid10_info *tmp;
1608
1609 printk(KERN_DEBUG "RAID10 conf printout:\n");
1610 if (!conf) {
1611 printk(KERN_DEBUG "(!conf)\n");
1612 return;
1613 }
1614 printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
1615 conf->geo.raid_disks);
1616
1617 for (i = 0; i < conf->geo.raid_disks; i++) {
1618 char b[BDEVNAME_SIZE];
1619 tmp = conf->mirrors + i;
1620 if (tmp->rdev)
1621 printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
1622 i, !test_bit(In_sync, &tmp->rdev->flags),
1623 !test_bit(Faulty, &tmp->rdev->flags),
1624 bdevname(tmp->rdev->bdev,b));
1625 }
1626}
1627
/*
 * close_sync - release the resync buffer pool.
 * @conf: array configuration
 *
 * Performs a wait_barrier()/allow_barrier() round trip, then destroys
 * conf->r10buf_pool and resets the pointer to NULL so that
 * raid10_sync_request() will call init_resync() again next time.
 * NOTE(review): the barrier round trip presumably waits for a raised
 * resync barrier to be dropped before the pool goes away - confirm
 * against wait_barrier().
 */
static void close_sync(struct r10conf *conf)
{
	wait_barrier(conf);
	allow_barrier(conf);

	mempool_destroy(conf->r10buf_pool);
	conf->r10buf_pool = NULL;
}
1636
1637static int raid10_spare_active(struct mddev *mddev)
1638{
1639 int i;
1640 struct r10conf *conf = mddev->private;
1641 struct raid10_info *tmp;
1642 int count = 0;
1643 unsigned long flags;
1644
1645
1646
1647
1648
1649 for (i = 0; i < conf->geo.raid_disks; i++) {
1650 tmp = conf->mirrors + i;
1651 if (tmp->replacement
1652 && tmp->replacement->recovery_offset == MaxSector
1653 && !test_bit(Faulty, &tmp->replacement->flags)
1654 && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
1655
1656 if (!tmp->rdev
1657 || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
1658 count++;
1659 if (tmp->rdev) {
1660
1661
1662
1663
1664 set_bit(Faulty, &tmp->rdev->flags);
1665 sysfs_notify_dirent_safe(
1666 tmp->rdev->sysfs_state);
1667 }
1668 sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
1669 } else if (tmp->rdev
1670 && tmp->rdev->recovery_offset == MaxSector
1671 && !test_bit(Faulty, &tmp->rdev->flags)
1672 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
1673 count++;
1674 sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
1675 }
1676 }
1677 spin_lock_irqsave(&conf->device_lock, flags);
1678 mddev->degraded -= count;
1679 spin_unlock_irqrestore(&conf->device_lock, flags);
1680
1681 print_conf(conf);
1682 return count;
1683}
1684
1685static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1686{
1687 struct r10conf *conf = mddev->private;
1688 int err = -EEXIST;
1689 int mirror;
1690 int first = 0;
1691 int last = conf->geo.raid_disks - 1;
1692
1693 if (mddev->recovery_cp < MaxSector)
1694
1695
1696
1697 return -EBUSY;
1698 if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
1699 return -EINVAL;
1700
1701 if (md_integrity_add_rdev(rdev, mddev))
1702 return -ENXIO;
1703
1704 if (rdev->raid_disk >= 0)
1705 first = last = rdev->raid_disk;
1706
1707 if (rdev->saved_raid_disk >= first &&
1708 conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
1709 mirror = rdev->saved_raid_disk;
1710 else
1711 mirror = first;
1712 for ( ; mirror <= last ; mirror++) {
1713 struct raid10_info *p = &conf->mirrors[mirror];
1714 if (p->recovery_disabled == mddev->recovery_disabled)
1715 continue;
1716 if (p->rdev) {
1717 if (!test_bit(WantReplacement, &p->rdev->flags) ||
1718 p->replacement != NULL)
1719 continue;
1720 clear_bit(In_sync, &rdev->flags);
1721 set_bit(Replacement, &rdev->flags);
1722 rdev->raid_disk = mirror;
1723 err = 0;
1724 if (mddev->gendisk)
1725 disk_stack_limits(mddev->gendisk, rdev->bdev,
1726 rdev->data_offset << 9);
1727 conf->fullsync = 1;
1728 rcu_assign_pointer(p->replacement, rdev);
1729 break;
1730 }
1731
1732 if (mddev->gendisk)
1733 disk_stack_limits(mddev->gendisk, rdev->bdev,
1734 rdev->data_offset << 9);
1735
1736 p->head_position = 0;
1737 p->recovery_disabled = mddev->recovery_disabled - 1;
1738 rdev->raid_disk = mirror;
1739 err = 0;
1740 if (rdev->saved_raid_disk != mirror)
1741 conf->fullsync = 1;
1742 rcu_assign_pointer(p->rdev, rdev);
1743 break;
1744 }
1745 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
1746 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
1747
1748 print_conf(conf);
1749 return err;
1750}
1751
1752static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1753{
1754 struct r10conf *conf = mddev->private;
1755 int err = 0;
1756 int number = rdev->raid_disk;
1757 struct md_rdev **rdevp;
1758 struct raid10_info *p = conf->mirrors + number;
1759
1760 print_conf(conf);
1761 if (rdev == p->rdev)
1762 rdevp = &p->rdev;
1763 else if (rdev == p->replacement)
1764 rdevp = &p->replacement;
1765 else
1766 return 0;
1767
1768 if (test_bit(In_sync, &rdev->flags) ||
1769 atomic_read(&rdev->nr_pending)) {
1770 err = -EBUSY;
1771 goto abort;
1772 }
1773
1774
1775
1776 if (!test_bit(Faulty, &rdev->flags) &&
1777 mddev->recovery_disabled != p->recovery_disabled &&
1778 (!p->replacement || p->replacement == rdev) &&
1779 number < conf->geo.raid_disks &&
1780 enough(conf, -1)) {
1781 err = -EBUSY;
1782 goto abort;
1783 }
1784 *rdevp = NULL;
1785 synchronize_rcu();
1786 if (atomic_read(&rdev->nr_pending)) {
1787
1788 err = -EBUSY;
1789 *rdevp = rdev;
1790 goto abort;
1791 } else if (p->replacement) {
1792
1793 p->rdev = p->replacement;
1794 clear_bit(Replacement, &p->replacement->flags);
1795 smp_mb();
1796
1797
1798 p->replacement = NULL;
1799 clear_bit(WantReplacement, &rdev->flags);
1800 } else
1801
1802
1803
1804 clear_bit(WantReplacement, &rdev->flags);
1805
1806 err = md_integrity_register(mddev);
1807
1808abort:
1809
1810 print_conf(conf);
1811 return err;
1812}
1813
1814static void end_sync_read(struct bio *bio)
1815{
1816 struct r10bio *r10_bio = bio->bi_private;
1817 struct r10conf *conf = r10_bio->mddev->private;
1818 int d;
1819
1820 if (bio == r10_bio->master_bio) {
1821
1822 d = r10_bio->read_slot;
1823 } else
1824 d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
1825
1826 if (!bio->bi_error)
1827 set_bit(R10BIO_Uptodate, &r10_bio->state);
1828 else
1829
1830
1831
1832 atomic_add(r10_bio->sectors,
1833 &conf->mirrors[d].rdev->corrected_errors);
1834
1835
1836
1837
1838 rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev);
1839 if (test_bit(R10BIO_IsRecover, &r10_bio->state) ||
1840 atomic_dec_and_test(&r10_bio->remaining)) {
1841
1842
1843
1844 reschedule_retry(r10_bio);
1845 }
1846}
1847
1848static void end_sync_request(struct r10bio *r10_bio)
1849{
1850 struct mddev *mddev = r10_bio->mddev;
1851
1852 while (atomic_dec_and_test(&r10_bio->remaining)) {
1853 if (r10_bio->master_bio == NULL) {
1854
1855 sector_t s = r10_bio->sectors;
1856 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
1857 test_bit(R10BIO_WriteError, &r10_bio->state))
1858 reschedule_retry(r10_bio);
1859 else
1860 put_buf(r10_bio);
1861 md_done_sync(mddev, s, 1);
1862 break;
1863 } else {
1864 struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio;
1865 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
1866 test_bit(R10BIO_WriteError, &r10_bio->state))
1867 reschedule_retry(r10_bio);
1868 else
1869 put_buf(r10_bio);
1870 r10_bio = r10_bio2;
1871 }
1872 }
1873}
1874
1875static void end_sync_write(struct bio *bio)
1876{
1877 struct r10bio *r10_bio = bio->bi_private;
1878 struct mddev *mddev = r10_bio->mddev;
1879 struct r10conf *conf = mddev->private;
1880 int d;
1881 sector_t first_bad;
1882 int bad_sectors;
1883 int slot;
1884 int repl;
1885 struct md_rdev *rdev = NULL;
1886
1887 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
1888 if (repl)
1889 rdev = conf->mirrors[d].replacement;
1890 else
1891 rdev = conf->mirrors[d].rdev;
1892
1893 if (bio->bi_error) {
1894 if (repl)
1895 md_error(mddev, rdev);
1896 else {
1897 set_bit(WriteErrorSeen, &rdev->flags);
1898 if (!test_and_set_bit(WantReplacement, &rdev->flags))
1899 set_bit(MD_RECOVERY_NEEDED,
1900 &rdev->mddev->recovery);
1901 set_bit(R10BIO_WriteError, &r10_bio->state);
1902 }
1903 } else if (is_badblock(rdev,
1904 r10_bio->devs[slot].addr,
1905 r10_bio->sectors,
1906 &first_bad, &bad_sectors))
1907 set_bit(R10BIO_MadeGood, &r10_bio->state);
1908
1909 rdev_dec_pending(rdev, mddev);
1910
1911 end_sync_request(r10_bio);
1912}
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
/*
 * sync_request_write - second half of a resync: compare the copies that
 * were read and rewrite the ones that need it.
 * @mddev:   the array
 * @r10_bio: sync request whose per-copy read bios have completed
 *
 * The first copy that was read without error is used as the reference.
 * Every other copy that was read (bi_end_io == end_sync_read) is
 * rewritten from the reference if its read failed, or if its data
 * differs and MD_RECOVERY_CHECK is not set.  Replacement bios that are
 * armed (non-NULL bi_end_io) are submitted as well.  ->remaining starts
 * at 1 so the request cannot complete while writes are still being
 * issued; the final decrement at "done" completes it if nothing is
 * outstanding.
 */
static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{
	struct r10conf *conf = mddev->private;
	int i, first;
	struct bio *tbio, *fbio;
	int vcnt;

	/* Hold one reference of our own while submitting. */
	atomic_set(&r10_bio->remaining, 1);

	/* Find the first copy that was read successfully. */
	for (i=0; i<conf->copies; i++)
		if (!r10_bio->devs[i].bio->bi_error)
			break;

	/* No copy could be read: nothing to compare or write. */
	if (i == conf->copies)
		goto done;

	first = i;
	fbio = r10_bio->devs[i].bio;
	/* Reset the reference bio's iterator to cover the whole request. */
	fbio->bi_iter.bi_size = r10_bio->sectors << 9;
	fbio->bi_iter.bi_idx = 0;

	/* Number of pages needed to hold r10_bio->sectors, rounded up. */
	vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);

	/* Compare every other copy that was read against the reference. */
	for (i=0 ; i < conf->copies ; i++) {
		int j, d;

		tbio = r10_bio->devs[i].bio;

		/* Only copies that were actually read are candidates. */
		if (tbio->bi_end_io != end_sync_read)
			continue;
		if (i == first)
			continue;
		if (!r10_bio->devs[i].bio->bi_error) {
			/*
			 * The read succeeded: compare page by page, with a
			 * shorter length for a partial final page.
			 */
			int sectors = r10_bio->sectors;
			for (j = 0; j < vcnt; j++) {
				int len = PAGE_SIZE;
				if (sectors < (len / 512))
					len = sectors * 512;
				if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
					   page_address(tbio->bi_io_vec[j].bv_page),
					   len))
					break;
				sectors -= len/512;
			}
			/* All pages matched: nothing to do for this copy. */
			if (j == vcnt)
				continue;
			atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
			if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
				/* Check only: count the mismatch, do not repair. */
				continue;
		}

		/*
		 * Either the read failed or the data differs: turn this
		 * bio into a write of the reference data to the same copy.
		 */
		bio_reset(tbio);

		tbio->bi_vcnt = vcnt;
		tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
		tbio->bi_rw = WRITE;
		tbio->bi_private = r10_bio;
		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
		tbio->bi_end_io = end_sync_write;

		bio_copy_data(tbio, fbio);

		d = r10_bio->devs[i].devnum;
		atomic_inc(&conf->mirrors[d].rdev->nr_pending);
		atomic_inc(&r10_bio->remaining);
		md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));

		/* devs[].addr excludes the device's data offset; add it now. */
		tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
		tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
		generic_make_request(tbio);
	}

	/* Now submit the writes to any replacement devices. */
	for (i = 0; i < conf->copies; i++) {
		int d;

		tbio = r10_bio->devs[i].repl_bio;
		if (!tbio || !tbio->bi_end_io)
			continue;
		/*
		 * Copy the reference data unless the primary bio of this
		 * slot was turned into a write above or is the reference
		 * itself.
		 */
		if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
		    && r10_bio->devs[i].bio != fbio)
			bio_copy_data(tbio, fbio);
		d = r10_bio->devs[i].devnum;
		atomic_inc(&r10_bio->remaining);
		md_sync_acct(conf->mirrors[d].replacement->bdev,
			     bio_sectors(tbio));
		generic_make_request(tbio);
	}

done:
	/* Drop our own reference; complete here if no write is in flight. */
	if (atomic_dec_and_test(&r10_bio->remaining)) {
		md_done_sync(mddev, r10_bio->sectors, 1);
		put_buf(r10_bio);
	}
}
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048static void fix_recovery_read_error(struct r10bio *r10_bio)
2049{
2050
2051
2052
2053
2054
2055
2056
2057 struct mddev *mddev = r10_bio->mddev;
2058 struct r10conf *conf = mddev->private;
2059 struct bio *bio = r10_bio->devs[0].bio;
2060 sector_t sect = 0;
2061 int sectors = r10_bio->sectors;
2062 int idx = 0;
2063 int dr = r10_bio->devs[0].devnum;
2064 int dw = r10_bio->devs[1].devnum;
2065
2066 while (sectors) {
2067 int s = sectors;
2068 struct md_rdev *rdev;
2069 sector_t addr;
2070 int ok;
2071
2072 if (s > (PAGE_SIZE>>9))
2073 s = PAGE_SIZE >> 9;
2074
2075 rdev = conf->mirrors[dr].rdev;
2076 addr = r10_bio->devs[0].addr + sect,
2077 ok = sync_page_io(rdev,
2078 addr,
2079 s << 9,
2080 bio->bi_io_vec[idx].bv_page,
2081 READ, false);
2082 if (ok) {
2083 rdev = conf->mirrors[dw].rdev;
2084 addr = r10_bio->devs[1].addr + sect;
2085 ok = sync_page_io(rdev,
2086 addr,
2087 s << 9,
2088 bio->bi_io_vec[idx].bv_page,
2089 WRITE, false);
2090 if (!ok) {
2091 set_bit(WriteErrorSeen, &rdev->flags);
2092 if (!test_and_set_bit(WantReplacement,
2093 &rdev->flags))
2094 set_bit(MD_RECOVERY_NEEDED,
2095 &rdev->mddev->recovery);
2096 }
2097 }
2098 if (!ok) {
2099
2100
2101
2102
2103 rdev_set_badblocks(rdev, addr, s, 0);
2104
2105 if (rdev != conf->mirrors[dw].rdev) {
2106
2107 struct md_rdev *rdev2 = conf->mirrors[dw].rdev;
2108 addr = r10_bio->devs[1].addr + sect;
2109 ok = rdev_set_badblocks(rdev2, addr, s, 0);
2110 if (!ok) {
2111
2112 printk(KERN_NOTICE
2113 "md/raid10:%s: recovery aborted"
2114 " due to read error\n",
2115 mdname(mddev));
2116
2117 conf->mirrors[dw].recovery_disabled
2118 = mddev->recovery_disabled;
2119 set_bit(MD_RECOVERY_INTR,
2120 &mddev->recovery);
2121 break;
2122 }
2123 }
2124 }
2125
2126 sectors -= s;
2127 sect += s;
2128 idx++;
2129 }
2130}
2131
2132static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2133{
2134 struct r10conf *conf = mddev->private;
2135 int d;
2136 struct bio *wbio, *wbio2;
2137
2138 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
2139 fix_recovery_read_error(r10_bio);
2140 end_sync_request(r10_bio);
2141 return;
2142 }
2143
2144
2145
2146
2147
2148 d = r10_bio->devs[1].devnum;
2149 wbio = r10_bio->devs[1].bio;
2150 wbio2 = r10_bio->devs[1].repl_bio;
2151
2152
2153
2154
2155 if (wbio2 && !wbio2->bi_end_io)
2156 wbio2 = NULL;
2157 if (wbio->bi_end_io) {
2158 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2159 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
2160 generic_make_request(wbio);
2161 }
2162 if (wbio2) {
2163 atomic_inc(&conf->mirrors[d].replacement->nr_pending);
2164 md_sync_acct(conf->mirrors[d].replacement->bdev,
2165 bio_sectors(wbio2));
2166 generic_make_request(wbio2);
2167 }
2168}
2169
2170
2171
2172
2173
2174
2175
2176static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
2177{
2178 struct timespec cur_time_mon;
2179 unsigned long hours_since_last;
2180 unsigned int read_errors = atomic_read(&rdev->read_errors);
2181
2182 ktime_get_ts(&cur_time_mon);
2183
2184 if (rdev->last_read_error.tv_sec == 0 &&
2185 rdev->last_read_error.tv_nsec == 0) {
2186
2187 rdev->last_read_error = cur_time_mon;
2188 return;
2189 }
2190
2191 hours_since_last = (cur_time_mon.tv_sec -
2192 rdev->last_read_error.tv_sec) / 3600;
2193
2194 rdev->last_read_error = cur_time_mon;
2195
2196
2197
2198
2199
2200
2201 if (hours_since_last >= 8 * sizeof(read_errors))
2202 atomic_set(&rdev->read_errors, 0);
2203 else
2204 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
2205}
2206
2207static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
2208 int sectors, struct page *page, int rw)
2209{
2210 sector_t first_bad;
2211 int bad_sectors;
2212
2213 if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
2214 && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
2215 return -1;
2216 if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
2217
2218 return 1;
2219 if (rw == WRITE) {
2220 set_bit(WriteErrorSeen, &rdev->flags);
2221 if (!test_and_set_bit(WantReplacement, &rdev->flags))
2222 set_bit(MD_RECOVERY_NEEDED,
2223 &rdev->mddev->recovery);
2224 }
2225
2226 if (!rdev_set_badblocks(rdev, sector, sectors, 0))
2227 md_error(rdev->mddev, rdev);
2228 return 0;
2229}
2230
2231
2232
2233
2234
2235
2236
2237
2238
/*
 * fix_read_error - try to repair the blocks behind a failed read.
 * @conf:    array configuration
 * @mddev:   the array
 * @r10_bio: the read request that failed on devs[read_slot]
 *
 * Gives up immediately if the device is already Faulty.  After decaying
 * and bumping the device's read error count, the device is failed and
 * the slot marked IO_BLOCKED if the count exceeds
 * mddev->max_corr_read_errors.  Otherwise the range is processed in
 * pieces of at most one page: find a copy that can be read into
 * conf->tmppage, write that data to the other In_sync copies, then read
 * those copies back.  If no copy can be read, a bad block is recorded
 * on the original device (failing it if that is impossible) and the
 * repair stops.
 */
static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
{
	int sect = 0;
	int sectors = r10_bio->sectors;
	struct md_rdev*rdev;
	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
	int d = r10_bio->devs[r10_bio->read_slot].devnum;

	rdev = conf->mirrors[d].rdev;

	/* Nothing to repair on a device that has already been failed. */
	if (test_bit(Faulty, &rdev->flags))
		return;

	check_decay_read_errors(mddev, rdev);
	atomic_inc(&rdev->read_errors);
	if (atomic_read(&rdev->read_errors) > max_read_errors) {
		char b[BDEVNAME_SIZE];
		bdevname(rdev->bdev, b);

		printk(KERN_NOTICE
		       "md/raid10:%s: %s: Raid device exceeded "
		       "read_error threshold [cur %d:max %d]\n",
		       mdname(mddev), b,
		       atomic_read(&rdev->read_errors), max_read_errors);
		printk(KERN_NOTICE
		       "md/raid10:%s: %s: Failing raid device\n",
		       mdname(mddev), b);
		md_error(mddev, conf->mirrors[d].rdev);
		r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
		return;
	}

	while(sectors) {
		int s = sectors;
		int sl = r10_bio->read_slot;
		int success = 0;
		int start;

		/* Handle at most one page per iteration. */
		if (s > (PAGE_SIZE>>9))
			s = PAGE_SIZE >> 9;

		/*
		 * Step 1: starting at the slot that failed and moving
		 * forward (with wrap-around), look for an In_sync copy
		 * with no known bad block in this range that can be read.
		 * The RCU read lock is dropped around the blocking I/O;
		 * the nr_pending reference keeps rdev alive meanwhile.
		 */
		rcu_read_lock();
		do {
			sector_t first_bad;
			int bad_sectors;

			d = r10_bio->devs[sl].devnum;
			rdev = rcu_dereference(conf->mirrors[d].rdev);
			if (rdev &&
			    test_bit(In_sync, &rdev->flags) &&
			    is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
					&first_bad, &bad_sectors) == 0) {
				atomic_inc(&rdev->nr_pending);
				rcu_read_unlock();
				success = sync_page_io(rdev,
						       r10_bio->devs[sl].addr +
						       sect,
						       s<<9,
						       conf->tmppage, READ, false);
				rdev_dec_pending(rdev, mddev);
				rcu_read_lock();
				if (success)
					break;
			}
			sl++;
			if (sl == conf->copies)
				sl = 0;
		} while (!success && sl != r10_bio->read_slot);
		rcu_read_unlock();

		if (!success) {
			/*
			 * No copy was readable: record a bad block on the
			 * device the original read went to, or fail that
			 * device if the bad block cannot be recorded.
			 */
			int dn = r10_bio->devs[r10_bio->read_slot].devnum;
			rdev = conf->mirrors[dn].rdev;

			if (!rdev_set_badblocks(
				    rdev,
				    r10_bio->devs[r10_bio->read_slot].addr
				    + sect,
				    s, 0)) {
				md_error(mddev, rdev);
				r10_bio->devs[r10_bio->read_slot].bio
					= IO_BLOCKED;
			}
			break;
		}

		/* sl is the slot whose copy is now in conf->tmppage. */
		start = sl;

		/*
		 * Step 2: walk backwards from the good slot down to and
		 * including read_slot, writing the good data to every
		 * In_sync copy on the way.
		 */
		rcu_read_lock();
		while (sl != r10_bio->read_slot) {
			char b[BDEVNAME_SIZE];

			if (sl==0)
				sl = conf->copies;
			sl--;
			d = r10_bio->devs[sl].devnum;
			rdev = rcu_dereference(conf->mirrors[d].rdev);
			if (!rdev ||
			    !test_bit(In_sync, &rdev->flags))
				continue;

			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			if (r10_sync_page_io(rdev,
					     r10_bio->devs[sl].addr +
					     sect,
					     s, conf->tmppage, WRITE)
			    == 0) {
				/* The corrective write itself failed. */
				printk(KERN_NOTICE
				       "md/raid10:%s: read correction "
				       "write failed"
				       " (%d sectors at %llu on %s)\n",
				       mdname(mddev), s,
				       (unsigned long long)(
					       sect +
					       choose_data_offset(r10_bio,
								  rdev)),
				       bdevname(rdev->bdev, b));
				printk(KERN_NOTICE "md/raid10:%s: %s: failing "
				       "drive\n",
				       mdname(mddev),
				       bdevname(rdev->bdev, b));
			}
			rdev_dec_pending(rdev, mddev);
			rcu_read_lock();
		}
		/*
		 * Step 3: walk the same slots again, this time reading
		 * each copy back to check the correction.
		 */
		sl = start;
		while (sl != r10_bio->read_slot) {
			char b[BDEVNAME_SIZE];

			if (sl==0)
				sl = conf->copies;
			sl--;
			d = r10_bio->devs[sl].devnum;
			rdev = rcu_dereference(conf->mirrors[d].rdev);
			if (!rdev ||
			    !test_bit(In_sync, &rdev->flags))
				continue;

			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			switch (r10_sync_page_io(rdev,
						 r10_bio->devs[sl].addr +
						 sect,
						 s, conf->tmppage,
						 READ)) {
			case 0:
				/* The read-back failed. */
				printk(KERN_NOTICE
				       "md/raid10:%s: unable to read back "
				       "corrected sectors"
				       " (%d sectors at %llu on %s)\n",
				       mdname(mddev), s,
				       (unsigned long long)(
					       sect +
					       choose_data_offset(r10_bio, rdev)),
				       bdevname(rdev->bdev, b));
				printk(KERN_NOTICE "md/raid10:%s: %s: failing "
				       "drive\n",
				       mdname(mddev),
				       bdevname(rdev->bdev, b));
				break;
			case 1:
				/* Read-back succeeded: count it as corrected. */
				printk(KERN_INFO
				       "md/raid10:%s: read error corrected"
				       " (%d sectors at %llu on %s)\n",
				       mdname(mddev), s,
				       (unsigned long long)(
					       sect +
					       choose_data_offset(r10_bio, rdev)),
				       bdevname(rdev->bdev, b));
				atomic_add(s, &rdev->corrected_errors);
			}

			rdev_dec_pending(rdev, mddev);
			rcu_read_lock();
		}
		rcu_read_unlock();

		sectors -= s;
		sect += s;
	}
}
2432
2433static int narrow_write_error(struct r10bio *r10_bio, int i)
2434{
2435 struct bio *bio = r10_bio->master_bio;
2436 struct mddev *mddev = r10_bio->mddev;
2437 struct r10conf *conf = mddev->private;
2438 struct md_rdev *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450 int block_sectors;
2451 sector_t sector;
2452 int sectors;
2453 int sect_to_write = r10_bio->sectors;
2454 int ok = 1;
2455
2456 if (rdev->badblocks.shift < 0)
2457 return 0;
2458
2459 block_sectors = roundup(1 << rdev->badblocks.shift,
2460 bdev_logical_block_size(rdev->bdev) >> 9);
2461 sector = r10_bio->sector;
2462 sectors = ((r10_bio->sector + block_sectors)
2463 & ~(sector_t)(block_sectors - 1))
2464 - sector;
2465
2466 while (sect_to_write) {
2467 struct bio *wbio;
2468 if (sectors > sect_to_write)
2469 sectors = sect_to_write;
2470
2471 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
2472 bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
2473 wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
2474 choose_data_offset(r10_bio, rdev) +
2475 (sector - r10_bio->sector));
2476 wbio->bi_bdev = rdev->bdev;
2477 if (submit_bio_wait(WRITE, wbio) < 0)
2478
2479 ok = rdev_set_badblocks(rdev, sector,
2480 sectors, 0)
2481 && ok;
2482
2483 bio_put(wbio);
2484 sect_to_write -= sectors;
2485 sector += sectors;
2486 sectors = block_sectors;
2487 }
2488 return ok;
2489}
2490
/*
 * handle_read_error - recover from a failed read of a normal request.
 * @mddev:   the array
 * @r10_bio: the read request that failed on devs[read_slot]
 *
 * The failed bio is released.  On a writable array (mddev->ro == 0) the
 * array is frozen while fix_read_error() attempts a repair; otherwise
 * the slot is simply marked IO_BLOCKED.  The read is then redirected
 * to whichever device read_balance() selects.  If that device cannot
 * cover the whole request, the covered part is submitted and a new
 * r10bio is allocated for the remainder, repeating until done.  If no
 * device is available the request is completed through
 * raid_end_bio_io() after logging an unrecoverable error.
 */
static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
{
	int slot = r10_bio->read_slot;
	struct bio *bio;
	struct r10conf *conf = mddev->private;
	struct md_rdev *rdev = r10_bio->devs[slot].rdev;
	char b[BDEVNAME_SIZE];
	unsigned long do_sync;
	int max_sectors;

	/* Save the device name for later messages, then drop the bio. */
	bio = r10_bio->devs[slot].bio;
	bdevname(bio->bi_bdev, b);
	bio_put(bio);
	r10_bio->devs[slot].bio = NULL;

	if (mddev->ro == 0) {
		freeze_array(conf, 1);
		fix_read_error(conf, mddev, r10_bio);
		unfreeze_array(conf);
	} else
		r10_bio->devs[slot].bio = IO_BLOCKED;

	rdev_dec_pending(rdev, mddev);

read_more:
	/* Pick a device for the range starting at r10_bio->sector. */
	rdev = read_balance(conf, r10_bio, &max_sectors);
	if (rdev == NULL) {
		printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
		       " read error for block %llu\n",
		       mdname(mddev), b,
		       (unsigned long long)r10_bio->sector);
		raid_end_bio_io(r10_bio);
		return;
	}

	do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
	/* The slot is re-read because read_slot may have changed. */
	slot = r10_bio->read_slot;
	printk_ratelimited(
		KERN_ERR
		"md/raid10:%s: %s: redirecting "
		"sector %llu to another mirror\n",
		mdname(mddev),
		bdevname(rdev->bdev, b),
		(unsigned long long)r10_bio->sector);
	/* Build a fresh read for the part the chosen device can serve. */
	bio = bio_clone_mddev(r10_bio->master_bio,
			      GFP_NOIO, mddev);
	bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
	r10_bio->devs[slot].bio = bio;
	r10_bio->devs[slot].rdev = rdev;
	bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
		+ choose_data_offset(r10_bio, rdev);
	bio->bi_bdev = rdev->bdev;
	bio->bi_rw = READ | do_sync;
	bio->bi_private = r10_bio;
	bio->bi_end_io = raid10_end_read_request;
	if (max_sectors < r10_bio->sectors) {
		/* Only part of the request is covered: split it. */
		struct bio *mbio = r10_bio->master_bio;
		int sectors_handled =
			r10_bio->sector + max_sectors
			- mbio->bi_iter.bi_sector;
		r10_bio->sectors = max_sectors;
		/*
		 * bi_phys_segments of the master bio is used as a count
		 * of outstanding pieces: 0 becomes 2 on the first split,
		 * then it grows by one per additional piece.
		 */
		spin_lock_irq(&conf->device_lock);
		if (mbio->bi_phys_segments == 0)
			mbio->bi_phys_segments = 2;
		else
			mbio->bi_phys_segments++;
		spin_unlock_irq(&conf->device_lock);
		generic_make_request(bio);

		/* Set up a new r10bio for the part not yet handled. */
		r10_bio = mempool_alloc(conf->r10bio_pool,
					GFP_NOIO);
		r10_bio->master_bio = mbio;
		r10_bio->sectors = bio_sectors(mbio) - sectors_handled;
		r10_bio->state = 0;
		set_bit(R10BIO_ReadError,
			&r10_bio->state);
		r10_bio->mddev = mddev;
		r10_bio->sector = mbio->bi_iter.bi_sector
			+ sectors_handled;

		goto read_more;
	} else
		generic_make_request(bio);
}
2584
2585static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2586{
2587
2588
2589
2590
2591
2592
2593 int m;
2594 struct md_rdev *rdev;
2595
2596 if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
2597 test_bit(R10BIO_IsRecover, &r10_bio->state)) {
2598 for (m = 0; m < conf->copies; m++) {
2599 int dev = r10_bio->devs[m].devnum;
2600 rdev = conf->mirrors[dev].rdev;
2601 if (r10_bio->devs[m].bio == NULL)
2602 continue;
2603 if (!r10_bio->devs[m].bio->bi_error) {
2604 rdev_clear_badblocks(
2605 rdev,
2606 r10_bio->devs[m].addr,
2607 r10_bio->sectors, 0);
2608 } else {
2609 if (!rdev_set_badblocks(
2610 rdev,
2611 r10_bio->devs[m].addr,
2612 r10_bio->sectors, 0))
2613 md_error(conf->mddev, rdev);
2614 }
2615 rdev = conf->mirrors[dev].replacement;
2616 if (r10_bio->devs[m].repl_bio == NULL)
2617 continue;
2618
2619 if (!r10_bio->devs[m].repl_bio->bi_error) {
2620 rdev_clear_badblocks(
2621 rdev,
2622 r10_bio->devs[m].addr,
2623 r10_bio->sectors, 0);
2624 } else {
2625 if (!rdev_set_badblocks(
2626 rdev,
2627 r10_bio->devs[m].addr,
2628 r10_bio->sectors, 0))
2629 md_error(conf->mddev, rdev);
2630 }
2631 }
2632 put_buf(r10_bio);
2633 } else {
2634 bool fail = false;
2635 for (m = 0; m < conf->copies; m++) {
2636 int dev = r10_bio->devs[m].devnum;
2637 struct bio *bio = r10_bio->devs[m].bio;
2638 rdev = conf->mirrors[dev].rdev;
2639 if (bio == IO_MADE_GOOD) {
2640 rdev_clear_badblocks(
2641 rdev,
2642 r10_bio->devs[m].addr,
2643 r10_bio->sectors, 0);
2644 rdev_dec_pending(rdev, conf->mddev);
2645 } else if (bio != NULL && bio->bi_error) {
2646 fail = true;
2647 if (!narrow_write_error(r10_bio, m)) {
2648 md_error(conf->mddev, rdev);
2649 set_bit(R10BIO_Degraded,
2650 &r10_bio->state);
2651 }
2652 rdev_dec_pending(rdev, conf->mddev);
2653 }
2654 bio = r10_bio->devs[m].repl_bio;
2655 rdev = conf->mirrors[dev].replacement;
2656 if (rdev && bio == IO_MADE_GOOD) {
2657 rdev_clear_badblocks(
2658 rdev,
2659 r10_bio->devs[m].addr,
2660 r10_bio->sectors, 0);
2661 rdev_dec_pending(rdev, conf->mddev);
2662 }
2663 }
2664 if (fail) {
2665 spin_lock_irq(&conf->device_lock);
2666 list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
2667 conf->nr_queued++;
2668 spin_unlock_irq(&conf->device_lock);
2669 md_wakeup_thread(conf->mddev->thread);
2670 } else {
2671 if (test_bit(R10BIO_WriteError,
2672 &r10_bio->state))
2673 close_write(r10_bio);
2674 raid_end_bio_io(r10_bio);
2675 }
2676 }
2677}
2678
2679static void raid10d(struct md_thread *thread)
2680{
2681 struct mddev *mddev = thread->mddev;
2682 struct r10bio *r10_bio;
2683 unsigned long flags;
2684 struct r10conf *conf = mddev->private;
2685 struct list_head *head = &conf->retry_list;
2686 struct blk_plug plug;
2687
2688 md_check_recovery(mddev);
2689
2690 if (!list_empty_careful(&conf->bio_end_io_list) &&
2691 !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
2692 LIST_HEAD(tmp);
2693 spin_lock_irqsave(&conf->device_lock, flags);
2694 if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
2695 while (!list_empty(&conf->bio_end_io_list)) {
2696 list_move(conf->bio_end_io_list.prev, &tmp);
2697 conf->nr_queued--;
2698 }
2699 }
2700 spin_unlock_irqrestore(&conf->device_lock, flags);
2701 while (!list_empty(&tmp)) {
2702 r10_bio = list_first_entry(&tmp, struct r10bio,
2703 retry_list);
2704 list_del(&r10_bio->retry_list);
2705 if (mddev->degraded)
2706 set_bit(R10BIO_Degraded, &r10_bio->state);
2707
2708 if (test_bit(R10BIO_WriteError,
2709 &r10_bio->state))
2710 close_write(r10_bio);
2711 raid_end_bio_io(r10_bio);
2712 }
2713 }
2714
2715 blk_start_plug(&plug);
2716 for (;;) {
2717
2718 flush_pending_writes(conf);
2719
2720 spin_lock_irqsave(&conf->device_lock, flags);
2721 if (list_empty(head)) {
2722 spin_unlock_irqrestore(&conf->device_lock, flags);
2723 break;
2724 }
2725 r10_bio = list_entry(head->prev, struct r10bio, retry_list);
2726 list_del(head->prev);
2727 conf->nr_queued--;
2728 spin_unlock_irqrestore(&conf->device_lock, flags);
2729
2730 mddev = r10_bio->mddev;
2731 conf = mddev->private;
2732 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
2733 test_bit(R10BIO_WriteError, &r10_bio->state))
2734 handle_write_completed(conf, r10_bio);
2735 else if (test_bit(R10BIO_IsReshape, &r10_bio->state))
2736 reshape_request_write(mddev, r10_bio);
2737 else if (test_bit(R10BIO_IsSync, &r10_bio->state))
2738 sync_request_write(mddev, r10_bio);
2739 else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
2740 recovery_request_write(mddev, r10_bio);
2741 else if (test_bit(R10BIO_ReadError, &r10_bio->state))
2742 handle_read_error(mddev, r10_bio);
2743 else {
2744
2745
2746
2747 int slot = r10_bio->read_slot;
2748 generic_make_request(r10_bio->devs[slot].bio);
2749 }
2750
2751 cond_resched();
2752 if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
2753 md_check_recovery(mddev);
2754 }
2755 blk_finish_plug(&plug);
2756}
2757
2758static int init_resync(struct r10conf *conf)
2759{
2760 int buffs;
2761 int i;
2762
2763 buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
2764 BUG_ON(conf->r10buf_pool);
2765 conf->have_replacement = 0;
2766 for (i = 0; i < conf->geo.raid_disks; i++)
2767 if (conf->mirrors[i].replacement)
2768 conf->have_replacement = 1;
2769 conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
2770 if (!conf->r10buf_pool)
2771 return -ENOMEM;
2772 conf->next_resync = 0;
2773 return 0;
2774}
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
2809 int *skipped)
2810{
2811 struct r10conf *conf = mddev->private;
2812 struct r10bio *r10_bio;
2813 struct bio *biolist = NULL, *bio;
2814 sector_t max_sector, nr_sectors;
2815 int i;
2816 int max_sync;
2817 sector_t sync_blocks;
2818 sector_t sectors_skipped = 0;
2819 int chunks_skipped = 0;
2820 sector_t chunk_mask = conf->geo.chunk_mask;
2821
2822 if (!conf->r10buf_pool)
2823 if (init_resync(conf))
2824 return 0;
2825
2826
2827
2828
2829
2830 if (mddev->bitmap == NULL &&
2831 mddev->recovery_cp == MaxSector &&
2832 mddev->reshape_position == MaxSector &&
2833 !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
2834 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
2835 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2836 conf->fullsync == 0) {
2837 *skipped = 1;
2838 return mddev->dev_sectors - sector_nr;
2839 }
2840
2841 skipped:
2842 max_sector = mddev->dev_sectors;
2843 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
2844 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2845 max_sector = mddev->resync_max_sectors;
2846 if (sector_nr >= max_sector) {
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
2857 end_reshape(conf);
2858 close_sync(conf);
2859 return 0;
2860 }
2861
2862 if (mddev->curr_resync < max_sector) {
2863 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
2864 bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
2865 &sync_blocks, 1);
2866 else for (i = 0; i < conf->geo.raid_disks; i++) {
2867 sector_t sect =
2868 raid10_find_virt(conf, mddev->curr_resync, i);
2869 bitmap_end_sync(mddev->bitmap, sect,
2870 &sync_blocks, 1);
2871 }
2872 } else {
2873
2874 if ((!mddev->bitmap || conf->fullsync)
2875 && conf->have_replacement
2876 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2877
2878
2879
2880 for (i = 0; i < conf->geo.raid_disks; i++)
2881 if (conf->mirrors[i].replacement)
2882 conf->mirrors[i].replacement
2883 ->recovery_offset
2884 = MaxSector;
2885 }
2886 conf->fullsync = 0;
2887 }
2888 bitmap_close_sync(mddev->bitmap);
2889 close_sync(conf);
2890 *skipped = 1;
2891 return sectors_skipped;
2892 }
2893
2894 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2895 return reshape_request(mddev, sector_nr, skipped);
2896
2897 if (chunks_skipped >= conf->geo.raid_disks) {
2898
2899
2900
2901 *skipped = 1;
2902 return (max_sector - sector_nr) + sectors_skipped;
2903 }
2904
2905 if (max_sector > mddev->resync_max)
2906 max_sector = mddev->resync_max;
2907
2908
2909
2910
2911 if (conf->geo.near_copies < conf->geo.raid_disks &&
2912 max_sector > (sector_nr | chunk_mask))
2913 max_sector = (sector_nr | chunk_mask) + 1;
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
2931 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2932
2933 int j;
2934 r10_bio = NULL;
2935
2936 for (i = 0 ; i < conf->geo.raid_disks; i++) {
2937 int still_degraded;
2938 struct r10bio *rb2;
2939 sector_t sect;
2940 int must_sync;
2941 int any_working;
2942 struct raid10_info *mirror = &conf->mirrors[i];
2943
2944 if ((mirror->rdev == NULL ||
2945 test_bit(In_sync, &mirror->rdev->flags))
2946 &&
2947 (mirror->replacement == NULL ||
2948 test_bit(Faulty,
2949 &mirror->replacement->flags)))
2950 continue;
2951
2952 still_degraded = 0;
2953
2954 rb2 = r10_bio;
2955 sect = raid10_find_virt(conf, sector_nr, i);
2956 if (sect >= mddev->resync_max_sectors) {
2957
2958
2959
2960 continue;
2961 }
2962
2963
2964
2965
2966 must_sync = bitmap_start_sync(mddev->bitmap, sect,
2967 &sync_blocks, 1);
2968 if (sync_blocks < max_sync)
2969 max_sync = sync_blocks;
2970 if (!must_sync &&
2971 mirror->replacement == NULL &&
2972 !conf->fullsync) {
2973
2974
2975
2976 chunks_skipped = -1;
2977 continue;
2978 }
2979
2980 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
2981 r10_bio->state = 0;
2982 raise_barrier(conf, rb2 != NULL);
2983 atomic_set(&r10_bio->remaining, 0);
2984
2985 r10_bio->master_bio = (struct bio*)rb2;
2986 if (rb2)
2987 atomic_inc(&rb2->remaining);
2988 r10_bio->mddev = mddev;
2989 set_bit(R10BIO_IsRecover, &r10_bio->state);
2990 r10_bio->sector = sect;
2991
2992 raid10_find_phys(conf, r10_bio);
2993
2994
2995
2996
2997 for (j = 0; j < conf->geo.raid_disks; j++)
2998 if (conf->mirrors[j].rdev == NULL ||
2999 test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
3000 still_degraded = 1;
3001 break;
3002 }
3003
3004 must_sync = bitmap_start_sync(mddev->bitmap, sect,
3005 &sync_blocks, still_degraded);
3006
3007 any_working = 0;
3008 for (j=0; j<conf->copies;j++) {
3009 int k;
3010 int d = r10_bio->devs[j].devnum;
3011 sector_t from_addr, to_addr;
3012 struct md_rdev *rdev;
3013 sector_t sector, first_bad;
3014 int bad_sectors;
3015 if (!conf->mirrors[d].rdev ||
3016 !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
3017 continue;
3018
3019 any_working = 1;
3020 rdev = conf->mirrors[d].rdev;
3021 sector = r10_bio->devs[j].addr;
3022
3023 if (is_badblock(rdev, sector, max_sync,
3024 &first_bad, &bad_sectors)) {
3025 if (first_bad > sector)
3026 max_sync = first_bad - sector;
3027 else {
3028 bad_sectors -= (sector
3029 - first_bad);
3030 if (max_sync > bad_sectors)
3031 max_sync = bad_sectors;
3032 continue;
3033 }
3034 }
3035 bio = r10_bio->devs[0].bio;
3036 bio_reset(bio);
3037 bio->bi_next = biolist;
3038 biolist = bio;
3039 bio->bi_private = r10_bio;
3040 bio->bi_end_io = end_sync_read;
3041 bio->bi_rw = READ;
3042 from_addr = r10_bio->devs[j].addr;
3043 bio->bi_iter.bi_sector = from_addr +
3044 rdev->data_offset;
3045 bio->bi_bdev = rdev->bdev;
3046 atomic_inc(&rdev->nr_pending);
3047
3048
3049 for (k=0; k<conf->copies; k++)
3050 if (r10_bio->devs[k].devnum == i)
3051 break;
3052 BUG_ON(k == conf->copies);
3053 to_addr = r10_bio->devs[k].addr;
3054 r10_bio->devs[0].devnum = d;
3055 r10_bio->devs[0].addr = from_addr;
3056 r10_bio->devs[1].devnum = i;
3057 r10_bio->devs[1].addr = to_addr;
3058
3059 rdev = mirror->rdev;
3060 if (!test_bit(In_sync, &rdev->flags)) {
3061 bio = r10_bio->devs[1].bio;
3062 bio_reset(bio);
3063 bio->bi_next = biolist;
3064 biolist = bio;
3065 bio->bi_private = r10_bio;
3066 bio->bi_end_io = end_sync_write;
3067 bio->bi_rw = WRITE;
3068 bio->bi_iter.bi_sector = to_addr
3069 + rdev->data_offset;
3070 bio->bi_bdev = rdev->bdev;
3071 atomic_inc(&r10_bio->remaining);
3072 } else
3073 r10_bio->devs[1].bio->bi_end_io = NULL;
3074
3075
3076 bio = r10_bio->devs[1].repl_bio;
3077 if (bio)
3078 bio->bi_end_io = NULL;
3079 rdev = mirror->replacement;
3080
3081
3082
3083
3084
3085
3086
3087
3088 if (rdev == NULL || bio == NULL ||
3089 test_bit(Faulty, &rdev->flags))
3090 break;
3091 bio_reset(bio);
3092 bio->bi_next = biolist;
3093 biolist = bio;
3094 bio->bi_private = r10_bio;
3095 bio->bi_end_io = end_sync_write;
3096 bio->bi_rw = WRITE;
3097 bio->bi_iter.bi_sector = to_addr +
3098 rdev->data_offset;
3099 bio->bi_bdev = rdev->bdev;
3100 atomic_inc(&r10_bio->remaining);
3101 break;
3102 }
3103 if (j == conf->copies) {
3104
3105
3106 if (any_working) {
3107
3108
3109
3110 int k;
3111 for (k = 0; k < conf->copies; k++)
3112 if (r10_bio->devs[k].devnum == i)
3113 break;
3114 if (!test_bit(In_sync,
3115 &mirror->rdev->flags)
3116 && !rdev_set_badblocks(
3117 mirror->rdev,
3118 r10_bio->devs[k].addr,
3119 max_sync, 0))
3120 any_working = 0;
3121 if (mirror->replacement &&
3122 !rdev_set_badblocks(
3123 mirror->replacement,
3124 r10_bio->devs[k].addr,
3125 max_sync, 0))
3126 any_working = 0;
3127 }
3128 if (!any_working) {
3129 if (!test_and_set_bit(MD_RECOVERY_INTR,
3130 &mddev->recovery))
3131 printk(KERN_INFO "md/raid10:%s: insufficient "
3132 "working devices for recovery.\n",
3133 mdname(mddev));
3134 mirror->recovery_disabled
3135 = mddev->recovery_disabled;
3136 }
3137 put_buf(r10_bio);
3138 if (rb2)
3139 atomic_dec(&rb2->remaining);
3140 r10_bio = rb2;
3141 break;
3142 }
3143 }
3144 if (biolist == NULL) {
3145 while (r10_bio) {
3146 struct r10bio *rb2 = r10_bio;
3147 r10_bio = (struct r10bio*) rb2->master_bio;
3148 rb2->master_bio = NULL;
3149 put_buf(rb2);
3150 }
3151 goto giveup;
3152 }
3153 } else {
3154
3155 int count = 0;
3156
3157 bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
3158
3159 if (!bitmap_start_sync(mddev->bitmap, sector_nr,
3160 &sync_blocks, mddev->degraded) &&
3161 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
3162 &mddev->recovery)) {
3163
3164 *skipped = 1;
3165 return sync_blocks + sectors_skipped;
3166 }
3167 if (sync_blocks < max_sync)
3168 max_sync = sync_blocks;
3169 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
3170 r10_bio->state = 0;
3171
3172 r10_bio->mddev = mddev;
3173 atomic_set(&r10_bio->remaining, 0);
3174 raise_barrier(conf, 0);
3175 conf->next_resync = sector_nr;
3176
3177 r10_bio->master_bio = NULL;
3178 r10_bio->sector = sector_nr;
3179 set_bit(R10BIO_IsSync, &r10_bio->state);
3180 raid10_find_phys(conf, r10_bio);
3181 r10_bio->sectors = (sector_nr | chunk_mask) - sector_nr + 1;
3182
3183 for (i = 0; i < conf->copies; i++) {
3184 int d = r10_bio->devs[i].devnum;
3185 sector_t first_bad, sector;
3186 int bad_sectors;
3187
3188 if (r10_bio->devs[i].repl_bio)
3189 r10_bio->devs[i].repl_bio->bi_end_io = NULL;
3190
3191 bio = r10_bio->devs[i].bio;
3192 bio_reset(bio);
3193 bio->bi_error = -EIO;
3194 if (conf->mirrors[d].rdev == NULL ||
3195 test_bit(Faulty, &conf->mirrors[d].rdev->flags))
3196 continue;
3197 sector = r10_bio->devs[i].addr;
3198 if (is_badblock(conf->mirrors[d].rdev,
3199 sector, max_sync,
3200 &first_bad, &bad_sectors)) {
3201 if (first_bad > sector)
3202 max_sync = first_bad - sector;
3203 else {
3204 bad_sectors -= (sector - first_bad);
3205 if (max_sync > bad_sectors)
3206 max_sync = bad_sectors;
3207 continue;
3208 }
3209 }
3210 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
3211 atomic_inc(&r10_bio->remaining);
3212 bio->bi_next = biolist;
3213 biolist = bio;
3214 bio->bi_private = r10_bio;
3215 bio->bi_end_io = end_sync_read;
3216 bio->bi_rw = READ;
3217 bio->bi_iter.bi_sector = sector +
3218 conf->mirrors[d].rdev->data_offset;
3219 bio->bi_bdev = conf->mirrors[d].rdev->bdev;
3220 count++;
3221
3222 if (conf->mirrors[d].replacement == NULL ||
3223 test_bit(Faulty,
3224 &conf->mirrors[d].replacement->flags))
3225 continue;
3226
3227
3228 bio = r10_bio->devs[i].repl_bio;
3229 bio_reset(bio);
3230 bio->bi_error = -EIO;
3231
3232 sector = r10_bio->devs[i].addr;
3233 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
3234 bio->bi_next = biolist;
3235 biolist = bio;
3236 bio->bi_private = r10_bio;
3237 bio->bi_end_io = end_sync_write;
3238 bio->bi_rw = WRITE;
3239 bio->bi_iter.bi_sector = sector +
3240 conf->mirrors[d].replacement->data_offset;
3241 bio->bi_bdev = conf->mirrors[d].replacement->bdev;
3242 count++;
3243 }
3244
3245 if (count < 2) {
3246 for (i=0; i<conf->copies; i++) {
3247 int d = r10_bio->devs[i].devnum;
3248 if (r10_bio->devs[i].bio->bi_end_io)
3249 rdev_dec_pending(conf->mirrors[d].rdev,
3250 mddev);
3251 if (r10_bio->devs[i].repl_bio &&
3252 r10_bio->devs[i].repl_bio->bi_end_io)
3253 rdev_dec_pending(
3254 conf->mirrors[d].replacement,
3255 mddev);
3256 }
3257 put_buf(r10_bio);
3258 biolist = NULL;
3259 goto giveup;
3260 }
3261 }
3262
3263 nr_sectors = 0;
3264 if (sector_nr + max_sync < max_sector)
3265 max_sector = sector_nr + max_sync;
3266 do {
3267 struct page *page;
3268 int len = PAGE_SIZE;
3269 if (sector_nr + (len>>9) > max_sector)
3270 len = (max_sector - sector_nr) << 9;
3271 if (len == 0)
3272 break;
3273 for (bio= biolist ; bio ; bio=bio->bi_next) {
3274 struct bio *bio2;
3275 page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
3276 if (bio_add_page(bio, page, len, 0))
3277 continue;
3278
3279
3280 bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
3281 for (bio2 = biolist;
3282 bio2 && bio2 != bio;
3283 bio2 = bio2->bi_next) {
3284
3285 bio2->bi_vcnt--;
3286 bio2->bi_iter.bi_size -= len;
3287 bio_clear_flag(bio2, BIO_SEG_VALID);
3288 }
3289 goto bio_full;
3290 }
3291 nr_sectors += len>>9;
3292 sector_nr += len>>9;
3293 } while (biolist->bi_vcnt < RESYNC_PAGES);
3294 bio_full:
3295 r10_bio->sectors = nr_sectors;
3296
3297 while (biolist) {
3298 bio = biolist;
3299 biolist = biolist->bi_next;
3300
3301 bio->bi_next = NULL;
3302 r10_bio = bio->bi_private;
3303 r10_bio->sectors = nr_sectors;
3304
3305 if (bio->bi_end_io == end_sync_read) {
3306 md_sync_acct(bio->bi_bdev, nr_sectors);
3307 bio->bi_error = 0;
3308 generic_make_request(bio);
3309 }
3310 }
3311
3312 if (sectors_skipped)
3313
3314
3315
3316 md_done_sync(mddev, sectors_skipped, 1);
3317
3318 return sectors_skipped + nr_sectors;
3319 giveup:
3320
3321
3322
3323
3324 if (sector_nr + max_sync < max_sector)
3325 max_sector = sector_nr + max_sync;
3326
3327 sectors_skipped += (max_sector - sector_nr);
3328 chunks_skipped ++;
3329 sector_nr = max_sector;
3330 goto skipped;
3331}
3332
3333static sector_t
3334raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks)
3335{
3336 sector_t size;
3337 struct r10conf *conf = mddev->private;
3338
3339 if (!raid_disks)
3340 raid_disks = min(conf->geo.raid_disks,
3341 conf->prev.raid_disks);
3342 if (!sectors)
3343 sectors = conf->dev_sectors;
3344
3345 size = sectors >> conf->geo.chunk_shift;
3346 sector_div(size, conf->geo.far_copies);
3347 size = size * raid_disks;
3348 sector_div(size, conf->geo.near_copies);
3349
3350 return size << conf->geo.chunk_shift;
3351}
3352
3353static void calc_sectors(struct r10conf *conf, sector_t size)
3354{
3355
3356
3357
3358
3359
3360 size = size >> conf->geo.chunk_shift;
3361 sector_div(size, conf->geo.far_copies);
3362 size = size * conf->geo.raid_disks;
3363 sector_div(size, conf->geo.near_copies);
3364
3365
3366 size = size * conf->copies;
3367
3368
3369
3370
3371 size = DIV_ROUND_UP_SECTOR_T(size, conf->geo.raid_disks);
3372
3373 conf->dev_sectors = size << conf->geo.chunk_shift;
3374
3375 if (conf->geo.far_offset)
3376 conf->geo.stride = 1 << conf->geo.chunk_shift;
3377 else {
3378 sector_div(size, conf->geo.far_copies);
3379 conf->geo.stride = size << conf->geo.chunk_shift;
3380 }
3381}
3382
3383enum geo_type {geo_new, geo_old, geo_start};
3384static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3385{
3386 int nc, fc, fo;
3387 int layout, chunk, disks;
3388 switch (new) {
3389 case geo_old:
3390 layout = mddev->layout;
3391 chunk = mddev->chunk_sectors;
3392 disks = mddev->raid_disks - mddev->delta_disks;
3393 break;
3394 case geo_new:
3395 layout = mddev->new_layout;
3396 chunk = mddev->new_chunk_sectors;
3397 disks = mddev->raid_disks;
3398 break;
3399 default:
3400 case geo_start:
3401
3402 layout = mddev->new_layout;
3403 chunk = mddev->new_chunk_sectors;
3404 disks = mddev->raid_disks + mddev->delta_disks;
3405 break;
3406 }
3407 if (layout >> 19)
3408 return -1;
3409 if (chunk < (PAGE_SIZE >> 9) ||
3410 !is_power_of_2(chunk))
3411 return -2;
3412 nc = layout & 255;
3413 fc = (layout >> 8) & 255;
3414 fo = layout & (1<<16);
3415 geo->raid_disks = disks;
3416 geo->near_copies = nc;
3417 geo->far_copies = fc;
3418 geo->far_offset = fo;
3419 switch (layout >> 17) {
3420 case 0:
3421 geo->far_set_size = disks;
3422 break;
3423 case 1:
3424
3425 geo->far_set_size = disks/fc;
3426 WARN(geo->far_set_size < fc,
3427 "This RAID10 layout does not provide data safety - please backup and create new array\n");
3428 break;
3429 case 2:
3430 geo->far_set_size = fc * nc;
3431 break;
3432 default:
3433 return -1;
3434 }
3435 geo->chunk_mask = chunk - 1;
3436 geo->chunk_shift = ffz(~chunk);
3437 return nc*fc;
3438}
3439
3440static struct r10conf *setup_conf(struct mddev *mddev)
3441{
3442 struct r10conf *conf = NULL;
3443 int err = -EINVAL;
3444 struct geom geo;
3445 int copies;
3446
3447 copies = setup_geo(&geo, mddev, geo_new);
3448
3449 if (copies == -2) {
3450 printk(KERN_ERR "md/raid10:%s: chunk size must be "
3451 "at least PAGE_SIZE(%ld) and be a power of 2.\n",
3452 mdname(mddev), PAGE_SIZE);
3453 goto out;
3454 }
3455
3456 if (copies < 2 || copies > mddev->raid_disks) {
3457 printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
3458 mdname(mddev), mddev->new_layout);
3459 goto out;
3460 }
3461
3462 err = -ENOMEM;
3463 conf = kzalloc(sizeof(struct r10conf), GFP_KERNEL);
3464 if (!conf)
3465 goto out;
3466
3467
3468 conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
3469 max(0,-mddev->delta_disks)),
3470 GFP_KERNEL);
3471 if (!conf->mirrors)
3472 goto out;
3473
3474 conf->tmppage = alloc_page(GFP_KERNEL);
3475 if (!conf->tmppage)
3476 goto out;
3477
3478 conf->geo = geo;
3479 conf->copies = copies;
3480 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
3481 r10bio_pool_free, conf);
3482 if (!conf->r10bio_pool)
3483 goto out;
3484
3485 calc_sectors(conf, mddev->dev_sectors);
3486 if (mddev->reshape_position == MaxSector) {
3487 conf->prev = conf->geo;
3488 conf->reshape_progress = MaxSector;
3489 } else {
3490 if (setup_geo(&conf->prev, mddev, geo_old) != conf->copies) {
3491 err = -EINVAL;
3492 goto out;
3493 }
3494 conf->reshape_progress = mddev->reshape_position;
3495 if (conf->prev.far_offset)
3496 conf->prev.stride = 1 << conf->prev.chunk_shift;
3497 else
3498
3499 conf->prev.stride = conf->dev_sectors;
3500 }
3501 conf->reshape_safe = conf->reshape_progress;
3502 spin_lock_init(&conf->device_lock);
3503 INIT_LIST_HEAD(&conf->retry_list);
3504 INIT_LIST_HEAD(&conf->bio_end_io_list);
3505
3506 spin_lock_init(&conf->resync_lock);
3507 init_waitqueue_head(&conf->wait_barrier);
3508
3509 conf->thread = md_register_thread(raid10d, mddev, "raid10");
3510 if (!conf->thread)
3511 goto out;
3512
3513 conf->mddev = mddev;
3514 return conf;
3515
3516 out:
3517 if (err == -ENOMEM)
3518 printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
3519 mdname(mddev));
3520 if (conf) {
3521 mempool_destroy(conf->r10bio_pool);
3522 kfree(conf->mirrors);
3523 safe_put_page(conf->tmppage);
3524 kfree(conf);
3525 }
3526 return ERR_PTR(err);
3527}
3528
3529static int raid10_run(struct mddev *mddev)
3530{
3531 struct r10conf *conf;
3532 int i, disk_idx, chunk_size;
3533 struct raid10_info *disk;
3534 struct md_rdev *rdev;
3535 sector_t size;
3536 sector_t min_offset_diff = 0;
3537 int first = 1;
3538 bool discard_supported = false;
3539
3540 if (mddev->private == NULL) {
3541 conf = setup_conf(mddev);
3542 if (IS_ERR(conf))
3543 return PTR_ERR(conf);
3544 mddev->private = conf;
3545 }
3546 conf = mddev->private;
3547 if (!conf)
3548 goto out;
3549
3550 mddev->thread = conf->thread;
3551 conf->thread = NULL;
3552
3553 chunk_size = mddev->chunk_sectors << 9;
3554 if (mddev->queue) {
3555 blk_queue_max_discard_sectors(mddev->queue,
3556 mddev->chunk_sectors);
3557 blk_queue_max_write_same_sectors(mddev->queue, 0);
3558 blk_queue_io_min(mddev->queue, chunk_size);
3559 if (conf->geo.raid_disks % conf->geo.near_copies)
3560 blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
3561 else
3562 blk_queue_io_opt(mddev->queue, chunk_size *
3563 (conf->geo.raid_disks / conf->geo.near_copies));
3564 }
3565
3566 rdev_for_each(rdev, mddev) {
3567 long long diff;
3568 struct request_queue *q;
3569
3570 disk_idx = rdev->raid_disk;
3571 if (disk_idx < 0)
3572 continue;
3573 if (disk_idx >= conf->geo.raid_disks &&
3574 disk_idx >= conf->prev.raid_disks)
3575 continue;
3576 disk = conf->mirrors + disk_idx;
3577
3578 if (test_bit(Replacement, &rdev->flags)) {
3579 if (disk->replacement)
3580 goto out_free_conf;
3581 disk->replacement = rdev;
3582 } else {
3583 if (disk->rdev)
3584 goto out_free_conf;
3585 disk->rdev = rdev;
3586 }
3587 q = bdev_get_queue(rdev->bdev);
3588 diff = (rdev->new_data_offset - rdev->data_offset);
3589 if (!mddev->reshape_backwards)
3590 diff = -diff;
3591 if (diff < 0)
3592 diff = 0;
3593 if (first || diff < min_offset_diff)
3594 min_offset_diff = diff;
3595
3596 if (mddev->gendisk)
3597 disk_stack_limits(mddev->gendisk, rdev->bdev,
3598 rdev->data_offset << 9);
3599
3600 disk->head_position = 0;
3601
3602 if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
3603 discard_supported = true;
3604 }
3605
3606 if (mddev->queue) {
3607 if (discard_supported)
3608 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
3609 mddev->queue);
3610 else
3611 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
3612 mddev->queue);
3613 }
3614
3615 if (!enough(conf, -1)) {
3616 printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
3617 mdname(mddev));
3618 goto out_free_conf;
3619 }
3620
3621 if (conf->reshape_progress != MaxSector) {
3622
3623 if (conf->geo.far_copies != 1 &&
3624 conf->geo.far_offset == 0)
3625 goto out_free_conf;
3626 if (conf->prev.far_copies != 1 &&
3627 conf->prev.far_offset == 0)
3628 goto out_free_conf;
3629 }
3630
3631 mddev->degraded = 0;
3632 for (i = 0;
3633 i < conf->geo.raid_disks
3634 || i < conf->prev.raid_disks;
3635 i++) {
3636
3637 disk = conf->mirrors + i;
3638
3639 if (!disk->rdev && disk->replacement) {
3640
3641 disk->rdev = disk->replacement;
3642 disk->replacement = NULL;
3643 clear_bit(Replacement, &disk->rdev->flags);
3644 }
3645
3646 if (!disk->rdev ||
3647 !test_bit(In_sync, &disk->rdev->flags)) {
3648 disk->head_position = 0;
3649 mddev->degraded++;
3650 if (disk->rdev &&
3651 disk->rdev->saved_raid_disk < 0)
3652 conf->fullsync = 1;
3653 }
3654 disk->recovery_disabled = mddev->recovery_disabled - 1;
3655 }
3656
3657 if (mddev->recovery_cp != MaxSector)
3658 printk(KERN_NOTICE "md/raid10:%s: not clean"
3659 " -- starting background reconstruction\n",
3660 mdname(mddev));
3661 printk(KERN_INFO
3662 "md/raid10:%s: active with %d out of %d devices\n",
3663 mdname(mddev), conf->geo.raid_disks - mddev->degraded,
3664 conf->geo.raid_disks);
3665
3666
3667
3668 mddev->dev_sectors = conf->dev_sectors;
3669 size = raid10_size(mddev, 0, 0);
3670 md_set_array_sectors(mddev, size);
3671 mddev->resync_max_sectors = size;
3672
3673 if (mddev->queue) {
3674 int stripe = conf->geo.raid_disks *
3675 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
3676
3677
3678
3679
3680
3681 stripe /= conf->geo.near_copies;
3682 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
3683 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
3684 }
3685
3686 if (md_integrity_register(mddev))
3687 goto out_free_conf;
3688
3689 if (conf->reshape_progress != MaxSector) {
3690 unsigned long before_length, after_length;
3691
3692 before_length = ((1 << conf->prev.chunk_shift) *
3693 conf->prev.far_copies);
3694 after_length = ((1 << conf->geo.chunk_shift) *
3695 conf->geo.far_copies);
3696
3697 if (max(before_length, after_length) > min_offset_diff) {
3698
3699 printk("md/raid10: offset difference not enough to continue reshape\n");
3700 goto out_free_conf;
3701 }
3702 conf->offset_diff = min_offset_diff;
3703
3704 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3705 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
3706 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
3707 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
3708 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
3709 "reshape");
3710 }
3711
3712 return 0;
3713
3714out_free_conf:
3715 md_unregister_thread(&mddev->thread);
3716 mempool_destroy(conf->r10bio_pool);
3717 safe_put_page(conf->tmppage);
3718 kfree(conf->mirrors);
3719 kfree(conf);
3720 mddev->private = NULL;
3721out:
3722 return -EIO;
3723}
3724
3725static void raid10_free(struct mddev *mddev, void *priv)
3726{
3727 struct r10conf *conf = priv;
3728
3729 mempool_destroy(conf->r10bio_pool);
3730 safe_put_page(conf->tmppage);
3731 kfree(conf->mirrors);
3732 kfree(conf->mirrors_old);
3733 kfree(conf->mirrors_new);
3734 kfree(conf);
3735}
3736
3737static void raid10_quiesce(struct mddev *mddev, int state)
3738{
3739 struct r10conf *conf = mddev->private;
3740
3741 switch(state) {
3742 case 1:
3743 raise_barrier(conf, 0);
3744 break;
3745 case 0:
3746 lower_barrier(conf);
3747 break;
3748 }
3749}
3750
3751static int raid10_resize(struct mddev *mddev, sector_t sectors)
3752{
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765 struct r10conf *conf = mddev->private;
3766 sector_t oldsize, size;
3767
3768 if (mddev->reshape_position != MaxSector)
3769 return -EBUSY;
3770
3771 if (conf->geo.far_copies > 1 && !conf->geo.far_offset)
3772 return -EINVAL;
3773
3774 oldsize = raid10_size(mddev, 0, 0);
3775 size = raid10_size(mddev, sectors, 0);
3776 if (mddev->external_size &&
3777 mddev->array_sectors > size)
3778 return -EINVAL;
3779 if (mddev->bitmap) {
3780 int ret = bitmap_resize(mddev->bitmap, size, 0, 0);
3781 if (ret)
3782 return ret;
3783 }
3784 md_set_array_sectors(mddev, size);
3785 set_capacity(mddev->gendisk, mddev->array_sectors);
3786 revalidate_disk(mddev->gendisk);
3787 if (sectors > mddev->dev_sectors &&
3788 mddev->recovery_cp > oldsize) {
3789 mddev->recovery_cp = oldsize;
3790 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3791 }
3792 calc_sectors(conf, sectors);
3793 mddev->dev_sectors = conf->dev_sectors;
3794 mddev->resync_max_sectors = size;
3795 return 0;
3796}
3797
3798static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
3799{
3800 struct md_rdev *rdev;
3801 struct r10conf *conf;
3802
3803 if (mddev->degraded > 0) {
3804 printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n",
3805 mdname(mddev));
3806 return ERR_PTR(-EINVAL);
3807 }
3808 sector_div(size, devs);
3809
3810
3811 mddev->new_level = 10;
3812
3813 mddev->new_layout = (1<<8) + 2;
3814 mddev->new_chunk_sectors = mddev->chunk_sectors;
3815 mddev->delta_disks = mddev->raid_disks;
3816 mddev->raid_disks *= 2;
3817
3818 mddev->recovery_cp = MaxSector;
3819 mddev->dev_sectors = size;
3820
3821 conf = setup_conf(mddev);
3822 if (!IS_ERR(conf)) {
3823 rdev_for_each(rdev, mddev)
3824 if (rdev->raid_disk >= 0) {
3825 rdev->new_raid_disk = rdev->raid_disk * 2;
3826 rdev->sectors = size;
3827 }
3828 conf->barrier = 1;
3829 }
3830
3831 return conf;
3832}
3833
3834static void *raid10_takeover(struct mddev *mddev)
3835{
3836 struct r0conf *raid0_conf;
3837
3838
3839
3840
3841 if (mddev->level == 0) {
3842
3843 raid0_conf = mddev->private;
3844 if (raid0_conf->nr_strip_zones > 1) {
3845 printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0"
3846 " with more than one zone.\n",
3847 mdname(mddev));
3848 return ERR_PTR(-EINVAL);
3849 }
3850 return raid10_takeover_raid0(mddev,
3851 raid0_conf->strip_zone->zone_end,
3852 raid0_conf->strip_zone->nb_dev);
3853 }
3854 return ERR_PTR(-EINVAL);
3855}
3856
3857static int raid10_check_reshape(struct mddev *mddev)
3858{
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873 struct r10conf *conf = mddev->private;
3874 struct geom geo;
3875
3876 if (conf->geo.far_copies != 1 && !conf->geo.far_offset)
3877 return -EINVAL;
3878
3879 if (setup_geo(&geo, mddev, geo_start) != conf->copies)
3880
3881 return -EINVAL;
3882 if (geo.far_copies > 1 && !geo.far_offset)
3883
3884 return -EINVAL;
3885
3886 if (mddev->array_sectors & geo.chunk_mask)
3887
3888 return -EINVAL;
3889
3890 if (!enough(conf, -1))
3891 return -EINVAL;
3892
3893 kfree(conf->mirrors_new);
3894 conf->mirrors_new = NULL;
3895 if (mddev->delta_disks > 0) {
3896
3897 conf->mirrors_new = kzalloc(
3898 sizeof(struct raid10_info)
3899 *(mddev->raid_disks +
3900 mddev->delta_disks),
3901 GFP_KERNEL);
3902 if (!conf->mirrors_new)
3903 return -ENOMEM;
3904 }
3905 return 0;
3906}
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921static int calc_degraded(struct r10conf *conf)
3922{
3923 int degraded, degraded2;
3924 int i;
3925
3926 rcu_read_lock();
3927 degraded = 0;
3928
3929 for (i = 0; i < conf->prev.raid_disks; i++) {
3930 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
3931 if (!rdev || test_bit(Faulty, &rdev->flags))
3932 degraded++;
3933 else if (!test_bit(In_sync, &rdev->flags))
3934
3935
3936
3937
3938 degraded++;
3939 }
3940 rcu_read_unlock();
3941 if (conf->geo.raid_disks == conf->prev.raid_disks)
3942 return degraded;
3943 rcu_read_lock();
3944 degraded2 = 0;
3945 for (i = 0; i < conf->geo.raid_disks; i++) {
3946 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
3947 if (!rdev || test_bit(Faulty, &rdev->flags))
3948 degraded2++;
3949 else if (!test_bit(In_sync, &rdev->flags)) {
3950
3951
3952
3953
3954
3955 if (conf->geo.raid_disks <= conf->prev.raid_disks)
3956 degraded2++;
3957 }
3958 }
3959 rcu_read_unlock();
3960 if (degraded2 > degraded)
3961 return degraded2;
3962 return degraded;
3963}
3964
3965static int raid10_start_reshape(struct mddev *mddev)
3966{
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977 unsigned long before_length, after_length;
3978 sector_t min_offset_diff = 0;
3979 int first = 1;
3980 struct geom new;
3981 struct r10conf *conf = mddev->private;
3982 struct md_rdev *rdev;
3983 int spares = 0;
3984 int ret;
3985
3986 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3987 return -EBUSY;
3988
3989 if (setup_geo(&new, mddev, geo_start) != conf->copies)
3990 return -EINVAL;
3991
3992 before_length = ((1 << conf->prev.chunk_shift) *
3993 conf->prev.far_copies);
3994 after_length = ((1 << conf->geo.chunk_shift) *
3995 conf->geo.far_copies);
3996
3997 rdev_for_each(rdev, mddev) {
3998 if (!test_bit(In_sync, &rdev->flags)
3999 && !test_bit(Faulty, &rdev->flags))
4000 spares++;
4001 if (rdev->raid_disk >= 0) {
4002 long long diff = (rdev->new_data_offset
4003 - rdev->data_offset);
4004 if (!mddev->reshape_backwards)
4005 diff = -diff;
4006 if (diff < 0)
4007 diff = 0;
4008 if (first || diff < min_offset_diff)
4009 min_offset_diff = diff;
4010 }
4011 }
4012
4013 if (max(before_length, after_length) > min_offset_diff)
4014 return -EINVAL;
4015
4016 if (spares < mddev->delta_disks)
4017 return -EINVAL;
4018
4019 conf->offset_diff = min_offset_diff;
4020 spin_lock_irq(&conf->device_lock);
4021 if (conf->mirrors_new) {
4022 memcpy(conf->mirrors_new, conf->mirrors,
4023 sizeof(struct raid10_info)*conf->prev.raid_disks);
4024 smp_mb();
4025 kfree(conf->mirrors_old);
4026 conf->mirrors_old = conf->mirrors;
4027 conf->mirrors = conf->mirrors_new;
4028 conf->mirrors_new = NULL;
4029 }
4030 setup_geo(&conf->geo, mddev, geo_start);
4031 smp_mb();
4032 if (mddev->reshape_backwards) {
4033 sector_t size = raid10_size(mddev, 0, 0);
4034 if (size < mddev->array_sectors) {
4035 spin_unlock_irq(&conf->device_lock);
4036 printk(KERN_ERR "md/raid10:%s: array size must be reduce before number of disks\n",
4037 mdname(mddev));
4038 return -EINVAL;
4039 }
4040 mddev->resync_max_sectors = size;
4041 conf->reshape_progress = size;
4042 } else
4043 conf->reshape_progress = 0;
4044 conf->reshape_safe = conf->reshape_progress;
4045 spin_unlock_irq(&conf->device_lock);
4046
4047 if (mddev->delta_disks && mddev->bitmap) {
4048 ret = bitmap_resize(mddev->bitmap,
4049 raid10_size(mddev, 0,
4050 conf->geo.raid_disks),
4051 0, 0);
4052 if (ret)
4053 goto abort;
4054 }
4055 if (mddev->delta_disks > 0) {
4056 rdev_for_each(rdev, mddev)
4057 if (rdev->raid_disk < 0 &&
4058 !test_bit(Faulty, &rdev->flags)) {
4059 if (raid10_add_disk(mddev, rdev) == 0) {
4060 if (rdev->raid_disk >=
4061 conf->prev.raid_disks)
4062 set_bit(In_sync, &rdev->flags);
4063 else
4064 rdev->recovery_offset = 0;
4065
4066 if (sysfs_link_rdev(mddev, rdev))
4067 ;
4068 }
4069 } else if (rdev->raid_disk >= conf->prev.raid_disks
4070 && !test_bit(Faulty, &rdev->flags)) {
4071
4072 set_bit(In_sync, &rdev->flags);
4073 }
4074 }
4075
4076
4077
4078
4079 spin_lock_irq(&conf->device_lock);
4080 mddev->degraded = calc_degraded(conf);
4081 spin_unlock_irq(&conf->device_lock);
4082 mddev->raid_disks = conf->geo.raid_disks;
4083 mddev->reshape_position = conf->reshape_progress;
4084 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4085
4086 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4087 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4088 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
4089 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
4090 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4091
4092 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
4093 "reshape");
4094 if (!mddev->sync_thread) {
4095 ret = -EAGAIN;
4096 goto abort;
4097 }
4098 conf->reshape_checkpoint = jiffies;
4099 md_wakeup_thread(mddev->sync_thread);
4100 md_new_event(mddev);
4101 return 0;
4102
4103abort:
4104 mddev->recovery = 0;
4105 spin_lock_irq(&conf->device_lock);
4106 conf->geo = conf->prev;
4107 mddev->raid_disks = conf->geo.raid_disks;
4108 rdev_for_each(rdev, mddev)
4109 rdev->new_data_offset = rdev->data_offset;
4110 smp_wmb();
4111 conf->reshape_progress = MaxSector;
4112 conf->reshape_safe = MaxSector;
4113 mddev->reshape_position = MaxSector;
4114 spin_unlock_irq(&conf->device_lock);
4115 return ret;
4116}
4117
4118
4119
4120
4121
4122
4123
4124static sector_t last_dev_address(sector_t s, struct geom *geo)
4125{
4126 s = (s | geo->chunk_mask) + 1;
4127 s >>= geo->chunk_shift;
4128 s *= geo->near_copies;
4129 s = DIV_ROUND_UP_SECTOR_T(s, geo->raid_disks);
4130 s *= geo->far_copies;
4131 s <<= geo->chunk_shift;
4132 return s;
4133}
4134
4135
4136
4137
4138
4139static sector_t first_dev_address(sector_t s, struct geom *geo)
4140{
4141 s >>= geo->chunk_shift;
4142 s *= geo->near_copies;
4143 sector_div(s, geo->raid_disks);
4144 s *= geo->far_copies;
4145 s <<= geo->chunk_shift;
4146 return s;
4147}
4148
4149static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
4150 int *skipped)
4151{
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189 struct r10conf *conf = mddev->private;
4190 struct r10bio *r10_bio;
4191 sector_t next, safe, last;
4192 int max_sectors;
4193 int nr_sectors;
4194 int s;
4195 struct md_rdev *rdev;
4196 int need_flush = 0;
4197 struct bio *blist;
4198 struct bio *bio, *read_bio;
4199 int sectors_done = 0;
4200
4201 if (sector_nr == 0) {
4202
4203 if (mddev->reshape_backwards &&
4204 conf->reshape_progress < raid10_size(mddev, 0, 0)) {
4205 sector_nr = (raid10_size(mddev, 0, 0)
4206 - conf->reshape_progress);
4207 } else if (!mddev->reshape_backwards &&
4208 conf->reshape_progress > 0)
4209 sector_nr = conf->reshape_progress;
4210 if (sector_nr) {
4211 mddev->curr_resync_completed = sector_nr;
4212 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
4213 *skipped = 1;
4214 return sector_nr;
4215 }
4216 }
4217
4218
4219
4220
4221
4222 if (mddev->reshape_backwards) {
4223
4224
4225
4226 next = first_dev_address(conf->reshape_progress - 1,
4227 &conf->geo);
4228
4229
4230
4231
4232 safe = last_dev_address(conf->reshape_safe - 1,
4233 &conf->prev);
4234
4235 if (next + conf->offset_diff < safe)
4236 need_flush = 1;
4237
4238 last = conf->reshape_progress - 1;
4239 sector_nr = last & ~(sector_t)(conf->geo.chunk_mask
4240 & conf->prev.chunk_mask);
4241 if (sector_nr + RESYNC_BLOCK_SIZE/512 < last)
4242 sector_nr = last + 1 - RESYNC_BLOCK_SIZE/512;
4243 } else {
4244
4245
4246
4247 next = last_dev_address(conf->reshape_progress, &conf->geo);
4248
4249
4250
4251
4252 safe = first_dev_address(conf->reshape_safe, &conf->prev);
4253
4254
4255
4256
4257 if (next > safe + conf->offset_diff)
4258 need_flush = 1;
4259
4260 sector_nr = conf->reshape_progress;
4261 last = sector_nr | (conf->geo.chunk_mask
4262 & conf->prev.chunk_mask);
4263
4264 if (sector_nr + RESYNC_BLOCK_SIZE/512 <= last)
4265 last = sector_nr + RESYNC_BLOCK_SIZE/512 - 1;
4266 }
4267
4268 if (need_flush ||
4269 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
4270
4271 wait_barrier(conf);
4272 mddev->reshape_position = conf->reshape_progress;
4273 if (mddev->reshape_backwards)
4274 mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
4275 - conf->reshape_progress;
4276 else
4277 mddev->curr_resync_completed = conf->reshape_progress;
4278 conf->reshape_checkpoint = jiffies;
4279 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4280 md_wakeup_thread(mddev->thread);
4281 wait_event(mddev->sb_wait, mddev->flags == 0 ||
4282 test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4283 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
4284 allow_barrier(conf);
4285 return sectors_done;
4286 }
4287 conf->reshape_safe = mddev->reshape_position;
4288 allow_barrier(conf);
4289 }
4290
4291read_more:
4292
4293 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
4294 r10_bio->state = 0;
4295 raise_barrier(conf, sectors_done != 0);
4296 atomic_set(&r10_bio->remaining, 0);
4297 r10_bio->mddev = mddev;
4298 r10_bio->sector = sector_nr;
4299 set_bit(R10BIO_IsReshape, &r10_bio->state);
4300 r10_bio->sectors = last - sector_nr + 1;
4301 rdev = read_balance(conf, r10_bio, &max_sectors);
4302 BUG_ON(!test_bit(R10BIO_Previous, &r10_bio->state));
4303
4304 if (!rdev) {
4305
4306
4307
4308
4309 mempool_free(r10_bio, conf->r10buf_pool);
4310 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4311 return sectors_done;
4312 }
4313
4314 read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
4315
4316 read_bio->bi_bdev = rdev->bdev;
4317 read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
4318 + rdev->data_offset);
4319 read_bio->bi_private = r10_bio;
4320 read_bio->bi_end_io = end_sync_read;
4321 read_bio->bi_rw = READ;
4322 read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
4323 read_bio->bi_error = 0;
4324 read_bio->bi_vcnt = 0;
4325 read_bio->bi_iter.bi_size = 0;
4326 r10_bio->master_bio = read_bio;
4327 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
4328
4329
4330 __raid10_find_phys(&conf->geo, r10_bio);
4331
4332 blist = read_bio;
4333 read_bio->bi_next = NULL;
4334
4335 for (s = 0; s < conf->copies*2; s++) {
4336 struct bio *b;
4337 int d = r10_bio->devs[s/2].devnum;
4338 struct md_rdev *rdev2;
4339 if (s&1) {
4340 rdev2 = conf->mirrors[d].replacement;
4341 b = r10_bio->devs[s/2].repl_bio;
4342 } else {
4343 rdev2 = conf->mirrors[d].rdev;
4344 b = r10_bio->devs[s/2].bio;
4345 }
4346 if (!rdev2 || test_bit(Faulty, &rdev2->flags))
4347 continue;
4348
4349 bio_reset(b);
4350 b->bi_bdev = rdev2->bdev;
4351 b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
4352 rdev2->new_data_offset;
4353 b->bi_private = r10_bio;
4354 b->bi_end_io = end_reshape_write;
4355 b->bi_rw = WRITE;
4356 b->bi_next = blist;
4357 blist = b;
4358 }
4359
4360
4361
4362 nr_sectors = 0;
4363 for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
4364 struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page;
4365 int len = (max_sectors - s) << 9;
4366 if (len > PAGE_SIZE)
4367 len = PAGE_SIZE;
4368 for (bio = blist; bio ; bio = bio->bi_next) {
4369 struct bio *bio2;
4370 if (bio_add_page(bio, page, len, 0))
4371 continue;
4372
4373
4374 for (bio2 = blist;
4375 bio2 && bio2 != bio;
4376 bio2 = bio2->bi_next) {
4377
4378 bio2->bi_vcnt--;
4379 bio2->bi_iter.bi_size -= len;
4380 bio_clear_flag(bio2, BIO_SEG_VALID);
4381 }
4382 goto bio_full;
4383 }
4384 sector_nr += len >> 9;
4385 nr_sectors += len >> 9;
4386 }
4387bio_full:
4388 r10_bio->sectors = nr_sectors;
4389
4390
4391 md_sync_acct(read_bio->bi_bdev, r10_bio->sectors);
4392 atomic_inc(&r10_bio->remaining);
4393 read_bio->bi_next = NULL;
4394 generic_make_request(read_bio);
4395 sector_nr += nr_sectors;
4396 sectors_done += nr_sectors;
4397 if (sector_nr <= last)
4398 goto read_more;
4399
4400
4401
4402
4403 if (mddev->reshape_backwards)
4404 conf->reshape_progress -= sectors_done;
4405 else
4406 conf->reshape_progress += sectors_done;
4407
4408 return sectors_done;
4409}
4410
/*
 * Forward declarations: both helpers are defined further down in this file
 * but are called from reshape_request_write() directly below.
 */
static void end_reshape_request(struct r10bio *r10_bio);
static int handle_reshape_read_error(struct mddev *mddev,
				     struct r10bio *r10_bio);
4414static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
4415{
4416
4417
4418
4419
4420
4421 struct r10conf *conf = mddev->private;
4422 int s;
4423
4424 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
4425 if (handle_reshape_read_error(mddev, r10_bio) < 0) {
4426
4427 md_done_sync(mddev, r10_bio->sectors, 0);
4428 return;
4429 }
4430
4431
4432
4433
4434 atomic_set(&r10_bio->remaining, 1);
4435 for (s = 0; s < conf->copies*2; s++) {
4436 struct bio *b;
4437 int d = r10_bio->devs[s/2].devnum;
4438 struct md_rdev *rdev;
4439 if (s&1) {
4440 rdev = conf->mirrors[d].replacement;
4441 b = r10_bio->devs[s/2].repl_bio;
4442 } else {
4443 rdev = conf->mirrors[d].rdev;
4444 b = r10_bio->devs[s/2].bio;
4445 }
4446 if (!rdev || test_bit(Faulty, &rdev->flags))
4447 continue;
4448 atomic_inc(&rdev->nr_pending);
4449 md_sync_acct(b->bi_bdev, r10_bio->sectors);
4450 atomic_inc(&r10_bio->remaining);
4451 b->bi_next = NULL;
4452 generic_make_request(b);
4453 }
4454 end_reshape_request(r10_bio);
4455}
4456
4457static void end_reshape(struct r10conf *conf)
4458{
4459 if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery))
4460 return;
4461
4462 spin_lock_irq(&conf->device_lock);
4463 conf->prev = conf->geo;
4464 md_finish_reshape(conf->mddev);
4465 smp_wmb();
4466 conf->reshape_progress = MaxSector;
4467 conf->reshape_safe = MaxSector;
4468 spin_unlock_irq(&conf->device_lock);
4469
4470
4471
4472
4473 if (conf->mddev->queue) {
4474 int stripe = conf->geo.raid_disks *
4475 ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
4476 stripe /= conf->geo.near_copies;
4477 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
4478 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
4479 }
4480 conf->fullsync = 0;
4481}
4482
4483static int handle_reshape_read_error(struct mddev *mddev,
4484 struct r10bio *r10_bio)
4485{
4486
4487 int sectors = r10_bio->sectors;
4488 struct r10conf *conf = mddev->private;
4489 struct {
4490 struct r10bio r10_bio;
4491 struct r10dev devs[conf->copies];
4492 } on_stack;
4493 struct r10bio *r10b = &on_stack.r10_bio;
4494 int slot = 0;
4495 int idx = 0;
4496 struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec;
4497
4498 r10b->sector = r10_bio->sector;
4499 __raid10_find_phys(&conf->prev, r10b);
4500
4501 while (sectors) {
4502 int s = sectors;
4503 int success = 0;
4504 int first_slot = slot;
4505
4506 if (s > (PAGE_SIZE >> 9))
4507 s = PAGE_SIZE >> 9;
4508
4509 while (!success) {
4510 int d = r10b->devs[slot].devnum;
4511 struct md_rdev *rdev = conf->mirrors[d].rdev;
4512 sector_t addr;
4513 if (rdev == NULL ||
4514 test_bit(Faulty, &rdev->flags) ||
4515 !test_bit(In_sync, &rdev->flags))
4516 goto failed;
4517
4518 addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
4519 success = sync_page_io(rdev,
4520 addr,
4521 s << 9,
4522 bvec[idx].bv_page,
4523 READ, false);
4524 if (success)
4525 break;
4526 failed:
4527 slot++;
4528 if (slot >= conf->copies)
4529 slot = 0;
4530 if (slot == first_slot)
4531 break;
4532 }
4533 if (!success) {
4534
4535 set_bit(MD_RECOVERY_INTR,
4536 &mddev->recovery);
4537 return -EIO;
4538 }
4539 sectors -= s;
4540 idx++;
4541 }
4542 return 0;
4543}
4544
4545static void end_reshape_write(struct bio *bio)
4546{
4547 struct r10bio *r10_bio = bio->bi_private;
4548 struct mddev *mddev = r10_bio->mddev;
4549 struct r10conf *conf = mddev->private;
4550 int d;
4551 int slot;
4552 int repl;
4553 struct md_rdev *rdev = NULL;
4554
4555 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
4556 if (repl)
4557 rdev = conf->mirrors[d].replacement;
4558 if (!rdev) {
4559 smp_mb();
4560 rdev = conf->mirrors[d].rdev;
4561 }
4562
4563 if (bio->bi_error) {
4564
4565 md_error(mddev, rdev);
4566 }
4567
4568 rdev_dec_pending(rdev, mddev);
4569 end_reshape_request(r10_bio);
4570}
4571
4572static void end_reshape_request(struct r10bio *r10_bio)
4573{
4574 if (!atomic_dec_and_test(&r10_bio->remaining))
4575 return;
4576 md_done_sync(r10_bio->mddev, r10_bio->sectors, 1);
4577 bio_put(r10_bio->master_bio);
4578 put_buf(r10_bio);
4579}
4580
4581static void raid10_finish_reshape(struct mddev *mddev)
4582{
4583 struct r10conf *conf = mddev->private;
4584
4585 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
4586 return;
4587
4588 if (mddev->delta_disks > 0) {
4589 sector_t size = raid10_size(mddev, 0, 0);
4590 md_set_array_sectors(mddev, size);
4591 if (mddev->recovery_cp > mddev->resync_max_sectors) {
4592 mddev->recovery_cp = mddev->resync_max_sectors;
4593 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4594 }
4595 mddev->resync_max_sectors = size;
4596 set_capacity(mddev->gendisk, mddev->array_sectors);
4597 revalidate_disk(mddev->gendisk);
4598 } else {
4599 int d;
4600 for (d = conf->geo.raid_disks ;
4601 d < conf->geo.raid_disks - mddev->delta_disks;
4602 d++) {
4603 struct md_rdev *rdev = conf->mirrors[d].rdev;
4604 if (rdev)
4605 clear_bit(In_sync, &rdev->flags);
4606 rdev = conf->mirrors[d].replacement;
4607 if (rdev)
4608 clear_bit(In_sync, &rdev->flags);
4609 }
4610 }
4611 mddev->layout = mddev->new_layout;
4612 mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
4613 mddev->reshape_position = MaxSector;
4614 mddev->delta_disks = 0;
4615 mddev->reshape_backwards = 0;
4616}
4617
4618static struct md_personality raid10_personality =
4619{
4620 .name = "raid10",
4621 .level = 10,
4622 .owner = THIS_MODULE,
4623 .make_request = raid10_make_request,
4624 .run = raid10_run,
4625 .free = raid10_free,
4626 .status = raid10_status,
4627 .error_handler = raid10_error,
4628 .hot_add_disk = raid10_add_disk,
4629 .hot_remove_disk= raid10_remove_disk,
4630 .spare_active = raid10_spare_active,
4631 .sync_request = raid10_sync_request,
4632 .quiesce = raid10_quiesce,
4633 .size = raid10_size,
4634 .resize = raid10_resize,
4635 .takeover = raid10_takeover,
4636 .check_reshape = raid10_check_reshape,
4637 .start_reshape = raid10_start_reshape,
4638 .finish_reshape = raid10_finish_reshape,
4639 .congested = raid10_congested,
4640};
4641
4642static int __init raid_init(void)
4643{
4644 return register_md_personality(&raid10_personality);
4645}
4646
/* Module exit point: withdraw the RAID10 personality from the md core. */
static void raid_exit(void)
{
	unregister_md_personality(&raid10_personality);
}
4651
/* Module entry/exit hooks and descriptive metadata */
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
/*
 * Aliases under which this module can be requested.
 * NOTE(review): "md-personality-9" presumably is the legacy numeric id of
 * this personality - confirm against the md core.
 */
MODULE_ALIAS("md-personality-9");
MODULE_ALIAS("md-raid10");
MODULE_ALIAS("md-level-10");

/*
 * Expose max_queued_requests as a module parameter: readable by everyone,
 * writable by the owner.
 */
module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
4661