/*
 * raid10.c : Multiple Devices driver for Linux
 *
 * RAID-10 support for md: striped mirroring with configurable
 * "near", "far" and "offset" copy layouts.
 *
 * Based on code in raid1.c.
 */
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include "md.h"
#include "raid10.h"
#include "raid0.h"
#include "bitmap.h"
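
/*
 * RAID10 provides a combination of RAID0 and RAID1 functionality.
 * The layout of data is defined by
 *    chunk_size
 *    raid_disks
 *    near_copies
 *    far_copies
 *    far_offset
 *    far_set_size
 *
 * The data to be stored is divided into chunks using chunk_size.  Each device
 * is divided into far_copies sections.  In each section, chunks are laid out
 * in a style similar to raid0, but near_copies copies of each chunk are
 * stored (each on a different drive).  The starting device for each section
 * is offset near_copies from the starting device of the previous section.
 * Thus there are (near_copies * far_copies) copies of each chunk, each on a
 * different drive.  near_copies and far_copies must be at least one, and
 * their product is at most raid_disks.
 *
 * If far_offset is true, the far copies are not placed in distant sections
 * of each device but in the stripes immediately adjacent to the near copies.
 *
 * If far_set_size is smaller than raid_disks, the devices are grouped into
 * sets of far_set_size, and the "shift by near_copies" used for far copies
 * stays confined to each set rather than wrapping around the whole array,
 * which improves the number of device-failure combinations the array can
 * survive.
 */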
#define NR_RAID10_BIOS 256

/* When we get a read error on a read-only array, we redirect to another
 * device without failing the first device, or trying to over-write to
 * correct the read error.  To keep track of bad blocks on a per-bio
 * level, we store IO_BLOCKED in the appropriate 'bios' pointer.
 */
#define IO_BLOCKED ((struct bio *)1)
/* When we successfully write to a known bad-block, we need to remove the
 * bad-block marking which must be done from process context.  So we record
 * the success by setting devs[n].bio to IO_MADE_GOOD.
 */
#define IO_MADE_GOOD ((struct bio *)2)

#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)

/* When there are this many requests queued to be written by
 * the raid10 thread, we become 'congested' to provide back-pressure
 * for writeback.
 */
static int max_queued_requests = 1024;
98
99static void allow_barrier(struct r10conf *conf);
100static void lower_barrier(struct r10conf *conf);
101static int _enough(struct r10conf *conf, int previous, int ignore);
102static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
103 int *skipped);
104static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
105static void end_reshape_write(struct bio *bio);
106static void end_reshape(struct r10conf *conf);
107
108static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
109{
110 struct r10conf *conf = data;
111 int size = offsetof(struct r10bio, devs[conf->copies]);
112
113
114
115 return kzalloc(size, gfp_flags);
116}
117
118static void r10bio_pool_free(void *r10_bio, void *data)
119{
120 kfree(r10_bio);
121}
122
123
124#define RESYNC_BLOCK_SIZE (64*1024)
125#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
126
127#define RESYNC_WINDOW (1024*1024)
128
129#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
130
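/*
 * When performing a resync, we need to read and compare, so
 * we need as many pages as there are copies.
 * When performing a recovery, we need 2 bios, one for read,
 * one for write (we recover only one drive per r10buf).
 */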
138static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
139{
140 struct r10conf *conf = data;
141 struct page *page;
142 struct r10bio *r10_bio;
143 struct bio *bio;
144 int i, j;
145 int nalloc;
146
147 r10_bio = r10bio_pool_alloc(gfp_flags, conf);
148 if (!r10_bio)
149 return NULL;
150
151 if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) ||
152 test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery))
153 nalloc = conf->copies;
154 else
155 nalloc = 2;
	/*
	 * Allocate bios.
	 */
160 for (j = nalloc ; j-- ; ) {
161 bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
162 if (!bio)
163 goto out_free_bio;
164 r10_bio->devs[j].bio = bio;
165 if (!conf->have_replacement)
166 continue;
167 bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
168 if (!bio)
169 goto out_free_bio;
170 r10_bio->devs[j].repl_bio = bio;
171 }
	/*
	 * Allocate RESYNC_PAGES data pages and attach them
	 * one for each bio.
	 */
176 for (j = 0 ; j < nalloc; j++) {
177 struct bio *rbio = r10_bio->devs[j].repl_bio;
178 bio = r10_bio->devs[j].bio;
179 for (i = 0; i < RESYNC_PAGES; i++) {
180 if (j > 0 && !test_bit(MD_RECOVERY_SYNC,
181 &conf->mddev->recovery)) {
				/* we can share bv_page's during recovery
				 * and reshape */
184 struct bio *rbio = r10_bio->devs[0].bio;
185 page = rbio->bi_io_vec[i].bv_page;
186 get_page(page);
187 } else
188 page = alloc_page(gfp_flags);
189 if (unlikely(!page))
190 goto out_free_pages;
191
192 bio->bi_io_vec[i].bv_page = page;
193 if (rbio)
194 rbio->bi_io_vec[i].bv_page = page;
195 }
196 }
197
198 return r10_bio;
199
200out_free_pages:
201 for ( ; i > 0 ; i--)
202 safe_put_page(bio->bi_io_vec[i-1].bv_page);
203 while (j--)
204 for (i = 0; i < RESYNC_PAGES ; i++)
205 safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
206 j = 0;
207out_free_bio:
208 for ( ; j < nalloc; j++) {
209 if (r10_bio->devs[j].bio)
210 bio_put(r10_bio->devs[j].bio);
211 if (r10_bio->devs[j].repl_bio)
212 bio_put(r10_bio->devs[j].repl_bio);
213 }
214 r10bio_pool_free(r10_bio, conf);
215 return NULL;
216}
217
218static void r10buf_pool_free(void *__r10_bio, void *data)
219{
220 int i;
221 struct r10conf *conf = data;
222 struct r10bio *r10bio = __r10_bio;
223 int j;
224
225 for (j=0; j < conf->copies; j++) {
226 struct bio *bio = r10bio->devs[j].bio;
227 if (bio) {
228 for (i = 0; i < RESYNC_PAGES; i++) {
229 safe_put_page(bio->bi_io_vec[i].bv_page);
230 bio->bi_io_vec[i].bv_page = NULL;
231 }
232 bio_put(bio);
233 }
234 bio = r10bio->devs[j].repl_bio;
235 if (bio)
236 bio_put(bio);
237 }
238 r10bio_pool_free(r10bio, conf);
239}
240
241static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
242{
243 int i;
244
245 for (i = 0; i < conf->copies; i++) {
246 struct bio **bio = & r10_bio->devs[i].bio;
247 if (!BIO_SPECIAL(*bio))
248 bio_put(*bio);
249 *bio = NULL;
250 bio = &r10_bio->devs[i].repl_bio;
251 if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio))
252 bio_put(*bio);
253 *bio = NULL;
254 }
255}
256
257static void free_r10bio(struct r10bio *r10_bio)
258{
259 struct r10conf *conf = r10_bio->mddev->private;
260
261 put_all_bios(conf, r10_bio);
262 mempool_free(r10_bio, conf->r10bio_pool);
263}
264
265static void put_buf(struct r10bio *r10_bio)
266{
267 struct r10conf *conf = r10_bio->mddev->private;
268
269 mempool_free(r10_bio, conf->r10buf_pool);
270
271 lower_barrier(conf);
272}
273
274static void reschedule_retry(struct r10bio *r10_bio)
275{
276 unsigned long flags;
277 struct mddev *mddev = r10_bio->mddev;
278 struct r10conf *conf = mddev->private;
279
280 spin_lock_irqsave(&conf->device_lock, flags);
281 list_add(&r10_bio->retry_list, &conf->retry_list);
282 conf->nr_queued ++;
283 spin_unlock_irqrestore(&conf->device_lock, flags);
284
285
286 wake_up(&conf->wait_barrier);
287
288 md_wakeup_thread(mddev->thread);
289}
290
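/*
 * raid_end_bio_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */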
296static void raid_end_bio_io(struct r10bio *r10_bio)
297{
298 struct bio *bio = r10_bio->master_bio;
299 int done;
300 struct r10conf *conf = r10_bio->mddev->private;
301
302 if (bio->bi_phys_segments) {
303 unsigned long flags;
304 spin_lock_irqsave(&conf->device_lock, flags);
305 bio->bi_phys_segments--;
306 done = (bio->bi_phys_segments == 0);
307 spin_unlock_irqrestore(&conf->device_lock, flags);
308 } else
309 done = 1;
310 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
311 bio->bi_error = -EIO;
312 if (done) {
313 bio_endio(bio);
314
315
316
317
318 allow_barrier(conf);
319 }
320 free_r10bio(r10_bio);
321}
322
/*
 * Update disk head position estimator based on IRQ completion info.
 */
326static inline void update_head_pos(int slot, struct r10bio *r10_bio)
327{
328 struct r10conf *conf = r10_bio->mddev->private;
329
330 conf->mirrors[r10_bio->devs[slot].devnum].head_position =
331 r10_bio->devs[slot].addr + (r10_bio->sectors);
332}
333
/*
 * Find the disk number which triggered given bio
 */
337static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
338 struct bio *bio, int *slotp, int *replp)
339{
340 int slot;
341 int repl = 0;
342
343 for (slot = 0; slot < conf->copies; slot++) {
344 if (r10_bio->devs[slot].bio == bio)
345 break;
346 if (r10_bio->devs[slot].repl_bio == bio) {
347 repl = 1;
348 break;
349 }
350 }
351
352 BUG_ON(slot == conf->copies);
353 update_head_pos(slot, r10_bio);
354
355 if (slotp)
356 *slotp = slot;
357 if (replp)
358 *replp = repl;
359 return r10_bio->devs[slot].devnum;
360}
361
362static void raid10_end_read_request(struct bio *bio)
363{
364 int uptodate = !bio->bi_error;
365 struct r10bio *r10_bio = bio->bi_private;
366 int slot, dev;
367 struct md_rdev *rdev;
368 struct r10conf *conf = r10_bio->mddev->private;
369
370 slot = r10_bio->read_slot;
371 dev = r10_bio->devs[slot].devnum;
372 rdev = r10_bio->devs[slot].rdev;
373
374
375
376 update_head_pos(slot, r10_bio);
377
378 if (uptodate) {
		/*
		 * Set R10BIO_Uptodate in our master bio, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the composite IO operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' bio.
		 */
388 set_bit(R10BIO_Uptodate, &r10_bio->state);
389 } else {
		/* If all other devices that store this block have
		 * failed, we want to return the error upwards rather
		 * than fail the last device.  Here we redefine
		 * "uptodate" to mean "Don't want to retry".
		 */
395 if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
396 rdev->raid_disk))
397 uptodate = 1;
398 }
399 if (uptodate) {
400 raid_end_bio_io(r10_bio);
401 rdev_dec_pending(rdev, conf->mddev);
402 } else {
403
404
405
406 char b[BDEVNAME_SIZE];
407 printk_ratelimited(KERN_ERR
408 "md/raid10:%s: %s: rescheduling sector %llu\n",
409 mdname(conf->mddev),
410 bdevname(rdev->bdev, b),
411 (unsigned long long)r10_bio->sector);
412 set_bit(R10BIO_ReadError, &r10_bio->state);
413 reschedule_retry(r10_bio);
414 }
415}
416
417static void close_write(struct r10bio *r10_bio)
418{
419
420 bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
421 r10_bio->sectors,
422 !test_bit(R10BIO_Degraded, &r10_bio->state),
423 0);
424 md_write_end(r10_bio->mddev);
425}
426
427static void one_write_done(struct r10bio *r10_bio)
428{
429 if (atomic_dec_and_test(&r10_bio->remaining)) {
430 if (test_bit(R10BIO_WriteError, &r10_bio->state))
431 reschedule_retry(r10_bio);
432 else {
433 close_write(r10_bio);
434 if (test_bit(R10BIO_MadeGood, &r10_bio->state))
435 reschedule_retry(r10_bio);
436 else
437 raid_end_bio_io(r10_bio);
438 }
439 }
440}
441
442static void raid10_end_write_request(struct bio *bio)
443{
444 struct r10bio *r10_bio = bio->bi_private;
445 int dev;
446 int dec_rdev = 1;
447 struct r10conf *conf = r10_bio->mddev->private;
448 int slot, repl;
449 struct md_rdev *rdev = NULL;
450
451 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
452
453 if (repl)
454 rdev = conf->mirrors[dev].replacement;
455 if (!rdev) {
456 smp_rmb();
457 repl = 0;
458 rdev = conf->mirrors[dev].rdev;
459 }
460
461
462
463 if (bio->bi_error) {
464 if (repl)
			/* Never record new bad blocks to replacement,
			 * just fail it.
			 */
468 md_error(rdev->mddev, rdev);
469 else {
470 set_bit(WriteErrorSeen, &rdev->flags);
471 if (!test_and_set_bit(WantReplacement, &rdev->flags))
472 set_bit(MD_RECOVERY_NEEDED,
473 &rdev->mddev->recovery);
474 set_bit(R10BIO_WriteError, &r10_bio->state);
475 dec_rdev = 0;
476 }
477 } else {
		/*
		 * Set R10BIO_Uptodate in our master bio, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the composite IO operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' bio.
		 */
487 sector_t first_bad;
488 int bad_sectors;
		/*
		 * Do not set R10BIO_Uptodate if the current device is
		 * rebuilding or Faulty. This is because we cannot use
		 * such device for properly reading the data back (we could
		 * potentially have cases where the device is in process of
		 * being integrated in the array and we could easily see bad
		 * values due to overlapping reads from the device).
		 */
498 if (test_bit(In_sync, &rdev->flags) &&
499 !test_bit(Faulty, &rdev->flags))
500 set_bit(R10BIO_Uptodate, &r10_bio->state);
		/* Maybe we can clear some bad blocks. */
503 if (is_badblock(rdev,
504 r10_bio->devs[slot].addr,
505 r10_bio->sectors,
506 &first_bad, &bad_sectors)) {
507 bio_put(bio);
508 if (repl)
509 r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
510 else
511 r10_bio->devs[slot].bio = IO_MADE_GOOD;
512 dec_rdev = 0;
513 set_bit(R10BIO_MadeGood, &r10_bio->state);
514 }
515 }
	/*
	 * Let's see if all mirrored write operations have finished
	 * already.
	 */
522 one_write_done(r10_bio);
523 if (dec_rdev)
524 rdev_dec_pending(rdev, conf->mddev);
525}
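
/*
 * RAID10 layout manager
 * __raid10_find_phys() maps the logical sector of an r10bio onto its
 * (near_copies * far_copies) physical copies, filling in a devnum/addr
 * pair for every slot and honouring the 'near', 'far'/'offset' and
 * far-set grouping parameters of the geometry.
 */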
552static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
553{
554 int n,f;
555 sector_t sector;
556 sector_t chunk;
557 sector_t stripe;
558 int dev;
559 int slot = 0;
560 int last_far_set_start, last_far_set_size;
561
562 last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1;
563 last_far_set_start *= geo->far_set_size;
564
565 last_far_set_size = geo->far_set_size;
566 last_far_set_size += (geo->raid_disks % geo->far_set_size);
567
568
569 chunk = r10bio->sector >> geo->chunk_shift;
570 sector = r10bio->sector & geo->chunk_mask;
571
572 chunk *= geo->near_copies;
573 stripe = chunk;
574 dev = sector_div(stripe, geo->raid_disks);
575 if (geo->far_offset)
576 stripe *= geo->far_copies;
577
578 sector += stripe << geo->chunk_shift;
579
580
581 for (n = 0; n < geo->near_copies; n++) {
582 int d = dev;
583 int set;
584 sector_t s = sector;
585 r10bio->devs[slot].devnum = d;
586 r10bio->devs[slot].addr = s;
587 slot++;
588
589 for (f = 1; f < geo->far_copies; f++) {
590 set = d / geo->far_set_size;
591 d += geo->near_copies;
592
593 if ((geo->raid_disks % geo->far_set_size) &&
594 (d > last_far_set_start)) {
595 d -= last_far_set_start;
596 d %= last_far_set_size;
597 d += last_far_set_start;
598 } else {
599 d %= geo->far_set_size;
600 d += geo->far_set_size * set;
601 }
602 s += geo->stride;
603 r10bio->devs[slot].devnum = d;
604 r10bio->devs[slot].addr = s;
605 slot++;
606 }
607 dev++;
608 if (dev >= geo->raid_disks) {
609 dev = 0;
610 sector += (geo->chunk_mask + 1);
611 }
612 }
613}
614
615static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio)
616{
617 struct geom *geo = &conf->geo;
618
619 if (conf->reshape_progress != MaxSector &&
620 ((r10bio->sector >= conf->reshape_progress) !=
621 conf->mddev->reshape_backwards)) {
622 set_bit(R10BIO_Previous, &r10bio->state);
623 geo = &conf->prev;
624 } else
625 clear_bit(R10BIO_Previous, &r10bio->state);
626
627 __raid10_find_phys(geo, r10bio);
628}
629
630static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
631{
632 sector_t offset, chunk, vchunk;
633
634
635
636 struct geom *geo = &conf->geo;
637 int far_set_start = (dev / geo->far_set_size) * geo->far_set_size;
638 int far_set_size = geo->far_set_size;
639 int last_far_set_start;
640
641 if (geo->raid_disks % geo->far_set_size) {
642 last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1;
643 last_far_set_start *= geo->far_set_size;
644
645 if (dev >= last_far_set_start) {
646 far_set_size = geo->far_set_size;
647 far_set_size += (geo->raid_disks % geo->far_set_size);
648 far_set_start = last_far_set_start;
649 }
650 }
651
652 offset = sector & geo->chunk_mask;
653 if (geo->far_offset) {
654 int fc;
655 chunk = sector >> geo->chunk_shift;
656 fc = sector_div(chunk, geo->far_copies);
657 dev -= fc * geo->near_copies;
658 if (dev < far_set_start)
659 dev += far_set_size;
660 } else {
661 while (sector >= geo->stride) {
662 sector -= geo->stride;
663 if (dev < (geo->near_copies + far_set_start))
664 dev += far_set_size - geo->near_copies;
665 else
666 dev -= geo->near_copies;
667 }
668 chunk = sector >> geo->chunk_shift;
669 }
670 vchunk = chunk * geo->raid_disks + dev;
671 sector_div(vchunk, geo->near_copies);
672 return (vchunk << geo->chunk_shift) + offset;
673}
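
/*
 * This routine returns the disk from which the requested read should
 * be done. There is a per-array 'next expected sequential IO' sector
 * number - if this matches on the next IO then we use the last disk.
 * There is also a per-disk 'last known head position' sector that is
 * maintained from IRQ contexts, both the normal and the resync IO
 * completion handlers update this position correctly. If there is no
 * perfect sequential match then we pick the disk whose head is closest.
 *
 * If there are 2 mirrors in the same 2 devices, performance degrades
 * because position is mirror, not device based.
 *
 * The rdev for the device selected will have nr_pending incremented.
 *
 * FIXME: possibly should rethink readbalancing and do it differently
 * depending on near_copies / far_copies geometry.
 */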
694static struct md_rdev *read_balance(struct r10conf *conf,
695 struct r10bio *r10_bio,
696 int *max_sectors)
697{
698 const sector_t this_sector = r10_bio->sector;
699 int disk, slot;
700 int sectors = r10_bio->sectors;
701 int best_good_sectors;
702 sector_t new_distance, best_dist;
703 struct md_rdev *best_rdev, *rdev = NULL;
704 int do_balance;
705 int best_slot;
706 struct geom *geo = &conf->geo;
707
708 raid10_find_phys(conf, r10_bio);
709 rcu_read_lock();
710retry:
711 sectors = r10_bio->sectors;
712 best_slot = -1;
713 best_rdev = NULL;
714 best_dist = MaxSector;
715 best_good_sectors = 0;
716 do_balance = 1;
	/*
	 * Check if we can balance. We can balance on the whole
	 * device if no resync is going on (recovery is ok), or below
	 * the resync window. We take the first readable disk when
	 * above the resync window.
	 */
723 if (conf->mddev->recovery_cp < MaxSector
724 && (this_sector + sectors >= conf->next_resync))
725 do_balance = 0;
726
727 for (slot = 0; slot < conf->copies ; slot++) {
728 sector_t first_bad;
729 int bad_sectors;
730 sector_t dev_sector;
731
732 if (r10_bio->devs[slot].bio == IO_BLOCKED)
733 continue;
734 disk = r10_bio->devs[slot].devnum;
735 rdev = rcu_dereference(conf->mirrors[disk].replacement);
736 if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
737 r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
738 rdev = rcu_dereference(conf->mirrors[disk].rdev);
739 if (rdev == NULL ||
740 test_bit(Faulty, &rdev->flags))
741 continue;
742 if (!test_bit(In_sync, &rdev->flags) &&
743 r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
744 continue;
745
746 dev_sector = r10_bio->devs[slot].addr;
747 if (is_badblock(rdev, dev_sector, sectors,
748 &first_bad, &bad_sectors)) {
749 if (best_dist < MaxSector)
750
751 continue;
752 if (first_bad <= dev_sector) {
753
754
755
756
757 bad_sectors -= (dev_sector - first_bad);
758 if (!do_balance && sectors > bad_sectors)
759 sectors = bad_sectors;
760 if (best_good_sectors > sectors)
761 best_good_sectors = sectors;
762 } else {
763 sector_t good_sectors =
764 first_bad - dev_sector;
765 if (good_sectors > best_good_sectors) {
766 best_good_sectors = good_sectors;
767 best_slot = slot;
768 best_rdev = rdev;
769 }
770 if (!do_balance)
771
772 break;
773 }
774 continue;
775 } else
776 best_good_sectors = sectors;
777
778 if (!do_balance)
779 break;
780
781
782
783
784
785 if (geo->near_copies > 1 && !atomic_read(&rdev->nr_pending))
786 break;
787
788
789 if (geo->far_copies > 1)
790 new_distance = r10_bio->devs[slot].addr;
791 else
792 new_distance = abs(r10_bio->devs[slot].addr -
793 conf->mirrors[disk].head_position);
794 if (new_distance < best_dist) {
795 best_dist = new_distance;
796 best_slot = slot;
797 best_rdev = rdev;
798 }
799 }
800 if (slot >= conf->copies) {
801 slot = best_slot;
802 rdev = best_rdev;
803 }
804
805 if (slot >= 0) {
806 atomic_inc(&rdev->nr_pending);
807 if (test_bit(Faulty, &rdev->flags)) {
808
809
810
811 rdev_dec_pending(rdev, conf->mddev);
812 goto retry;
813 }
814 r10_bio->read_slot = slot;
815 } else
816 rdev = NULL;
817 rcu_read_unlock();
818 *max_sectors = best_good_sectors;
819
820 return rdev;
821}
822
823static int raid10_congested(struct mddev *mddev, int bits)
824{
825 struct r10conf *conf = mddev->private;
826 int i, ret = 0;
827
828 if ((bits & (1 << WB_async_congested)) &&
829 conf->pending_count >= max_queued_requests)
830 return 1;
831
832 rcu_read_lock();
833 for (i = 0;
834 (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
835 && ret == 0;
836 i++) {
837 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
838 if (rdev && !test_bit(Faulty, &rdev->flags)) {
839 struct request_queue *q = bdev_get_queue(rdev->bdev);
840
841 ret |= bdi_congested(&q->backing_dev_info, bits);
842 }
843 }
844 rcu_read_unlock();
845 return ret;
846}
847
848static void flush_pending_writes(struct r10conf *conf)
{
	/* Any writes that have been queued but are awaiting
	 * bitmap updates get flushed here.
	 */
853 spin_lock_irq(&conf->device_lock);
854
855 if (conf->pending_bio_list.head) {
856 struct bio *bio;
857 bio = bio_list_get(&conf->pending_bio_list);
858 conf->pending_count = 0;
859 spin_unlock_irq(&conf->device_lock);
860
861
862 bitmap_unplug(conf->mddev->bitmap);
863 wake_up(&conf->wait_barrier);
864
865 while (bio) {
866 struct bio *next = bio->bi_next;
867 bio->bi_next = NULL;
868 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
869 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
870
871 bio_endio(bio);
872 else
873 generic_make_request(bio);
874 bio = next;
875 }
876 } else
877 spin_unlock_irq(&conf->device_lock);
878}
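
/* Barriers....
 * Sometimes we need to suspend IO while we do something else,
 * either some resync/recovery, or reshape.
 * To do this we raise a 'barrier'.
 * The 'barrier' is a counter that can be raised multiple times
 * to count how many activities are happening which preclude
 * normal IO.
 * We can only raise the barrier if there is no pending IO,
 * i.e. if nr_pending == 0.
 * We choose only to raise the barrier if no-one is waiting for the
 * barrier to go down.  This means that as soon as an IO request
 * is ready, no other operations which require a barrier will start
 * until the IO request has had a chance.
 *
 * So: regular IO calls 'wait_barrier'.  When that returns there
 *    is no background IO happening.  It must arrange to call
 *    allow_barrier when it has finished its IO.
 * Background IO calls must call raise_barrier.  Once that returns
 *    there is no normal IO happening.  It must arrange to call
 *    lower_barrier when the particular background IO completes.
 */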
902static void raise_barrier(struct r10conf *conf, int force)
903{
904 BUG_ON(force && !conf->barrier);
905 spin_lock_irq(&conf->resync_lock);
906
907
908 wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
909 conf->resync_lock);
910
911
912 conf->barrier++;
913
914
915 wait_event_lock_irq(conf->wait_barrier,
916 !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
917 conf->resync_lock);
918
919 spin_unlock_irq(&conf->resync_lock);
920}
921
922static void lower_barrier(struct r10conf *conf)
923{
924 unsigned long flags;
925 spin_lock_irqsave(&conf->resync_lock, flags);
926 conf->barrier--;
927 spin_unlock_irqrestore(&conf->resync_lock, flags);
928 wake_up(&conf->wait_barrier);
929}
930
931static void wait_barrier(struct r10conf *conf)
932{
933 spin_lock_irq(&conf->resync_lock);
934 if (conf->barrier) {
935 conf->nr_waiting++;
		/* Wait for the barrier to drop.
		 * However if there are already pending
		 * requests (preventing the barrier from
		 * rising completely), and the
		 * pre-process bio queue isn't empty,
		 * then don't wait, as we need to empty
		 * that queue to get the nr_pending
		 * count down.
		 */
945 wait_event_lock_irq(conf->wait_barrier,
946 !conf->barrier ||
947 (conf->nr_pending &&
948 current->bio_list &&
949 !bio_list_empty(current->bio_list)),
950 conf->resync_lock);
951 conf->nr_waiting--;
952 }
953 conf->nr_pending++;
954 spin_unlock_irq(&conf->resync_lock);
955}
956
957static void allow_barrier(struct r10conf *conf)
958{
959 unsigned long flags;
960 spin_lock_irqsave(&conf->resync_lock, flags);
961 conf->nr_pending--;
962 spin_unlock_irqrestore(&conf->resync_lock, flags);
963 wake_up(&conf->wait_barrier);
964}
965
966static void freeze_array(struct r10conf *conf, int extra)
{
	/* Stop sync I/O and normal I/O and wait for everything to
	 * go quiet.
	 * We increment barrier and nr_waiting, and then wait until
	 * nr_pending matches nr_queued+extra.  While waiting we run
	 * flush_pending_writes() so that queued writes cannot block
	 * the freeze.
	 */
980 spin_lock_irq(&conf->resync_lock);
981 conf->barrier++;
982 conf->nr_waiting++;
983 wait_event_lock_irq_cmd(conf->wait_barrier,
984 conf->nr_pending == conf->nr_queued+extra,
985 conf->resync_lock,
986 flush_pending_writes(conf));
987
988 spin_unlock_irq(&conf->resync_lock);
989}
990
991static void unfreeze_array(struct r10conf *conf)
992{
993
994 spin_lock_irq(&conf->resync_lock);
995 conf->barrier--;
996 conf->nr_waiting--;
997 wake_up(&conf->wait_barrier);
998 spin_unlock_irq(&conf->resync_lock);
999}
1000
1001static sector_t choose_data_offset(struct r10bio *r10_bio,
1002 struct md_rdev *rdev)
1003{
1004 if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) ||
1005 test_bit(R10BIO_Previous, &r10_bio->state))
1006 return rdev->data_offset;
1007 else
1008 return rdev->new_data_offset;
1009}
1010
1011struct raid10_plug_cb {
1012 struct blk_plug_cb cb;
1013 struct bio_list pending;
1014 int pending_cnt;
1015};
1016
1017static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1018{
1019 struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb,
1020 cb);
1021 struct mddev *mddev = plug->cb.data;
1022 struct r10conf *conf = mddev->private;
1023 struct bio *bio;
1024
1025 if (from_schedule || current->bio_list) {
1026 spin_lock_irq(&conf->device_lock);
1027 bio_list_merge(&conf->pending_bio_list, &plug->pending);
1028 conf->pending_count += plug->pending_cnt;
1029 spin_unlock_irq(&conf->device_lock);
1030 wake_up(&conf->wait_barrier);
1031 md_wakeup_thread(mddev->thread);
1032 kfree(plug);
1033 return;
1034 }
1035
1036
1037 bio = bio_list_get(&plug->pending);
1038 bitmap_unplug(mddev->bitmap);
1039 wake_up(&conf->wait_barrier);
1040
1041 while (bio) {
1042 struct bio *next = bio->bi_next;
1043 bio->bi_next = NULL;
1044 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
1045 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
1046
1047 bio_endio(bio);
1048 else
1049 generic_make_request(bio);
1050 bio = next;
1051 }
1052 kfree(plug);
1053}
1054
1055static void __make_request(struct mddev *mddev, struct bio *bio)
1056{
1057 struct r10conf *conf = mddev->private;
1058 struct r10bio *r10_bio;
1059 struct bio *read_bio;
1060 int i;
1061 const int rw = bio_data_dir(bio);
1062 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
1063 const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
1064 const unsigned long do_discard = (bio->bi_rw
1065 & (REQ_DISCARD | REQ_SECURE));
1066 const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME);
1067 unsigned long flags;
1068 struct md_rdev *blocked_rdev;
1069 struct blk_plug_cb *cb;
1070 struct raid10_plug_cb *plug = NULL;
1071 int sectors_handled;
1072 int max_sectors;
1073 int sectors;
1074
1075
1076
1077
1078
1079
1080 wait_barrier(conf);
1081
1082 sectors = bio_sectors(bio);
1083 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1084 bio->bi_iter.bi_sector < conf->reshape_progress &&
1085 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
1086
1087
1088
1089 allow_barrier(conf);
1090 wait_event(conf->wait_barrier,
1091 conf->reshape_progress <= bio->bi_iter.bi_sector ||
1092 conf->reshape_progress >= bio->bi_iter.bi_sector +
1093 sectors);
1094 wait_barrier(conf);
1095 }
1096 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1097 bio_data_dir(bio) == WRITE &&
1098 (mddev->reshape_backwards
1099 ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
1100 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
1101 : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
1102 bio->bi_iter.bi_sector < conf->reshape_progress))) {
1103
1104 mddev->reshape_position = conf->reshape_progress;
1105 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1106 set_bit(MD_CHANGE_PENDING, &mddev->flags);
1107 md_wakeup_thread(mddev->thread);
1108 wait_event(mddev->sb_wait,
1109 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
1110
1111 conf->reshape_safe = mddev->reshape_position;
1112 }
1113
1114 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1115
1116 r10_bio->master_bio = bio;
1117 r10_bio->sectors = sectors;
1118
1119 r10_bio->mddev = mddev;
1120 r10_bio->sector = bio->bi_iter.bi_sector;
1121 r10_bio->state = 0;
	/* We might need to issue multiple reads to different
	 * devices if there are bad blocks around, so we keep
	 * track of the number of reads in bio->bi_phys_segments.
	 * If this is 0, there is only one r10_bio and no locking
	 * will be needed when the read completes.  If it is
	 * non-zero, then it is the number of not-completed requests.
	 */
1130 bio->bi_phys_segments = 0;
1131 bio_clear_flag(bio, BIO_SEG_VALID);
1132
1133 if (rw == READ) {
1134
1135
1136
1137 struct md_rdev *rdev;
1138 int slot;
1139
1140read_again:
1141 rdev = read_balance(conf, r10_bio, &max_sectors);
1142 if (!rdev) {
1143 raid_end_bio_io(r10_bio);
1144 return;
1145 }
1146 slot = r10_bio->read_slot;
1147
1148 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1149 bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
1150 max_sectors);
1151
1152 r10_bio->devs[slot].bio = read_bio;
1153 r10_bio->devs[slot].rdev = rdev;
1154
1155 read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
1156 choose_data_offset(r10_bio, rdev);
1157 read_bio->bi_bdev = rdev->bdev;
1158 read_bio->bi_end_io = raid10_end_read_request;
1159 read_bio->bi_rw = READ | do_sync;
1160 read_bio->bi_private = r10_bio;
1161
1162 if (max_sectors < r10_bio->sectors) {
1163
1164
1165
1166 sectors_handled = (r10_bio->sector + max_sectors
1167 - bio->bi_iter.bi_sector);
1168 r10_bio->sectors = max_sectors;
1169 spin_lock_irq(&conf->device_lock);
1170 if (bio->bi_phys_segments == 0)
1171 bio->bi_phys_segments = 2;
1172 else
1173 bio->bi_phys_segments++;
1174 spin_unlock_irq(&conf->device_lock);
1175
1176
1177
1178
1179
1180 reschedule_retry(r10_bio);
1181
1182 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1183
1184 r10_bio->master_bio = bio;
1185 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1186 r10_bio->state = 0;
1187 r10_bio->mddev = mddev;
1188 r10_bio->sector = bio->bi_iter.bi_sector +
1189 sectors_handled;
1190 goto read_again;
1191 } else
1192 generic_make_request(read_bio);
1193 return;
1194 }
1195
1196
1197
1198
1199 if (conf->pending_count >= max_queued_requests) {
1200 md_wakeup_thread(mddev->thread);
1201 wait_event(conf->wait_barrier,
1202 conf->pending_count < max_queued_requests);
1203 }
	/* first select target devices under rcu_lock and
	 * inc refcount on their rdev.  Record them by setting
	 * bios[x] to bio.
	 * If there are known/acknowledged bad blocks on any device
	 * on which we have seen a write error, we want to avoid
	 * writing to those blocks.  This potentially requires several
	 * writes to write around the bad blocks.  Each set of writes
	 * gets its own r10_bio with a set of bios attached.  The number
	 * of r10_bios is recorded in bio->bi_phys_segments just as with
	 * the read case.
	 */
1216 r10_bio->read_slot = -1;
1217 raid10_find_phys(conf, r10_bio);
1218retry_write:
1219 blocked_rdev = NULL;
1220 rcu_read_lock();
1221 max_sectors = r10_bio->sectors;
1222
1223 for (i = 0; i < conf->copies; i++) {
1224 int d = r10_bio->devs[i].devnum;
1225 struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
1226 struct md_rdev *rrdev = rcu_dereference(
1227 conf->mirrors[d].replacement);
1228 if (rdev == rrdev)
1229 rrdev = NULL;
1230 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
1231 atomic_inc(&rdev->nr_pending);
1232 blocked_rdev = rdev;
1233 break;
1234 }
1235 if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
1236 atomic_inc(&rrdev->nr_pending);
1237 blocked_rdev = rrdev;
1238 break;
1239 }
1240 if (rdev && (test_bit(Faulty, &rdev->flags)))
1241 rdev = NULL;
1242 if (rrdev && (test_bit(Faulty, &rrdev->flags)))
1243 rrdev = NULL;
1244
1245 r10_bio->devs[i].bio = NULL;
1246 r10_bio->devs[i].repl_bio = NULL;
1247
1248 if (!rdev && !rrdev) {
1249 set_bit(R10BIO_Degraded, &r10_bio->state);
1250 continue;
1251 }
1252 if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
1253 sector_t first_bad;
1254 sector_t dev_sector = r10_bio->devs[i].addr;
1255 int bad_sectors;
1256 int is_bad;
1257
1258 is_bad = is_badblock(rdev, dev_sector,
1259 max_sectors,
1260 &first_bad, &bad_sectors);
1261 if (is_bad < 0) {
1262
1263
1264
1265 atomic_inc(&rdev->nr_pending);
1266 set_bit(BlockedBadBlocks, &rdev->flags);
1267 blocked_rdev = rdev;
1268 break;
1269 }
1270 if (is_bad && first_bad <= dev_sector) {
1271
1272 bad_sectors -= (dev_sector - first_bad);
1273 if (bad_sectors < max_sectors)
1274
1275
1276
1277 max_sectors = bad_sectors;
1278
1279
1280
1281
1282
1283
1284
1285
1286 continue;
1287 }
1288 if (is_bad) {
1289 int good_sectors = first_bad - dev_sector;
1290 if (good_sectors < max_sectors)
1291 max_sectors = good_sectors;
1292 }
1293 }
1294 if (rdev) {
1295 r10_bio->devs[i].bio = bio;
1296 atomic_inc(&rdev->nr_pending);
1297 }
1298 if (rrdev) {
1299 r10_bio->devs[i].repl_bio = bio;
1300 atomic_inc(&rrdev->nr_pending);
1301 }
1302 }
1303 rcu_read_unlock();
1304
1305 if (unlikely(blocked_rdev)) {
1306
1307 int j;
1308 int d;
1309
1310 for (j = 0; j < i; j++) {
1311 if (r10_bio->devs[j].bio) {
1312 d = r10_bio->devs[j].devnum;
1313 rdev_dec_pending(conf->mirrors[d].rdev, mddev);
1314 }
1315 if (r10_bio->devs[j].repl_bio) {
1316 struct md_rdev *rdev;
1317 d = r10_bio->devs[j].devnum;
1318 rdev = conf->mirrors[d].replacement;
1319 if (!rdev) {
1320
1321 smp_mb();
1322 rdev = conf->mirrors[d].rdev;
1323 }
1324 rdev_dec_pending(rdev, mddev);
1325 }
1326 }
1327 allow_barrier(conf);
1328 md_wait_for_blocked_rdev(blocked_rdev, mddev);
1329 wait_barrier(conf);
1330 goto retry_write;
1331 }
1332
1333 if (max_sectors < r10_bio->sectors) {
1334
1335
1336
1337 r10_bio->sectors = max_sectors;
1338 spin_lock_irq(&conf->device_lock);
1339 if (bio->bi_phys_segments == 0)
1340 bio->bi_phys_segments = 2;
1341 else
1342 bio->bi_phys_segments++;
1343 spin_unlock_irq(&conf->device_lock);
1344 }
1345 sectors_handled = r10_bio->sector + max_sectors -
1346 bio->bi_iter.bi_sector;
1347
1348 atomic_set(&r10_bio->remaining, 1);
1349 bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
1350
1351 for (i = 0; i < conf->copies; i++) {
1352 struct bio *mbio;
1353 int d = r10_bio->devs[i].devnum;
1354 if (r10_bio->devs[i].bio) {
1355 struct md_rdev *rdev = conf->mirrors[d].rdev;
1356 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1357 bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
1358 max_sectors);
1359 r10_bio->devs[i].bio = mbio;
1360
1361 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
1362 choose_data_offset(r10_bio,
1363 rdev));
1364 mbio->bi_bdev = rdev->bdev;
1365 mbio->bi_end_io = raid10_end_write_request;
1366 mbio->bi_rw =
1367 WRITE | do_sync | do_fua | do_discard | do_same;
1368 mbio->bi_private = r10_bio;
1369
1370 atomic_inc(&r10_bio->remaining);
1371
1372 cb = blk_check_plugged(raid10_unplug, mddev,
1373 sizeof(*plug));
1374 if (cb)
1375 plug = container_of(cb, struct raid10_plug_cb,
1376 cb);
1377 else
1378 plug = NULL;
1379 spin_lock_irqsave(&conf->device_lock, flags);
1380 if (plug) {
1381 bio_list_add(&plug->pending, mbio);
1382 plug->pending_cnt++;
1383 } else {
1384 bio_list_add(&conf->pending_bio_list, mbio);
1385 conf->pending_count++;
1386 }
1387 spin_unlock_irqrestore(&conf->device_lock, flags);
1388 if (!plug)
1389 md_wakeup_thread(mddev->thread);
1390 }
1391
1392 if (r10_bio->devs[i].repl_bio) {
1393 struct md_rdev *rdev = conf->mirrors[d].replacement;
1394 if (rdev == NULL) {
1395
1396 smp_mb();
1397 rdev = conf->mirrors[d].rdev;
1398 }
1399 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1400 bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
1401 max_sectors);
1402 r10_bio->devs[i].repl_bio = mbio;
1403
1404 mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
1405 choose_data_offset(
1406 r10_bio, rdev));
1407 mbio->bi_bdev = rdev->bdev;
1408 mbio->bi_end_io = raid10_end_write_request;
1409 mbio->bi_rw =
1410 WRITE | do_sync | do_fua | do_discard | do_same;
1411 mbio->bi_private = r10_bio;
1412
1413 atomic_inc(&r10_bio->remaining);
1414 spin_lock_irqsave(&conf->device_lock, flags);
1415 bio_list_add(&conf->pending_bio_list, mbio);
1416 conf->pending_count++;
1417 spin_unlock_irqrestore(&conf->device_lock, flags);
1418 if (!mddev_check_plugged(mddev))
1419 md_wakeup_thread(mddev->thread);
1420 }
1421 }
1422
1423
1424
1425
1426
1427 if (sectors_handled < bio_sectors(bio)) {
1428 one_write_done(r10_bio);
1429
1430
1431
1432 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1433
1434 r10_bio->master_bio = bio;
1435 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1436
1437 r10_bio->mddev = mddev;
1438 r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
1439 r10_bio->state = 0;
1440 goto retry_write;
1441 }
1442 one_write_done(r10_bio);
1443}
1444
1445static void make_request(struct mddev *mddev, struct bio *bio)
1446{
1447 struct r10conf *conf = mddev->private;
1448 sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
1449 int chunk_sects = chunk_mask + 1;
1450
1451 struct bio *split;
1452
1453 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
1454 md_flush_request(mddev, bio);
1455 return;
1456 }
1457
1458 md_write_start(mddev, bio);
1459
1460 do {
1461
1462
1463
1464
1465
1466 if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
1467 bio_sectors(bio) > chunk_sects
1468 && (conf->geo.near_copies < conf->geo.raid_disks
1469 || conf->prev.near_copies <
1470 conf->prev.raid_disks))) {
1471 split = bio_split(bio, chunk_sects -
1472 (bio->bi_iter.bi_sector &
1473 (chunk_sects - 1)),
1474 GFP_NOIO, fs_bio_set);
1475 bio_chain(split, bio);
1476 } else {
1477 split = bio;
1478 }
1479
1480 __make_request(mddev, split);
1481 } while (split != bio);
1482
1483
1484 wake_up(&conf->wait_barrier);
1485}
1486
1487static void status(struct seq_file *seq, struct mddev *mddev)
1488{
1489 struct r10conf *conf = mddev->private;
1490 int i;
1491
1492 if (conf->geo.near_copies < conf->geo.raid_disks)
1493 seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
1494 if (conf->geo.near_copies > 1)
1495 seq_printf(seq, " %d near-copies", conf->geo.near_copies);
1496 if (conf->geo.far_copies > 1) {
1497 if (conf->geo.far_offset)
1498 seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
1499 else
1500 seq_printf(seq, " %d far-copies", conf->geo.far_copies);
1501 if (conf->geo.far_set_size != conf->geo.raid_disks)
1502 seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
1503 }
1504 seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
1505 conf->geo.raid_disks - mddev->degraded);
1506 for (i = 0; i < conf->geo.raid_disks; i++)
1507 seq_printf(seq, "%s",
1508 conf->mirrors[i].rdev &&
1509 test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
1510 seq_printf(seq, "]");
1511}
1512
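/*
 * Check if there are enough drives for every block to appear
 * on at least one.  Don't consider the device numbered 'ignore'
 * as we might be about to remove it.
 */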
1518static int _enough(struct r10conf *conf, int previous, int ignore)
1519{
1520 int first = 0;
1521 int has_enough = 0;
1522 int disks, ncopies;
1523 if (previous) {
1524 disks = conf->prev.raid_disks;
1525 ncopies = conf->prev.near_copies;
1526 } else {
1527 disks = conf->geo.raid_disks;
1528 ncopies = conf->geo.near_copies;
1529 }
1530
1531 rcu_read_lock();
1532 do {
1533 int n = conf->copies;
1534 int cnt = 0;
1535 int this = first;
1536 while (n--) {
1537 struct md_rdev *rdev;
1538 if (this != ignore &&
1539 (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
1540 test_bit(In_sync, &rdev->flags))
1541 cnt++;
1542 this = (this+1) % disks;
1543 }
1544 if (cnt == 0)
1545 goto out;
1546 first = (first + ncopies) % disks;
1547 } while (first != 0);
1548 has_enough = 1;
1549out:
1550 rcu_read_unlock();
1551 return has_enough;
1552}
1553
1554static int enough(struct r10conf *conf, int ignore)
1555{
1556
1557
1558
1559
1560
1561 return _enough(conf, 0, ignore) &&
1562 _enough(conf, 1, ignore);
1563}
1564
1565static void error(struct mddev *mddev, struct md_rdev *rdev)
1566{
1567 char b[BDEVNAME_SIZE];
1568 struct r10conf *conf = mddev->private;
1569 unsigned long flags;
1570
	/*
	 * If it is not operational, then we have already marked it as dead;
	 * else if it is the last working disk, ignore the error and let the
	 * next level up know;
	 * else mark the drive as failed.
	 */
1577 spin_lock_irqsave(&conf->device_lock, flags);
1578 if (test_bit(In_sync, &rdev->flags)
1579 && !enough(conf, rdev->raid_disk)) {
		/*
		 * Don't fail the drive, just return an IO error.
		 */
1583 spin_unlock_irqrestore(&conf->device_lock, flags);
1584 return;
1585 }
1586 if (test_and_clear_bit(In_sync, &rdev->flags))
1587 mddev->degraded++;
1588
1589
1590
1591 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1592 set_bit(Blocked, &rdev->flags);
1593 set_bit(Faulty, &rdev->flags);
1594 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1595 set_bit(MD_CHANGE_PENDING, &mddev->flags);
1596 spin_unlock_irqrestore(&conf->device_lock, flags);
1597 printk(KERN_ALERT
1598 "md/raid10:%s: Disk failure on %s, disabling device.\n"
1599 "md/raid10:%s: Operation continuing on %d devices.\n",
1600 mdname(mddev), bdevname(rdev->bdev, b),
1601 mdname(mddev), conf->geo.raid_disks - mddev->degraded);
1602}
1603
1604static void print_conf(struct r10conf *conf)
1605{
1606 int i;
1607 struct raid10_info *tmp;
1608
1609 printk(KERN_DEBUG "RAID10 conf printout:\n");
1610 if (!conf) {
1611 printk(KERN_DEBUG "(!conf)\n");
1612 return;
1613 }
1614 printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
1615 conf->geo.raid_disks);
1616
1617 for (i = 0; i < conf->geo.raid_disks; i++) {
1618 char b[BDEVNAME_SIZE];
1619 tmp = conf->mirrors + i;
1620 if (tmp->rdev)
1621 printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
1622 i, !test_bit(In_sync, &tmp->rdev->flags),
1623 !test_bit(Faulty, &tmp->rdev->flags),
1624 bdevname(tmp->rdev->bdev,b));
1625 }
1626}
1627
1628static void close_sync(struct r10conf *conf)
1629{
1630 wait_barrier(conf);
1631 allow_barrier(conf);
1632
1633 mempool_destroy(conf->r10buf_pool);
1634 conf->r10buf_pool = NULL;
1635}
1636
1637static int raid10_spare_active(struct mddev *mddev)
1638{
1639 int i;
1640 struct r10conf *conf = mddev->private;
1641 struct raid10_info *tmp;
1642 int count = 0;
1643 unsigned long flags;
1644
1645
1646
1647
1648
1649 for (i = 0; i < conf->geo.raid_disks; i++) {
1650 tmp = conf->mirrors + i;
1651 if (tmp->replacement
1652 && tmp->replacement->recovery_offset == MaxSector
1653 && !test_bit(Faulty, &tmp->replacement->flags)
1654 && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
1655
1656 if (!tmp->rdev
1657 || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
1658 count++;
1659 if (tmp->rdev) {
1660
1661
1662
1663
1664 set_bit(Faulty, &tmp->rdev->flags);
1665 sysfs_notify_dirent_safe(
1666 tmp->rdev->sysfs_state);
1667 }
1668 sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
1669 } else if (tmp->rdev
1670 && tmp->rdev->recovery_offset == MaxSector
1671 && !test_bit(Faulty, &tmp->rdev->flags)
1672 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
1673 count++;
1674 sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
1675 }
1676 }
1677 spin_lock_irqsave(&conf->device_lock, flags);
1678 mddev->degraded -= count;
1679 spin_unlock_irqrestore(&conf->device_lock, flags);
1680
1681 print_conf(conf);
1682 return count;
1683}
1684
1685static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1686{
1687 struct r10conf *conf = mddev->private;
1688 int err = -EEXIST;
1689 int mirror;
1690 int first = 0;
1691 int last = conf->geo.raid_disks - 1;
1692
1693 if (mddev->recovery_cp < MaxSector)
1694
1695
1696
1697 return -EBUSY;
1698 if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
1699 return -EINVAL;
1700
1701 if (rdev->raid_disk >= 0)
1702 first = last = rdev->raid_disk;
1703
1704 if (rdev->saved_raid_disk >= first &&
1705 conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
1706 mirror = rdev->saved_raid_disk;
1707 else
1708 mirror = first;
1709 for ( ; mirror <= last ; mirror++) {
1710 struct raid10_info *p = &conf->mirrors[mirror];
1711 if (p->recovery_disabled == mddev->recovery_disabled)
1712 continue;
1713 if (p->rdev) {
1714 if (!test_bit(WantReplacement, &p->rdev->flags) ||
1715 p->replacement != NULL)
1716 continue;
1717 clear_bit(In_sync, &rdev->flags);
1718 set_bit(Replacement, &rdev->flags);
1719 rdev->raid_disk = mirror;
1720 err = 0;
1721 if (mddev->gendisk)
1722 disk_stack_limits(mddev->gendisk, rdev->bdev,
1723 rdev->data_offset << 9);
1724 conf->fullsync = 1;
1725 rcu_assign_pointer(p->replacement, rdev);
1726 break;
1727 }
1728
1729 if (mddev->gendisk)
1730 disk_stack_limits(mddev->gendisk, rdev->bdev,
1731 rdev->data_offset << 9);
1732
1733 p->head_position = 0;
1734 p->recovery_disabled = mddev->recovery_disabled - 1;
1735 rdev->raid_disk = mirror;
1736 err = 0;
1737 if (rdev->saved_raid_disk != mirror)
1738 conf->fullsync = 1;
1739 rcu_assign_pointer(p->rdev, rdev);
1740 break;
1741 }
1742 mddev_suspend(mddev);
1743 md_integrity_add_rdev(rdev, mddev);
1744 mddev_resume(mddev);
1745 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
1746 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
1747
1748 print_conf(conf);
1749 return err;
1750}
1751
1752static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1753{
1754 struct r10conf *conf = mddev->private;
1755 int err = 0;
1756 int number = rdev->raid_disk;
1757 struct md_rdev **rdevp;
1758 struct raid10_info *p = conf->mirrors + number;
1759
1760 print_conf(conf);
1761 if (rdev == p->rdev)
1762 rdevp = &p->rdev;
1763 else if (rdev == p->replacement)
1764 rdevp = &p->replacement;
1765 else
1766 return 0;
1767
1768 if (test_bit(In_sync, &rdev->flags) ||
1769 atomic_read(&rdev->nr_pending)) {
1770 err = -EBUSY;
1771 goto abort;
1772 }
1773
1774
1775
1776 if (!test_bit(Faulty, &rdev->flags) &&
1777 mddev->recovery_disabled != p->recovery_disabled &&
1778 (!p->replacement || p->replacement == rdev) &&
1779 number < conf->geo.raid_disks &&
1780 enough(conf, -1)) {
1781 err = -EBUSY;
1782 goto abort;
1783 }
1784 *rdevp = NULL;
1785 synchronize_rcu();
1786 if (atomic_read(&rdev->nr_pending)) {
1787
1788 err = -EBUSY;
1789 *rdevp = rdev;
1790 goto abort;
1791 } else if (p->replacement) {
1792
1793 p->rdev = p->replacement;
1794 clear_bit(Replacement, &p->replacement->flags);
1795 smp_mb();
1796
1797
1798 p->replacement = NULL;
1799 clear_bit(WantReplacement, &rdev->flags);
1800 } else
1801
1802
1803
1804 clear_bit(WantReplacement, &rdev->flags);
1805
1806 err = md_integrity_register(mddev);
1807
1808abort:
1809
1810 print_conf(conf);
1811 return err;
1812}
1813
1814static void end_sync_read(struct bio *bio)
1815{
1816 struct r10bio *r10_bio = bio->bi_private;
1817 struct r10conf *conf = r10_bio->mddev->private;
1818 int d;
1819
1820 if (bio == r10_bio->master_bio) {
1821
1822 d = r10_bio->read_slot;
1823 } else
1824 d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
1825
1826 if (!bio->bi_error)
1827 set_bit(R10BIO_Uptodate, &r10_bio->state);
1828 else
1829
1830
1831
1832 atomic_add(r10_bio->sectors,
1833 &conf->mirrors[d].rdev->corrected_errors);
1834
1835
1836
1837
1838 rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev);
1839 if (test_bit(R10BIO_IsRecover, &r10_bio->state) ||
1840 atomic_dec_and_test(&r10_bio->remaining)) {
1841
1842
1843
1844 reschedule_retry(r10_bio);
1845 }
1846}
1847
1848static void end_sync_request(struct r10bio *r10_bio)
1849{
1850 struct mddev *mddev = r10_bio->mddev;
1851
1852 while (atomic_dec_and_test(&r10_bio->remaining)) {
1853 if (r10_bio->master_bio == NULL) {
1854
1855 sector_t s = r10_bio->sectors;
1856 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
1857 test_bit(R10BIO_WriteError, &r10_bio->state))
1858 reschedule_retry(r10_bio);
1859 else
1860 put_buf(r10_bio);
1861 md_done_sync(mddev, s, 1);
1862 break;
1863 } else {
1864 struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio;
1865 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
1866 test_bit(R10BIO_WriteError, &r10_bio->state))
1867 reschedule_retry(r10_bio);
1868 else
1869 put_buf(r10_bio);
1870 r10_bio = r10_bio2;
1871 }
1872 }
1873}
1874
1875static void end_sync_write(struct bio *bio)
1876{
1877 struct r10bio *r10_bio = bio->bi_private;
1878 struct mddev *mddev = r10_bio->mddev;
1879 struct r10conf *conf = mddev->private;
1880 int d;
1881 sector_t first_bad;
1882 int bad_sectors;
1883 int slot;
1884 int repl;
1885 struct md_rdev *rdev = NULL;
1886
1887 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
1888 if (repl)
1889 rdev = conf->mirrors[d].replacement;
1890 else
1891 rdev = conf->mirrors[d].rdev;
1892
1893 if (bio->bi_error) {
1894 if (repl)
1895 md_error(mddev, rdev);
1896 else {
1897 set_bit(WriteErrorSeen, &rdev->flags);
1898 if (!test_and_set_bit(WantReplacement, &rdev->flags))
1899 set_bit(MD_RECOVERY_NEEDED,
1900 &rdev->mddev->recovery);
1901 set_bit(R10BIO_WriteError, &r10_bio->state);
1902 }
1903 } else if (is_badblock(rdev,
1904 r10_bio->devs[slot].addr,
1905 r10_bio->sectors,
1906 &first_bad, &bad_sectors))
1907 set_bit(R10BIO_MadeGood, &r10_bio->state);
1908
1909 rdev_dec_pending(rdev, mddev);
1910
1911 end_sync_request(r10_bio);
1912}
1913
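/*
 * Note: sync and recover are handled very differently for raid10.
 * This code is for resync.
 * For resync, we read through virtual addresses and read all blocks.
 * If there is any error, we schedule a write.  The lowest numbered
 * drive is authoritative.
 * However requests come for physical address, so we need to map.
 * For every physical address there are raid_disks/copies virtual addresses,
 * which is always at least one, but is not necessarily an integer.
 * This means that a physical address can span multiple chunks, so we may
 * have to submit multiple io requests for a single sync request.
 */

/*
 * We check if all blocks are in-sync and only write to blocks that
 * aren't in sync.
 */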
1930static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
1931{
1932 struct r10conf *conf = mddev->private;
1933 int i, first;
1934 struct bio *tbio, *fbio;
1935 int vcnt;
1936
1937 atomic_set(&r10_bio->remaining, 1);
1938
1939
1940 for (i=0; i<conf->copies; i++)
1941 if (!r10_bio->devs[i].bio->bi_error)
1942 break;
1943
1944 if (i == conf->copies)
1945 goto done;
1946
1947 first = i;
1948 fbio = r10_bio->devs[i].bio;
1949 fbio->bi_iter.bi_size = r10_bio->sectors << 9;
1950 fbio->bi_iter.bi_idx = 0;
1951
1952 vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
1953
1954 for (i=0 ; i < conf->copies ; i++) {
1955 int j, d;
1956
1957 tbio = r10_bio->devs[i].bio;
1958
1959 if (tbio->bi_end_io != end_sync_read)
1960 continue;
1961 if (i == first)
1962 continue;
1963 if (!r10_bio->devs[i].bio->bi_error) {
1964
1965
1966
1967
1968 int sectors = r10_bio->sectors;
1969 for (j = 0; j < vcnt; j++) {
1970 int len = PAGE_SIZE;
1971 if (sectors < (len / 512))
1972 len = sectors * 512;
1973 if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
1974 page_address(tbio->bi_io_vec[j].bv_page),
1975 len))
1976 break;
1977 sectors -= len/512;
1978 }
1979 if (j == vcnt)
1980 continue;
1981 atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
1982 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
1983
1984 continue;
1985 }
1986
1987
1988
1989
1990
1991 bio_reset(tbio);
1992
1993 tbio->bi_vcnt = vcnt;
1994 tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
1995 tbio->bi_rw = WRITE;
1996 tbio->bi_private = r10_bio;
1997 tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
1998 tbio->bi_end_io = end_sync_write;
1999
2000 bio_copy_data(tbio, fbio);
2001
2002 d = r10_bio->devs[i].devnum;
2003 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2004 atomic_inc(&r10_bio->remaining);
2005 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
2006
2007 tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
2008 tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
2009 generic_make_request(tbio);
2010 }
2011
2012
2013
2014
2015 for (i = 0; i < conf->copies; i++) {
2016 int d;
2017
2018 tbio = r10_bio->devs[i].repl_bio;
2019 if (!tbio || !tbio->bi_end_io)
2020 continue;
2021 if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
2022 && r10_bio->devs[i].bio != fbio)
2023 bio_copy_data(tbio, fbio);
2024 d = r10_bio->devs[i].devnum;
2025 atomic_inc(&r10_bio->remaining);
2026 md_sync_acct(conf->mirrors[d].replacement->bdev,
2027 bio_sectors(tbio));
2028 generic_make_request(tbio);
2029 }
2030
2031done:
2032 if (atomic_dec_and_test(&r10_bio->remaining)) {
2033 md_done_sync(mddev, r10_bio->sectors, 1);
2034 put_buf(r10_bio);
2035 }
2036}
2037
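/*
 * Now for the recovery code.
 * Recovery happens across physical sectors.
 * We recover all non-in_sync drives by finding the virtual address of
 * each, and then choosing a working drive that also has that virt address.
 * There is a separate r10_bio for each non-in_sync drive.
 * Only the first two slots are in use: the first for reading,
 * the second for writing.
 */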
2048static void fix_recovery_read_error(struct r10bio *r10_bio)
2049{
	/* We got a read error during recovery.
	 * We repeat the read in smaller page-sized chunks, in order to
	 * see which areas are readable, and write those back to the
	 * new device.  Where we cannot read or write, bad blocks are
	 * recorded (and recovery is aborted if the target cannot
	 * record them).
	 */
2057 struct mddev *mddev = r10_bio->mddev;
2058 struct r10conf *conf = mddev->private;
2059 struct bio *bio = r10_bio->devs[0].bio;
2060 sector_t sect = 0;
2061 int sectors = r10_bio->sectors;
2062 int idx = 0;
2063 int dr = r10_bio->devs[0].devnum;
2064 int dw = r10_bio->devs[1].devnum;
2065
2066 while (sectors) {
2067 int s = sectors;
2068 struct md_rdev *rdev;
2069 sector_t addr;
2070 int ok;
2071
2072 if (s > (PAGE_SIZE>>9))
2073 s = PAGE_SIZE >> 9;
2074
2075 rdev = conf->mirrors[dr].rdev;
		addr = r10_bio->devs[0].addr + sect;
2077 ok = sync_page_io(rdev,
2078 addr,
2079 s << 9,
2080 bio->bi_io_vec[idx].bv_page,
2081 READ, false);
2082 if (ok) {
2083 rdev = conf->mirrors[dw].rdev;
2084 addr = r10_bio->devs[1].addr + sect;
2085 ok = sync_page_io(rdev,
2086 addr,
2087 s << 9,
2088 bio->bi_io_vec[idx].bv_page,
2089 WRITE, false);
2090 if (!ok) {
2091 set_bit(WriteErrorSeen, &rdev->flags);
2092 if (!test_and_set_bit(WantReplacement,
2093 &rdev->flags))
2094 set_bit(MD_RECOVERY_NEEDED,
2095 &rdev->mddev->recovery);
2096 }
2097 }
2098 if (!ok) {
2099
2100
2101
2102
2103 rdev_set_badblocks(rdev, addr, s, 0);
2104
2105 if (rdev != conf->mirrors[dw].rdev) {
2106
2107 struct md_rdev *rdev2 = conf->mirrors[dw].rdev;
2108 addr = r10_bio->devs[1].addr + sect;
2109 ok = rdev_set_badblocks(rdev2, addr, s, 0);
2110 if (!ok) {
2111
2112 printk(KERN_NOTICE
2113 "md/raid10:%s: recovery aborted"
2114 " due to read error\n",
2115 mdname(mddev));
2116
2117 conf->mirrors[dw].recovery_disabled
2118 = mddev->recovery_disabled;
2119 set_bit(MD_RECOVERY_INTR,
2120 &mddev->recovery);
2121 break;
2122 }
2123 }
2124 }
2125
2126 sectors -= s;
2127 sect += s;
2128 idx++;
2129 }
2130}
2131
2132static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2133{
2134 struct r10conf *conf = mddev->private;
2135 int d;
2136 struct bio *wbio, *wbio2;
2137
2138 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
2139 fix_recovery_read_error(r10_bio);
2140 end_sync_request(r10_bio);
2141 return;
2142 }
2143
2144
2145
2146
2147
2148 d = r10_bio->devs[1].devnum;
2149 wbio = r10_bio->devs[1].bio;
2150 wbio2 = r10_bio->devs[1].repl_bio;
2151
2152
2153
2154
2155 if (wbio2 && !wbio2->bi_end_io)
2156 wbio2 = NULL;
2157 if (wbio->bi_end_io) {
2158 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2159 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
2160 generic_make_request(wbio);
2161 }
2162 if (wbio2) {
2163 atomic_inc(&conf->mirrors[d].replacement->nr_pending);
2164 md_sync_acct(conf->mirrors[d].replacement->bdev,
2165 bio_sectors(wbio2));
2166 generic_make_request(wbio2);
2167 }
2168}
2169
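/*
 * Used by fix_read_error() to decay the per rdev read_errors.
 * We halve the read error count for every hour that has elapsed
 * since the last recorded read error.
 */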
2176static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
2177{
2178 struct timespec cur_time_mon;
2179 unsigned long hours_since_last;
2180 unsigned int read_errors = atomic_read(&rdev->read_errors);
2181
2182 ktime_get_ts(&cur_time_mon);
2183
2184 if (rdev->last_read_error.tv_sec == 0 &&
2185 rdev->last_read_error.tv_nsec == 0) {
2186
2187 rdev->last_read_error = cur_time_mon;
2188 return;
2189 }
2190
2191 hours_since_last = (cur_time_mon.tv_sec -
2192 rdev->last_read_error.tv_sec) / 3600;
2193
2194 rdev->last_read_error = cur_time_mon;
2195
2196
2197
2198
2199
2200
2201 if (hours_since_last >= 8 * sizeof(read_errors))
2202 atomic_set(&rdev->read_errors, 0);
2203 else
2204 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
2205}
2206
2207static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
2208 int sectors, struct page *page, int rw)
2209{
2210 sector_t first_bad;
2211 int bad_sectors;
2212
2213 if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
2214 && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
2215 return -1;
2216 if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
2217
2218 return 1;
2219 if (rw == WRITE) {
2220 set_bit(WriteErrorSeen, &rdev->flags);
2221 if (!test_and_set_bit(WantReplacement, &rdev->flags))
2222 set_bit(MD_RECOVERY_NEEDED,
2223 &rdev->mddev->recovery);
2224 }
2225
2226 if (!rdev_set_badblocks(rdev, sector, sectors, 0))
2227 md_error(rdev->mddev, rdev);
2228 return 0;
2229}
2230
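/*
 * Called by raid10d to handle a read error.  It:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 */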
2239static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
2240{
2241 int sect = 0;
2242 int sectors = r10_bio->sectors;
2243 struct md_rdev*rdev;
2244 int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
2245 int d = r10_bio->devs[r10_bio->read_slot].devnum;
2246
2247
2248
2249
2250 rdev = conf->mirrors[d].rdev;
2251
2252 if (test_bit(Faulty, &rdev->flags))
2253
2254
2255 return;
2256
2257 check_decay_read_errors(mddev, rdev);
2258 atomic_inc(&rdev->read_errors);
2259 if (atomic_read(&rdev->read_errors) > max_read_errors) {
2260 char b[BDEVNAME_SIZE];
2261 bdevname(rdev->bdev, b);
2262
2263 printk(KERN_NOTICE
2264 "md/raid10:%s: %s: Raid device exceeded "
2265 "read_error threshold [cur %d:max %d]\n",
2266 mdname(mddev), b,
2267 atomic_read(&rdev->read_errors), max_read_errors);
2268 printk(KERN_NOTICE
2269 "md/raid10:%s: %s: Failing raid device\n",
2270 mdname(mddev), b);
2271 md_error(mddev, conf->mirrors[d].rdev);
2272 r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
2273 return;
2274 }
2275
2276 while(sectors) {
2277 int s = sectors;
2278 int sl = r10_bio->read_slot;
2279 int success = 0;
2280 int start;
2281
2282 if (s > (PAGE_SIZE>>9))
2283 s = PAGE_SIZE >> 9;
2284
2285 rcu_read_lock();
2286 do {
2287 sector_t first_bad;
2288 int bad_sectors;
2289
2290 d = r10_bio->devs[sl].devnum;
2291 rdev = rcu_dereference(conf->mirrors[d].rdev);
2292 if (rdev &&
2293 test_bit(In_sync, &rdev->flags) &&
2294 is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
2295 &first_bad, &bad_sectors) == 0) {
2296 atomic_inc(&rdev->nr_pending);
2297 rcu_read_unlock();
2298 success = sync_page_io(rdev,
2299 r10_bio->devs[sl].addr +
2300 sect,
2301 s<<9,
2302 conf->tmppage, READ, false);
2303 rdev_dec_pending(rdev, mddev);
2304 rcu_read_lock();
2305 if (success)
2306 break;
2307 }
2308 sl++;
2309 if (sl == conf->copies)
2310 sl = 0;
2311 } while (!success && sl != r10_bio->read_slot);
2312 rcu_read_unlock();
2313
2314 if (!success) {
2315
2316
2317
2318
2319 int dn = r10_bio->devs[r10_bio->read_slot].devnum;
2320 rdev = conf->mirrors[dn].rdev;
2321
2322 if (!rdev_set_badblocks(
2323 rdev,
2324 r10_bio->devs[r10_bio->read_slot].addr
2325 + sect,
2326 s, 0)) {
2327 md_error(mddev, rdev);
2328 r10_bio->devs[r10_bio->read_slot].bio
2329 = IO_BLOCKED;
2330 }
2331 break;
2332 }
2333
2334 start = sl;
2335
2336 rcu_read_lock();
2337 while (sl != r10_bio->read_slot) {
2338 char b[BDEVNAME_SIZE];
2339
2340 if (sl==0)
2341 sl = conf->copies;
2342 sl--;
2343 d = r10_bio->devs[sl].devnum;
2344 rdev = rcu_dereference(conf->mirrors[d].rdev);
2345 if (!rdev ||
2346 !test_bit(In_sync, &rdev->flags))
2347 continue;
2348
2349 atomic_inc(&rdev->nr_pending);
2350 rcu_read_unlock();
2351 if (r10_sync_page_io(rdev,
2352 r10_bio->devs[sl].addr +
2353 sect,
2354 s, conf->tmppage, WRITE)
2355 == 0) {
2356
2357 printk(KERN_NOTICE
2358 "md/raid10:%s: read correction "
2359 "write failed"
2360 " (%d sectors at %llu on %s)\n",
2361 mdname(mddev), s,
2362 (unsigned long long)(
2363 sect +
2364 choose_data_offset(r10_bio,
2365 rdev)),
2366 bdevname(rdev->bdev, b));
2367 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
2368 "drive\n",
2369 mdname(mddev),
2370 bdevname(rdev->bdev, b));
2371 }
2372 rdev_dec_pending(rdev, mddev);
2373 rcu_read_lock();
2374 }
2375 sl = start;
2376 while (sl != r10_bio->read_slot) {
2377 char b[BDEVNAME_SIZE];
2378
2379 if (sl==0)
2380 sl = conf->copies;
2381 sl--;
2382 d = r10_bio->devs[sl].devnum;
2383 rdev = rcu_dereference(conf->mirrors[d].rdev);
2384 if (!rdev ||
2385 !test_bit(In_sync, &rdev->flags))
2386 continue;
2387
2388 atomic_inc(&rdev->nr_pending);
2389 rcu_read_unlock();
2390 switch (r10_sync_page_io(rdev,
2391 r10_bio->devs[sl].addr +
2392 sect,
2393 s, conf->tmppage,
2394 READ)) {
2395 case 0:
2396
2397 printk(KERN_NOTICE
2398 "md/raid10:%s: unable to read back "
2399 "corrected sectors"
2400 " (%d sectors at %llu on %s)\n",
2401 mdname(mddev), s,
2402 (unsigned long long)(
2403 sect +
2404 choose_data_offset(r10_bio, rdev)),
2405 bdevname(rdev->bdev, b));
2406 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
2407 "drive\n",
2408 mdname(mddev),
2409 bdevname(rdev->bdev, b));
2410 break;
2411 case 1:
2412 printk(KERN_INFO
2413 "md/raid10:%s: read error corrected"
2414 " (%d sectors at %llu on %s)\n",
2415 mdname(mddev), s,
2416 (unsigned long long)(
2417 sect +
2418 choose_data_offset(r10_bio, rdev)),
2419 bdevname(rdev->bdev, b));
2420 atomic_add(s, &rdev->corrected_errors);
2421 }
2422
2423 rdev_dec_pending(rdev, mddev);
2424 rcu_read_lock();
2425 }
2426 rcu_read_unlock();
2427
2428 sectors -= s;
2429 sect += s;
2430 }
2431}
2432
2433static int narrow_write_error(struct r10bio *r10_bio, int i)
2434{
2435 struct bio *bio = r10_bio->master_bio;
2436 struct mddev *mddev = r10_bio->mddev;
2437 struct r10conf *conf = mddev->private;
2438 struct md_rdev *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
	/* bio has the data to be written to slot 'i' where
	 * we just recently had a write error.
	 * We repeatedly clone the bio and trim down to one block,
	 * then try the write.  Where the write fails we record
	 * a bad block.
	 * It is conceivable that the bio doesn't exactly align with
	 * blocks.  We must handle this.
	 *
	 * We currently own a reference to the rdev.
	 */
2450 int block_sectors;
2451 sector_t sector;
2452 int sectors;
2453 int sect_to_write = r10_bio->sectors;
2454 int ok = 1;
2455
2456 if (rdev->badblocks.shift < 0)
2457 return 0;
2458
2459 block_sectors = roundup(1 << rdev->badblocks.shift,
2460 bdev_logical_block_size(rdev->bdev) >> 9);
2461 sector = r10_bio->sector;
2462 sectors = ((r10_bio->sector + block_sectors)
2463 & ~(sector_t)(block_sectors - 1))
2464 - sector;
2465
2466 while (sect_to_write) {
2467 struct bio *wbio;
2468 if (sectors > sect_to_write)
2469 sectors = sect_to_write;
2470
2471 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
2472 bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
2473 wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
2474 choose_data_offset(r10_bio, rdev) +
2475 (sector - r10_bio->sector));
2476 wbio->bi_bdev = rdev->bdev;
2477 if (submit_bio_wait(WRITE, wbio) < 0)
2478
2479 ok = rdev_set_badblocks(rdev, sector,
2480 sectors, 0)
2481 && ok;
2482
2483 bio_put(wbio);
2484 sect_to_write -= sectors;
2485 sector += sectors;
2486 sectors = block_sectors;
2487 }
2488 return ok;
2489}
2490
2491static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
2492{
2493 int slot = r10_bio->read_slot;
2494 struct bio *bio;
2495 struct r10conf *conf = mddev->private;
2496 struct md_rdev *rdev = r10_bio->devs[slot].rdev;
2497 char b[BDEVNAME_SIZE];
2498 unsigned long do_sync;
2499 int max_sectors;
2500
	/* A read request has failed.  The drive may be dying, or just
	 * this block may be bad.  With the array frozen we try to repair
	 * the block by reading it from the other copies and re-writing
	 * it (fix_read_error), then redirect the original request to
	 * another mirror.  On a read-only array no repair is possible,
	 * so the slot is simply marked IO_BLOCKED.
	 */
2509 bio = r10_bio->devs[slot].bio;
2510 bdevname(bio->bi_bdev, b);
2511 bio_put(bio);
2512 r10_bio->devs[slot].bio = NULL;
2513
2514 if (mddev->ro == 0) {
2515 freeze_array(conf, 1);
2516 fix_read_error(conf, mddev, r10_bio);
2517 unfreeze_array(conf);
2518 } else
2519 r10_bio->devs[slot].bio = IO_BLOCKED;
2520
2521 rdev_dec_pending(rdev, mddev);
2522
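	/* Ask read_balance() for another mirror able to service the
	 * read; if none exists the error is unrecoverable and the
	 * request is completed with an error.
	 */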
2523read_more:
2524 rdev = read_balance(conf, r10_bio, &max_sectors);
2525 if (rdev == NULL) {
2526 printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
2527 " read error for block %llu\n",
2528 mdname(mddev), b,
2529 (unsigned long long)r10_bio->sector);
2530 raid_end_bio_io(r10_bio);
2531 return;
2532 }
2533
2534 do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
2535 slot = r10_bio->read_slot;
2536 printk_ratelimited(
2537 KERN_ERR
2538 "md/raid10:%s: %s: redirecting "
2539 "sector %llu to another mirror\n",
2540 mdname(mddev),
2541 bdevname(rdev->bdev, b),
2542 (unsigned long long)r10_bio->sector);
2543 bio = bio_clone_mddev(r10_bio->master_bio,
2544 GFP_NOIO, mddev);
2545 bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
2546 r10_bio->devs[slot].bio = bio;
2547 r10_bio->devs[slot].rdev = rdev;
2548 bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
2549 + choose_data_offset(r10_bio, rdev);
2550 bio->bi_bdev = rdev->bdev;
2551 bio->bi_rw = READ | do_sync;
2552 bio->bi_private = r10_bio;
2553 bio->bi_end_io = raid10_end_read_request;
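	/* If the chosen device cannot cover the whole request, count
	 * the extra r10bio in bi_phys_segments (used here as an r10bio
	 * counter), submit what we can, and allocate a fresh r10bio
	 * for the remainder.
	 */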
2554 if (max_sectors < r10_bio->sectors) {
2555
2556 struct bio *mbio = r10_bio->master_bio;
2557 int sectors_handled =
2558 r10_bio->sector + max_sectors
2559 - mbio->bi_iter.bi_sector;
2560 r10_bio->sectors = max_sectors;
2561 spin_lock_irq(&conf->device_lock);
2562 if (mbio->bi_phys_segments == 0)
2563 mbio->bi_phys_segments = 2;
2564 else
2565 mbio->bi_phys_segments++;
2566 spin_unlock_irq(&conf->device_lock);
2567 generic_make_request(bio);
2568
2569 r10_bio = mempool_alloc(conf->r10bio_pool,
2570 GFP_NOIO);
2571 r10_bio->master_bio = mbio;
2572 r10_bio->sectors = bio_sectors(mbio) - sectors_handled;
2573 r10_bio->state = 0;
2574 set_bit(R10BIO_ReadError,
2575 &r10_bio->state);
2576 r10_bio->mddev = mddev;
2577 r10_bio->sector = mbio->bi_iter.bi_sector
2578 + sectors_handled;
2579
2580 goto read_more;
2581 } else
2582 generic_make_request(bio);
2583}
2584
2585static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2586{
	/* Some sort of write request has finished and it
	 * succeeded in writing where we thought there was a
	 * bad block.  So forget the bad block.
	 * Or possibly it failed and we need to record
	 * a bad block.
	 */
2593 int m;
2594 struct md_rdev *rdev;
2595
2596 if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
2597 test_bit(R10BIO_IsRecover, &r10_bio->state)) {
2598 for (m = 0; m < conf->copies; m++) {
2599 int dev = r10_bio->devs[m].devnum;
2600 rdev = conf->mirrors[dev].rdev;
2601 if (r10_bio->devs[m].bio == NULL)
2602 continue;
2603 if (!r10_bio->devs[m].bio->bi_error) {
2604 rdev_clear_badblocks(
2605 rdev,
2606 r10_bio->devs[m].addr,
2607 r10_bio->sectors, 0);
2608 } else {
2609 if (!rdev_set_badblocks(
2610 rdev,
2611 r10_bio->devs[m].addr,
2612 r10_bio->sectors, 0))
2613 md_error(conf->mddev, rdev);
2614 }
2615 rdev = conf->mirrors[dev].replacement;
2616 if (r10_bio->devs[m].repl_bio == NULL)
2617 continue;
2618
2619 if (!r10_bio->devs[m].repl_bio->bi_error) {
2620 rdev_clear_badblocks(
2621 rdev,
2622 r10_bio->devs[m].addr,
2623 r10_bio->sectors, 0);
2624 } else {
2625 if (!rdev_set_badblocks(
2626 rdev,
2627 r10_bio->devs[m].addr,
2628 r10_bio->sectors, 0))
2629 md_error(conf->mddev, rdev);
2630 }
2631 }
2632 put_buf(r10_bio);
2633 } else {
2634 bool fail = false;
2635 for (m = 0; m < conf->copies; m++) {
2636 int dev = r10_bio->devs[m].devnum;
2637 struct bio *bio = r10_bio->devs[m].bio;
2638 rdev = conf->mirrors[dev].rdev;
2639 if (bio == IO_MADE_GOOD) {
2640 rdev_clear_badblocks(
2641 rdev,
2642 r10_bio->devs[m].addr,
2643 r10_bio->sectors, 0);
2644 rdev_dec_pending(rdev, conf->mddev);
2645 } else if (bio != NULL && bio->bi_error) {
2646 fail = true;
2647 if (!narrow_write_error(r10_bio, m)) {
2648 md_error(conf->mddev, rdev);
2649 set_bit(R10BIO_Degraded,
2650 &r10_bio->state);
2651 }
2652 rdev_dec_pending(rdev, conf->mddev);
2653 }
2654 bio = r10_bio->devs[m].repl_bio;
2655 rdev = conf->mirrors[dev].replacement;
2656 if (rdev && bio == IO_MADE_GOOD) {
2657 rdev_clear_badblocks(
2658 rdev,
2659 r10_bio->devs[m].addr,
2660 r10_bio->sectors, 0);
2661 rdev_dec_pending(rdev, conf->mddev);
2662 }
2663 }
2664 if (fail) {
2665 spin_lock_irq(&conf->device_lock);
2666 list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
2667 spin_unlock_irq(&conf->device_lock);
2668 md_wakeup_thread(conf->mddev->thread);
2669 } else {
2670 if (test_bit(R10BIO_WriteError,
2671 &r10_bio->state))
2672 close_write(r10_bio);
2673 raid_end_bio_io(r10_bio);
2674 }
2675 }
2676}
2677
2678static void raid10d(struct md_thread *thread)
2679{
2680 struct mddev *mddev = thread->mddev;
2681 struct r10bio *r10_bio;
2682 unsigned long flags;
2683 struct r10conf *conf = mddev->private;
2684 struct list_head *head = &conf->retry_list;
2685 struct blk_plug plug;
2686
2687 md_check_recovery(mddev);
2688
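	/* First return any write requests that failed but had to wait
	 * for the bad-block metadata update (superblock write) to
	 * complete before being ended.
	 */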
2689 if (!list_empty_careful(&conf->bio_end_io_list) &&
2690 !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
2691 LIST_HEAD(tmp);
2692 spin_lock_irqsave(&conf->device_lock, flags);
2693 if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
2694 list_add(&tmp, &conf->bio_end_io_list);
2695 list_del_init(&conf->bio_end_io_list);
2696 }
2697 spin_unlock_irqrestore(&conf->device_lock, flags);
2698 while (!list_empty(&tmp)) {
2699 r10_bio = list_first_entry(&tmp, struct r10bio,
2700 retry_list);
2701 list_del(&r10_bio->retry_list);
2702 if (mddev->degraded)
2703 set_bit(R10BIO_Degraded, &r10_bio->state);
2704
2705 if (test_bit(R10BIO_WriteError,
2706 &r10_bio->state))
2707 close_write(r10_bio);
2708 raid_end_bio_io(r10_bio);
2709 }
2710 }
2711
2712 blk_start_plug(&plug);
2713 for (;;) {
2714
2715 flush_pending_writes(conf);
2716
2717 spin_lock_irqsave(&conf->device_lock, flags);
2718 if (list_empty(head)) {
2719 spin_unlock_irqrestore(&conf->device_lock, flags);
2720 break;
2721 }
2722 r10_bio = list_entry(head->prev, struct r10bio, retry_list);
2723 list_del(head->prev);
2724 conf->nr_queued--;
2725 spin_unlock_irqrestore(&conf->device_lock, flags);
2726
2727 mddev = r10_bio->mddev;
2728 conf = mddev->private;
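		/* dispatch the queued r10bio according to its state bits */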
2729 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
2730 test_bit(R10BIO_WriteError, &r10_bio->state))
2731 handle_write_completed(conf, r10_bio);
2732 else if (test_bit(R10BIO_IsReshape, &r10_bio->state))
2733 reshape_request_write(mddev, r10_bio);
2734 else if (test_bit(R10BIO_IsSync, &r10_bio->state))
2735 sync_request_write(mddev, r10_bio);
2736 else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
2737 recovery_request_write(mddev, r10_bio);
2738 else if (test_bit(R10BIO_ReadError, &r10_bio->state))
2739 handle_read_error(mddev, r10_bio);
2740 else {
			/* just a partial read to be scheduled from a
			 * separate context
			 */
2744 int slot = r10_bio->read_slot;
2745 generic_make_request(r10_bio->devs[slot].bio);
2746 }
2747
2748 cond_resched();
2749 if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
2750 md_check_recovery(mddev);
2751 }
2752 blk_finish_plug(&plug);
2753}
2754
2755static int init_resync(struct r10conf *conf)
2756{
2757 int buffs;
2758 int i;
2759
2760 buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
2761 BUG_ON(conf->r10buf_pool);
2762 conf->have_replacement = 0;
2763 for (i = 0; i < conf->geo.raid_disks; i++)
2764 if (conf->mirrors[i].replacement)
2765 conf->have_replacement = 1;
2766 conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
2767 if (!conf->r10buf_pool)
2768 return -ENOMEM;
2769 conf->next_resync = 0;
2770 return 0;
2771}
2772
/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflict with active sync requests.
 *
 * This is achieved by tracking pending requests and a 'barrier' concept
 * that can be installed to exclude normal IO requests.
 *
 * Resync and recovery are handled very differently.
 * We differentiate by looking at MD_RECOVERY_SYNC in mddev->recovery.
 *
 * For resync, we iterate over virtual addresses, read all copies,
 * and update if there are differences.  If only one copy is live,
 * skip it.
 * For recovery, we iterate over physical addresses, read a good
 * value for each non-in_sync drive, and over-write.
 *
 * So, for recovery we may have several outstanding complex requests for a
 * given address, one for each out-of-sync device.  We model this by
 * allocating a number of r10_bio structures, one for each out-of-sync
 * device.  As we set up these structures, we collect all bios together
 * into a list which we then process collectively to add pages, and then
 * process again to pass to generic_make_request.
 *
 * The r10_bio structures are linked using a borrowed master_bio pointer.
 * This link is counted in ->remaining.  When the r10_bio that points to
 * NULL has its remaining count decremented to 0, the whole complex
 * operation is complete.
 */
2805static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
2806 int *skipped)
2807{
2808 struct r10conf *conf = mddev->private;
2809 struct r10bio *r10_bio;
2810 struct bio *biolist = NULL, *bio;
2811 sector_t max_sector, nr_sectors;
2812 int i;
2813 int max_sync;
2814 sector_t sync_blocks;
2815 sector_t sectors_skipped = 0;
2816 int chunks_skipped = 0;
2817 sector_t chunk_mask = conf->geo.chunk_mask;
2818
2819 if (!conf->r10buf_pool)
2820 if (init_resync(conf))
2821 return 0;
2822
	/*
	 * Allow skipping a full resync during the incremental assembly
	 * of a clean array - much like RAID1 does.
	 */
2827 if (mddev->bitmap == NULL &&
2828 mddev->recovery_cp == MaxSector &&
2829 mddev->reshape_position == MaxSector &&
2830 !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
2831 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
2832 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2833 conf->fullsync == 0) {
2834 *skipped = 1;
2835 return mddev->dev_sectors - sector_nr;
2836 }
2837
2838 skipped:
2839 max_sector = mddev->dev_sectors;
2840 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
2841 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2842 max_sector = mddev->resync_max_sectors;
2843 if (sector_nr >= max_sector) {
		/* If we aborted, we need to abort the sync on the
		 * 'current' bitmap chunks (there can be several when
		 * recovering multiple devices), as we may have started
		 * syncing them but not finished.
		 * The current address is in mddev->curr_resync, but for
		 * recovery it must be converted to the corresponding
		 * virtual addresses, one per raid disk.
		 */
2853 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
2854 end_reshape(conf);
2855 close_sync(conf);
2856 return 0;
2857 }
2858
2859 if (mddev->curr_resync < max_sector) {
2860 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
2861 bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
2862 &sync_blocks, 1);
2863 else for (i = 0; i < conf->geo.raid_disks; i++) {
2864 sector_t sect =
2865 raid10_find_virt(conf, mddev->curr_resync, i);
2866 bitmap_end_sync(mddev->bitmap, sect,
2867 &sync_blocks, 1);
2868 }
2869 } else {
2870
2871 if ((!mddev->bitmap || conf->fullsync)
2872 && conf->have_replacement
2873 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2874
2875
2876
2877 for (i = 0; i < conf->geo.raid_disks; i++)
2878 if (conf->mirrors[i].replacement)
2879 conf->mirrors[i].replacement
2880 ->recovery_offset
2881 = MaxSector;
2882 }
2883 conf->fullsync = 0;
2884 }
2885 bitmap_close_sync(mddev->bitmap);
2886 close_sync(conf);
2887 *skipped = 1;
2888 return sectors_skipped;
2889 }
2890
2891 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2892 return reshape_request(mddev, sector_nr, skipped);
2893
2894 if (chunks_skipped >= conf->geo.raid_disks) {
2895
2896
2897
2898 *skipped = 1;
2899 return (max_sector - sector_nr) + sectors_skipped;
2900 }
2901
2902 if (max_sector > mddev->resync_max)
2903 max_sector = mddev->resync_max;
2904
2905
2906
2907
2908 if (conf->geo.near_copies < conf->geo.raid_disks &&
2909 max_sector > (sector_nr | chunk_mask))
2910 max_sector = (sector_nr | chunk_mask) + 1;
2911
	/* Again, very different code for resync and recovery.
	 * Both must result in an r10bio with a list of bios that
	 * have bi_end_io, bi_sector, bi_bdev set,
	 * and bi_private set to the r10bio.
	 * For recovery, we may actually create several r10bios,
	 * one for each non-in_sync drive.
	 */
2927 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
2928 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2929
2930 int j;
2931 r10_bio = NULL;
2932
2933 for (i = 0 ; i < conf->geo.raid_disks; i++) {
2934 int still_degraded;
2935 struct r10bio *rb2;
2936 sector_t sect;
2937 int must_sync;
2938 int any_working;
2939 struct raid10_info *mirror = &conf->mirrors[i];
2940
2941 if ((mirror->rdev == NULL ||
2942 test_bit(In_sync, &mirror->rdev->flags))
2943 &&
2944 (mirror->replacement == NULL ||
2945 test_bit(Faulty,
2946 &mirror->replacement->flags)))
2947 continue;
2948
2949 still_degraded = 0;
2950
2951 rb2 = r10_bio;
2952 sect = raid10_find_virt(conf, sector_nr, i);
2953 if (sect >= mddev->resync_max_sectors) {
				/* the last stripe is not complete - do not
				 * try to recover this sector
				 */
2957 continue;
2958 }
2959
			/* Unless we are doing a full sync, or a replacement
			 * is being recovered, we only need to recover the
			 * block if it is set in the bitmap.
			 */
2963 must_sync = bitmap_start_sync(mddev->bitmap, sect,
2964 &sync_blocks, 1);
2965 if (sync_blocks < max_sync)
2966 max_sync = sync_blocks;
2967 if (!must_sync &&
2968 mirror->replacement == NULL &&
2969 !conf->fullsync) {
				/* This block does not need recovery; note
				 * that something was skippable so the pass
				 * is not treated as 'nothing to do'.
				 */
2973 chunks_skipped = -1;
2974 continue;
2975 }
2976
2977 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
2978 r10_bio->state = 0;
2979 raise_barrier(conf, rb2 != NULL);
2980 atomic_set(&r10_bio->remaining, 0);
2981
2982 r10_bio->master_bio = (struct bio*)rb2;
2983 if (rb2)
2984 atomic_inc(&rb2->remaining);
2985 r10_bio->mddev = mddev;
2986 set_bit(R10BIO_IsRecover, &r10_bio->state);
2987 r10_bio->sector = sect;
2988
2989 raid10_find_phys(conf, r10_bio);
2990
			/* Need to check if the array will still be
			 * degraded after this device is recovered.
			 */
2994 for (j = 0; j < conf->geo.raid_disks; j++)
2995 if (conf->mirrors[j].rdev == NULL ||
2996 test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
2997 still_degraded = 1;
2998 break;
2999 }
3000
3001 must_sync = bitmap_start_sync(mddev->bitmap, sect,
3002 &sync_blocks, still_degraded);
3003
3004 any_working = 0;
3005			for (j = 0; j < conf->copies; j++) {
3006 int k;
3007 int d = r10_bio->devs[j].devnum;
3008 sector_t from_addr, to_addr;
3009 struct md_rdev *rdev;
3010 sector_t sector, first_bad;
3011 int bad_sectors;
3012 if (!conf->mirrors[d].rdev ||
3013 !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
3014 continue;
3015
3016 any_working = 1;
3017 rdev = conf->mirrors[d].rdev;
3018 sector = r10_bio->devs[j].addr;
3019
3020 if (is_badblock(rdev, sector, max_sync,
3021 &first_bad, &bad_sectors)) {
3022 if (first_bad > sector)
3023 max_sync = first_bad - sector;
3024 else {
3025 bad_sectors -= (sector
3026 - first_bad);
3027 if (max_sync > bad_sectors)
3028 max_sync = bad_sectors;
3029 continue;
3030 }
3031 }
3032 bio = r10_bio->devs[0].bio;
3033 bio_reset(bio);
3034 bio->bi_next = biolist;
3035 biolist = bio;
3036 bio->bi_private = r10_bio;
3037 bio->bi_end_io = end_sync_read;
3038 bio->bi_rw = READ;
3039 from_addr = r10_bio->devs[j].addr;
3040 bio->bi_iter.bi_sector = from_addr +
3041 rdev->data_offset;
3042 bio->bi_bdev = rdev->bdev;
3043 atomic_inc(&rdev->nr_pending);
3044
3045
3046 for (k=0; k<conf->copies; k++)
3047 if (r10_bio->devs[k].devnum == i)
3048 break;
3049 BUG_ON(k == conf->copies);
3050 to_addr = r10_bio->devs[k].addr;
3051 r10_bio->devs[0].devnum = d;
3052 r10_bio->devs[0].addr = from_addr;
3053 r10_bio->devs[1].devnum = i;
3054 r10_bio->devs[1].addr = to_addr;
3055
3056 rdev = mirror->rdev;
3057 if (!test_bit(In_sync, &rdev->flags)) {
3058 bio = r10_bio->devs[1].bio;
3059 bio_reset(bio);
3060 bio->bi_next = biolist;
3061 biolist = bio;
3062 bio->bi_private = r10_bio;
3063 bio->bi_end_io = end_sync_write;
3064 bio->bi_rw = WRITE;
3065 bio->bi_iter.bi_sector = to_addr
3066 + rdev->data_offset;
3067 bio->bi_bdev = rdev->bdev;
3068 atomic_inc(&r10_bio->remaining);
3069 } else
3070 r10_bio->devs[1].bio->bi_end_io = NULL;
3071
3072
3073 bio = r10_bio->devs[1].repl_bio;
3074 if (bio)
3075 bio->bi_end_io = NULL;
3076 rdev = mirror->replacement;
				/* Note: if rdev != NULL then bio should not
				 * be NULL either, as r10buf_pool_alloc will
				 * have allocated it whenever replacements
				 * are present; the extra NULL test is just
				 * defensive.
				 */
3085 if (rdev == NULL || bio == NULL ||
3086 test_bit(Faulty, &rdev->flags))
3087 break;
3088 bio_reset(bio);
3089 bio->bi_next = biolist;
3090 biolist = bio;
3091 bio->bi_private = r10_bio;
3092 bio->bi_end_io = end_sync_write;
3093 bio->bi_rw = WRITE;
3094 bio->bi_iter.bi_sector = to_addr +
3095 rdev->data_offset;
3096 bio->bi_bdev = rdev->bdev;
3097 atomic_inc(&r10_bio->remaining);
3098 break;
3099 }
3100 if (j == conf->copies) {
				/* No source copy could supply this block -
				 * record bad blocks or give up on recovery.
				 */
3103 if (any_working) {
					/* A working source exists but has bad
					 * blocks here, so record matching bad
					 * blocks on the device being recovered.
					 */
3107 int k;
3108 for (k = 0; k < conf->copies; k++)
3109 if (r10_bio->devs[k].devnum == i)
3110 break;
3111 if (!test_bit(In_sync,
3112 &mirror->rdev->flags)
3113 && !rdev_set_badblocks(
3114 mirror->rdev,
3115 r10_bio->devs[k].addr,
3116 max_sync, 0))
3117 any_working = 0;
3118 if (mirror->replacement &&
3119 !rdev_set_badblocks(
3120 mirror->replacement,
3121 r10_bio->devs[k].addr,
3122 max_sync, 0))
3123 any_working = 0;
3124 }
3125 if (!any_working) {
3126 if (!test_and_set_bit(MD_RECOVERY_INTR,
3127 &mddev->recovery))
3128 printk(KERN_INFO "md/raid10:%s: insufficient "
3129 "working devices for recovery.\n",
3130 mdname(mddev));
3131 mirror->recovery_disabled
3132 = mddev->recovery_disabled;
3133 }
3134 put_buf(r10_bio);
3135 if (rb2)
3136 atomic_dec(&rb2->remaining);
3137 r10_bio = rb2;
3138 break;
3139 }
3140 }
3141 if (biolist == NULL) {
3142 while (r10_bio) {
3143 struct r10bio *rb2 = r10_bio;
3144 r10_bio = (struct r10bio*) rb2->master_bio;
3145 rb2->master_bio = NULL;
3146 put_buf(rb2);
3147 }
3148 goto giveup;
3149 }
3150 } else {
		/* resync: schedule a read of every in-sync copy at this
		 * virtual offset; mismatches are fixed when they complete.
		 */
3152 int count = 0;
3153
3154 bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
3155
3156 if (!bitmap_start_sync(mddev->bitmap, sector_nr,
3157 &sync_blocks, mddev->degraded) &&
3158 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
3159 &mddev->recovery)) {
3160
3161 *skipped = 1;
3162 return sync_blocks + sectors_skipped;
3163 }
3164 if (sync_blocks < max_sync)
3165 max_sync = sync_blocks;
3166 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
3167 r10_bio->state = 0;
3168
3169 r10_bio->mddev = mddev;
3170 atomic_set(&r10_bio->remaining, 0);
3171 raise_barrier(conf, 0);
3172 conf->next_resync = sector_nr;
3173
3174 r10_bio->master_bio = NULL;
3175 r10_bio->sector = sector_nr;
3176 set_bit(R10BIO_IsSync, &r10_bio->state);
3177 raid10_find_phys(conf, r10_bio);
3178 r10_bio->sectors = (sector_nr | chunk_mask) - sector_nr + 1;
3179
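		/* Read every available in-sync copy of this range; any
		 * differences are detected and corrected once the reads
		 * complete (see sync_request_write).
		 */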
3180 for (i = 0; i < conf->copies; i++) {
3181 int d = r10_bio->devs[i].devnum;
3182 sector_t first_bad, sector;
3183 int bad_sectors;
3184
3185 if (r10_bio->devs[i].repl_bio)
3186 r10_bio->devs[i].repl_bio->bi_end_io = NULL;
3187
3188 bio = r10_bio->devs[i].bio;
3189 bio_reset(bio);
3190 bio->bi_error = -EIO;
3191 if (conf->mirrors[d].rdev == NULL ||
3192 test_bit(Faulty, &conf->mirrors[d].rdev->flags))
3193 continue;
3194 sector = r10_bio->devs[i].addr;
3195 if (is_badblock(conf->mirrors[d].rdev,
3196 sector, max_sync,
3197 &first_bad, &bad_sectors)) {
3198 if (first_bad > sector)
3199 max_sync = first_bad - sector;
3200 else {
3201 bad_sectors -= (sector - first_bad);
3202 if (max_sync > bad_sectors)
3203 max_sync = bad_sectors;
3204 continue;
3205 }
3206 }
3207 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
3208 atomic_inc(&r10_bio->remaining);
3209 bio->bi_next = biolist;
3210 biolist = bio;
3211 bio->bi_private = r10_bio;
3212 bio->bi_end_io = end_sync_read;
3213 bio->bi_rw = READ;
3214 bio->bi_iter.bi_sector = sector +
3215 conf->mirrors[d].rdev->data_offset;
3216 bio->bi_bdev = conf->mirrors[d].rdev->bdev;
3217 count++;
3218
3219 if (conf->mirrors[d].replacement == NULL ||
3220 test_bit(Faulty,
3221 &conf->mirrors[d].replacement->flags))
3222 continue;
3223
3224
3225 bio = r10_bio->devs[i].repl_bio;
3226 bio_reset(bio);
3227 bio->bi_error = -EIO;
3228
3229 sector = r10_bio->devs[i].addr;
3230 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
3231 bio->bi_next = biolist;
3232 biolist = bio;
3233 bio->bi_private = r10_bio;
3234 bio->bi_end_io = end_sync_write;
3235 bio->bi_rw = WRITE;
3236 bio->bi_iter.bi_sector = sector +
3237 conf->mirrors[d].replacement->data_offset;
3238 bio->bi_bdev = conf->mirrors[d].replacement->bdev;
3239 count++;
3240 }
3241
3242 if (count < 2) {
3243 for (i=0; i<conf->copies; i++) {
3244 int d = r10_bio->devs[i].devnum;
3245 if (r10_bio->devs[i].bio->bi_end_io)
3246 rdev_dec_pending(conf->mirrors[d].rdev,
3247 mddev);
3248 if (r10_bio->devs[i].repl_bio &&
3249 r10_bio->devs[i].repl_bio->bi_end_io)
3250 rdev_dec_pending(
3251 conf->mirrors[d].replacement,
3252 mddev);
3253 }
3254 put_buf(r10_bio);
3255 biolist = NULL;
3256 goto giveup;
3257 }
3258 }
3259
3260 nr_sectors = 0;
3261 if (sector_nr + max_sync < max_sector)
3262 max_sector = sector_nr + max_sync;
3263 do {
3264 struct page *page;
3265 int len = PAGE_SIZE;
3266 if (sector_nr + (len>>9) > max_sector)
3267 len = (max_sector - sector_nr) << 9;
3268 if (len == 0)
3269 break;
3270		for (bio = biolist; bio; bio = bio->bi_next) {
3271 struct bio *bio2;
3272 page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
3273 if (bio_add_page(bio, page, len, 0))
3274 continue;
3275
3276
3277 bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
3278 for (bio2 = biolist;
3279 bio2 && bio2 != bio;
3280 bio2 = bio2->bi_next) {
3281
3282 bio2->bi_vcnt--;
3283 bio2->bi_iter.bi_size -= len;
3284 bio_clear_flag(bio2, BIO_SEG_VALID);
3285 }
3286 goto bio_full;
3287 }
3288 nr_sectors += len>>9;
3289 sector_nr += len>>9;
3290 } while (biolist->bi_vcnt < RESYNC_PAGES);
3291 bio_full:
3292 r10_bio->sectors = nr_sectors;
3293
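	/* Only the reads are submitted here; the corresponding writes
	 * are issued later, once those reads have completed.
	 */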
3294 while (biolist) {
3295 bio = biolist;
3296 biolist = biolist->bi_next;
3297
3298 bio->bi_next = NULL;
3299 r10_bio = bio->bi_private;
3300 r10_bio->sectors = nr_sectors;
3301
3302 if (bio->bi_end_io == end_sync_read) {
3303 md_sync_acct(bio->bi_bdev, nr_sectors);
3304 bio->bi_error = 0;
3305 generic_make_request(bio);
3306 }
3307 }
3308
3309 if (sectors_skipped)
		/* pretend the skipped sectors were synced - it makes
		 * no important difference in this case
		 */
3313 md_done_sync(mddev, sectors_skipped, 1);
3314
3315 return sectors_skipped + nr_sectors;
3316 giveup:
	/* There is nowhere to write, so all non-sync
	 * drives must be failed or in resync, and all drives
	 * have a bad block, so try the next chunk...
	 */
3321 if (sector_nr + max_sync < max_sector)
3322 max_sector = sector_nr + max_sync;
3323
3324 sectors_skipped += (max_sector - sector_nr);
3325 chunks_skipped ++;
3326 sector_nr = max_sector;
3327 goto skipped;
3328}
3329
3330static sector_t
3331raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks)
3332{
3333 sector_t size;
3334 struct r10conf *conf = mddev->private;
3335
3336 if (!raid_disks)
3337 raid_disks = min(conf->geo.raid_disks,
3338 conf->prev.raid_disks);
3339 if (!sectors)
3340 sectors = conf->dev_sectors;
3341
3342 size = sectors >> conf->geo.chunk_shift;
3343 sector_div(size, conf->geo.far_copies);
3344 size = size * raid_disks;
3345 sector_div(size, conf->geo.near_copies);
3346
3347 return size << conf->geo.chunk_shift;
3348}
3349
3350static void calc_sectors(struct r10conf *conf, sector_t size)
3351{
	/* Calculate the number of sectors-per-device that will
	 * actually be used, and set conf->dev_sectors and
	 * conf->geo.stride accordingly.
	 */
3357 size = size >> conf->geo.chunk_shift;
3358 sector_div(size, conf->geo.far_copies);
3359 size = size * conf->geo.raid_disks;
3360 sector_div(size, conf->geo.near_copies);
3361
3362
3363 size = size * conf->copies;
3364
	/* Round up when dividing by raid_disks to get the
	 * number of chunks per device.
	 */
3368 size = DIV_ROUND_UP_SECTOR_T(size, conf->geo.raid_disks);
3369
3370 conf->dev_sectors = size << conf->geo.chunk_shift;
3371
3372 if (conf->geo.far_offset)
3373 conf->geo.stride = 1 << conf->geo.chunk_shift;
3374 else {
3375 sector_div(size, conf->geo.far_copies);
3376 conf->geo.stride = size << conf->geo.chunk_shift;
3377 }
3378}
3379
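/* Decode a raid10 layout value into a struct geom: bits 0-7 give
 * near_copies, bits 8-15 far_copies, bit 16 selects far_offset and
 * bits 17-18 select how the 'far' sets are laid out across devices.
 */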
3380enum geo_type {geo_new, geo_old, geo_start};
3381static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3382{
3383 int nc, fc, fo;
3384 int layout, chunk, disks;
3385 switch (new) {
3386 case geo_old:
3387 layout = mddev->layout;
3388 chunk = mddev->chunk_sectors;
3389 disks = mddev->raid_disks - mddev->delta_disks;
3390 break;
3391 case geo_new:
3392 layout = mddev->new_layout;
3393 chunk = mddev->new_chunk_sectors;
3394 disks = mddev->raid_disks;
3395 break;
3396 default:
3397 case geo_start:
3398
3399 layout = mddev->new_layout;
3400 chunk = mddev->new_chunk_sectors;
3401 disks = mddev->raid_disks + mddev->delta_disks;
3402 break;
3403 }
3404 if (layout >> 19)
3405 return -1;
3406 if (chunk < (PAGE_SIZE >> 9) ||
3407 !is_power_of_2(chunk))
3408 return -2;
3409 nc = layout & 255;
3410 fc = (layout >> 8) & 255;
3411 fo = layout & (1<<16);
3412 geo->raid_disks = disks;
3413 geo->near_copies = nc;
3414 geo->far_copies = fc;
3415 geo->far_offset = fo;
3416 switch (layout >> 17) {
3417 case 0:
3418 geo->far_set_size = disks;
3419 break;
3420 case 1:
3421
3422 geo->far_set_size = disks/fc;
3423 WARN(geo->far_set_size < fc,
3424 "This RAID10 layout does not provide data safety - please backup and create new array\n");
3425 break;
3426 case 2:
3427 geo->far_set_size = fc * nc;
3428 break;
3429 default:
3430 return -1;
3431 }
3432 geo->chunk_mask = chunk - 1;
3433 geo->chunk_shift = ffz(~chunk);
3434 return nc*fc;
3435}
3436
3437static struct r10conf *setup_conf(struct mddev *mddev)
3438{
3439 struct r10conf *conf = NULL;
3440 int err = -EINVAL;
3441 struct geom geo;
3442 int copies;
3443
3444 copies = setup_geo(&geo, mddev, geo_new);
3445
3446 if (copies == -2) {
3447 printk(KERN_ERR "md/raid10:%s: chunk size must be "
3448 "at least PAGE_SIZE(%ld) and be a power of 2.\n",
3449 mdname(mddev), PAGE_SIZE);
3450 goto out;
3451 }
3452
3453 if (copies < 2 || copies > mddev->raid_disks) {
3454 printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
3455 mdname(mddev), mddev->new_layout);
3456 goto out;
3457 }
3458
3459 err = -ENOMEM;
3460 conf = kzalloc(sizeof(struct r10conf), GFP_KERNEL);
3461 if (!conf)
3462 goto out;
3463
3464
3465 conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
3466 max(0,-mddev->delta_disks)),
3467 GFP_KERNEL);
3468 if (!conf->mirrors)
3469 goto out;
3470
3471 conf->tmppage = alloc_page(GFP_KERNEL);
3472 if (!conf->tmppage)
3473 goto out;
3474
3475 conf->geo = geo;
3476 conf->copies = copies;
3477 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
3478 r10bio_pool_free, conf);
3479 if (!conf->r10bio_pool)
3480 goto out;
3481
3482 calc_sectors(conf, mddev->dev_sectors);
3483 if (mddev->reshape_position == MaxSector) {
3484 conf->prev = conf->geo;
3485 conf->reshape_progress = MaxSector;
3486 } else {
3487 if (setup_geo(&conf->prev, mddev, geo_old) != conf->copies) {
3488 err = -EINVAL;
3489 goto out;
3490 }
3491 conf->reshape_progress = mddev->reshape_position;
3492 if (conf->prev.far_offset)
3493 conf->prev.stride = 1 << conf->prev.chunk_shift;
3494 else
3495
3496 conf->prev.stride = conf->dev_sectors;
3497 }
3498 conf->reshape_safe = conf->reshape_progress;
3499 spin_lock_init(&conf->device_lock);
3500 INIT_LIST_HEAD(&conf->retry_list);
3501 INIT_LIST_HEAD(&conf->bio_end_io_list);
3502
3503 spin_lock_init(&conf->resync_lock);
3504 init_waitqueue_head(&conf->wait_barrier);
3505
3506 conf->thread = md_register_thread(raid10d, mddev, "raid10");
3507 if (!conf->thread)
3508 goto out;
3509
3510 conf->mddev = mddev;
3511 return conf;
3512
3513 out:
3514 if (err == -ENOMEM)
3515 printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
3516 mdname(mddev));
3517 if (conf) {
3518 mempool_destroy(conf->r10bio_pool);
3519 kfree(conf->mirrors);
3520 safe_put_page(conf->tmppage);
3521 kfree(conf);
3522 }
3523 return ERR_PTR(err);
3524}
3525
3526static int run(struct mddev *mddev)
3527{
3528 struct r10conf *conf;
3529 int i, disk_idx, chunk_size;
3530 struct raid10_info *disk;
3531 struct md_rdev *rdev;
3532 sector_t size;
3533 sector_t min_offset_diff = 0;
3534 int first = 1;
3535 bool discard_supported = false;
3536
3537 if (mddev->private == NULL) {
3538 conf = setup_conf(mddev);
3539 if (IS_ERR(conf))
3540 return PTR_ERR(conf);
3541 mddev->private = conf;
3542 }
3543 conf = mddev->private;
3544 if (!conf)
3545 goto out;
3546
3547 mddev->thread = conf->thread;
3548 conf->thread = NULL;
3549
3550 chunk_size = mddev->chunk_sectors << 9;
3551 if (mddev->queue) {
3552 blk_queue_max_discard_sectors(mddev->queue,
3553 mddev->chunk_sectors);
3554 blk_queue_max_write_same_sectors(mddev->queue, 0);
3555 blk_queue_io_min(mddev->queue, chunk_size);
3556 if (conf->geo.raid_disks % conf->geo.near_copies)
3557 blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
3558 else
3559 blk_queue_io_opt(mddev->queue, chunk_size *
3560 (conf->geo.raid_disks / conf->geo.near_copies));
3561 }
3562
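	/* Attach each member device to its slot (or replacement slot)
	 * and track the smallest gap between old and new data offsets,
	 * which decides whether an interrupted reshape may continue.
	 */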
3563 rdev_for_each(rdev, mddev) {
3564 long long diff;
3565 struct request_queue *q;
3566
3567 disk_idx = rdev->raid_disk;
3568 if (disk_idx < 0)
3569 continue;
3570 if (disk_idx >= conf->geo.raid_disks &&
3571 disk_idx >= conf->prev.raid_disks)
3572 continue;
3573 disk = conf->mirrors + disk_idx;
3574
3575 if (test_bit(Replacement, &rdev->flags)) {
3576 if (disk->replacement)
3577 goto out_free_conf;
3578 disk->replacement = rdev;
3579 } else {
3580 if (disk->rdev)
3581 goto out_free_conf;
3582 disk->rdev = rdev;
3583 }
3584 q = bdev_get_queue(rdev->bdev);
3585 diff = (rdev->new_data_offset - rdev->data_offset);
3586 if (!mddev->reshape_backwards)
3587 diff = -diff;
3588 if (diff < 0)
3589 diff = 0;
3590 if (first || diff < min_offset_diff)
3591 min_offset_diff = diff;
3592
3593 if (mddev->gendisk)
3594 disk_stack_limits(mddev->gendisk, rdev->bdev,
3595 rdev->data_offset << 9);
3596
3597 disk->head_position = 0;
3598
3599 if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
3600 discard_supported = true;
3601 }
3602
3603 if (mddev->queue) {
3604 if (discard_supported)
3605 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
3606 mddev->queue);
3607 else
3608 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
3609 mddev->queue);
3610 }
3611
3612 if (!enough(conf, -1)) {
3613 printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
3614 mdname(mddev));
3615 goto out_free_conf;
3616 }
3617
3618 if (conf->reshape_progress != MaxSector) {
3619
3620 if (conf->geo.far_copies != 1 &&
3621 conf->geo.far_offset == 0)
3622 goto out_free_conf;
3623 if (conf->prev.far_copies != 1 &&
3624 conf->prev.far_offset == 0)
3625 goto out_free_conf;
3626 }
3627
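	/* Count missing or out-of-sync members, promoting a lone
	 * replacement to the main slot where the original is gone.
	 */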
3628 mddev->degraded = 0;
3629 for (i = 0;
3630 i < conf->geo.raid_disks
3631 || i < conf->prev.raid_disks;
3632 i++) {
3633
3634 disk = conf->mirrors + i;
3635
3636 if (!disk->rdev && disk->replacement) {
3637
3638 disk->rdev = disk->replacement;
3639 disk->replacement = NULL;
3640 clear_bit(Replacement, &disk->rdev->flags);
3641 }
3642
3643 if (!disk->rdev ||
3644 !test_bit(In_sync, &disk->rdev->flags)) {
3645 disk->head_position = 0;
3646 mddev->degraded++;
3647 if (disk->rdev &&
3648 disk->rdev->saved_raid_disk < 0)
3649 conf->fullsync = 1;
3650 }
3651 disk->recovery_disabled = mddev->recovery_disabled - 1;
3652 }
3653
3654 if (mddev->recovery_cp != MaxSector)
3655 printk(KERN_NOTICE "md/raid10:%s: not clean"
3656 " -- starting background reconstruction\n",
3657 mdname(mddev));
3658 printk(KERN_INFO
3659 "md/raid10:%s: active with %d out of %d devices\n",
3660 mdname(mddev), conf->geo.raid_disks - mddev->degraded,
3661 conf->geo.raid_disks);
3662
3663
3664
3665 mddev->dev_sectors = conf->dev_sectors;
3666 size = raid10_size(mddev, 0, 0);
3667 md_set_array_sectors(mddev, size);
3668 mddev->resync_max_sectors = size;
3669
3670 if (mddev->queue) {
3671 int stripe = conf->geo.raid_disks *
3672 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
3673
3674
3675
3676
3677
3678 stripe /= conf->geo.near_copies;
3679 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
3680 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
3681 }
3682
3683 if (md_integrity_register(mddev))
3684 goto out_free_conf;
3685
3686 if (conf->reshape_progress != MaxSector) {
3687 unsigned long before_length, after_length;
3688
3689 before_length = ((1 << conf->prev.chunk_shift) *
3690 conf->prev.far_copies);
3691 after_length = ((1 << conf->geo.chunk_shift) *
3692 conf->geo.far_copies);
3693
3694 if (max(before_length, after_length) > min_offset_diff) {
3695
3696 printk("md/raid10: offset difference not enough to continue reshape\n");
3697 goto out_free_conf;
3698 }
3699 conf->offset_diff = min_offset_diff;
3700
3701 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3702 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
3703 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
3704 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
3705 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
3706 "reshape");
3707 }
3708
3709 return 0;
3710
3711out_free_conf:
3712 md_unregister_thread(&mddev->thread);
3713 mempool_destroy(conf->r10bio_pool);
3714 safe_put_page(conf->tmppage);
3715 kfree(conf->mirrors);
3716 kfree(conf);
3717 mddev->private = NULL;
3718out:
3719 return -EIO;
3720}
3721
3722static void raid10_free(struct mddev *mddev, void *priv)
3723{
3724 struct r10conf *conf = priv;
3725
3726 mempool_destroy(conf->r10bio_pool);
3727 safe_put_page(conf->tmppage);
3728 kfree(conf->mirrors);
3729 kfree(conf->mirrors_old);
3730 kfree(conf->mirrors_new);
3731 kfree(conf);
3732}
3733
3734static void raid10_quiesce(struct mddev *mddev, int state)
3735{
3736 struct r10conf *conf = mddev->private;
3737
3738 switch(state) {
3739 case 1:
3740 raise_barrier(conf, 0);
3741 break;
3742 case 0:
3743 lower_barrier(conf);
3744 break;
3745 }
3746}
3747
3748static int raid10_resize(struct mddev *mddev, sector_t sectors)
3749{
	/* Resize of 'far' arrays without 'offset' is not supported.
	 * For 'near' and 'offset' layouts the per-device size can be
	 * rounded to a suitable multiple of the chunk size; that
	 * rounding is done by raid10_size() and calc_sectors() below.
	 */
3762 struct r10conf *conf = mddev->private;
3763 sector_t oldsize, size;
3764
3765 if (mddev->reshape_position != MaxSector)
3766 return -EBUSY;
3767
3768 if (conf->geo.far_copies > 1 && !conf->geo.far_offset)
3769 return -EINVAL;
3770
3771 oldsize = raid10_size(mddev, 0, 0);
3772 size = raid10_size(mddev, sectors, 0);
3773 if (mddev->external_size &&
3774 mddev->array_sectors > size)
3775 return -EINVAL;
3776 if (mddev->bitmap) {
3777 int ret = bitmap_resize(mddev->bitmap, size, 0, 0);
3778 if (ret)
3779 return ret;
3780 }
3781 md_set_array_sectors(mddev, size);
3782 set_capacity(mddev->gendisk, mddev->array_sectors);
3783 revalidate_disk(mddev->gendisk);
3784 if (sectors > mddev->dev_sectors &&
3785 mddev->recovery_cp > oldsize) {
3786 mddev->recovery_cp = oldsize;
3787 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3788 }
3789 calc_sectors(conf, sectors);
3790 mddev->dev_sectors = conf->dev_sectors;
3791 mddev->resync_max_sectors = size;
3792 return 0;
3793}
3794
3795static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
3796{
3797 struct md_rdev *rdev;
3798 struct r10conf *conf;
3799
3800 if (mddev->degraded > 0) {
3801 printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n",
3802 mdname(mddev));
3803 return ERR_PTR(-EINVAL);
3804 }
3805 sector_div(size, devs);
3806
3807
3808 mddev->new_level = 10;
3809
3810 mddev->new_layout = (1<<8) + 2;
3811 mddev->new_chunk_sectors = mddev->chunk_sectors;
3812 mddev->delta_disks = mddev->raid_disks;
3813 mddev->raid_disks *= 2;
3814
3815 mddev->recovery_cp = MaxSector;
3816 mddev->dev_sectors = size;
3817
3818 conf = setup_conf(mddev);
3819 if (!IS_ERR(conf)) {
3820 rdev_for_each(rdev, mddev)
3821 if (rdev->raid_disk >= 0) {
3822 rdev->new_raid_disk = rdev->raid_disk * 2;
3823 rdev->sectors = size;
3824 }
3825 conf->barrier = 1;
3826 }
3827
3828 return conf;
3829}
3830
3831static void *raid10_takeover(struct mddev *mddev)
3832{
3833 struct r0conf *raid0_conf;
3834
3835
3836
3837
3838 if (mddev->level == 0) {
3839
3840 raid0_conf = mddev->private;
3841 if (raid0_conf->nr_strip_zones > 1) {
3842 printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0"
3843 " with more than one zone.\n",
3844 mdname(mddev));
3845 return ERR_PTR(-EINVAL);
3846 }
3847 return raid10_takeover_raid0(mddev,
3848 raid0_conf->strip_zone->zone_end,
3849 raid0_conf->strip_zone->nb_dev);
3850 }
3851 return ERR_PTR(-EINVAL);
3852}
3853
3854static int raid10_check_reshape(struct mddev *mddev)
3855{
	/* Called when there is a request to change
	 * - layout (to ->new_layout)
	 * - chunk size (to ->new_chunk_sectors)
	 * - raid_disks (by delta_disks)
	 * or when trying to restart a reshape that was ongoing.
	 *
	 * We need to validate the request and possibly allocate
	 * space if that might be an issue later.
	 *
	 * Currently we reject reshaping 'far' arrays without 'offset',
	 * require the array size to be a whole number of chunks and
	 * enough working devices, and pre-allocate the enlarged
	 * mirrors array when raid_disks is growing.
	 */
3870 struct r10conf *conf = mddev->private;
3871 struct geom geo;
3872
3873 if (conf->geo.far_copies != 1 && !conf->geo.far_offset)
3874 return -EINVAL;
3875
3876 if (setup_geo(&geo, mddev, geo_start) != conf->copies)
3877
3878 return -EINVAL;
3879 if (geo.far_copies > 1 && !geo.far_offset)
3880
3881 return -EINVAL;
3882
3883 if (mddev->array_sectors & geo.chunk_mask)
3884
3885 return -EINVAL;
3886
3887 if (!enough(conf, -1))
3888 return -EINVAL;
3889
3890 kfree(conf->mirrors_new);
3891 conf->mirrors_new = NULL;
3892 if (mddev->delta_disks > 0) {
3893
3894 conf->mirrors_new = kzalloc(
3895 sizeof(struct raid10_info)
3896 *(mddev->raid_disks +
3897 mddev->delta_disks),
3898 GFP_KERNEL);
3899 if (!conf->mirrors_new)
3900 return -ENOMEM;
3901 }
3902 return 0;
3903}
3904
/* Work out how many devices should be treated as failed.
 * While a reshape is in progress the answer can differ between the
 * previous and the new geometry, so a count is computed for each
 * and the larger of the two is returned.
 */
3918static int calc_degraded(struct r10conf *conf)
3919{
3920 int degraded, degraded2;
3921 int i;
3922
3923 rcu_read_lock();
3924 degraded = 0;
3925
3926 for (i = 0; i < conf->prev.raid_disks; i++) {
3927 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
3928 if (!rdev || test_bit(Faulty, &rdev->flags))
3929 degraded++;
3930 else if (!test_bit(In_sync, &rdev->flags))
3931
3932
3933
3934
3935 degraded++;
3936 }
3937 rcu_read_unlock();
3938 if (conf->geo.raid_disks == conf->prev.raid_disks)
3939 return degraded;
3940 rcu_read_lock();
3941 degraded2 = 0;
3942 for (i = 0; i < conf->geo.raid_disks; i++) {
3943 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
3944 if (!rdev || test_bit(Faulty, &rdev->flags))
3945 degraded2++;
3946 else if (!test_bit(In_sync, &rdev->flags)) {
3947
3948
3949
3950
3951
3952 if (conf->geo.raid_disks <= conf->prev.raid_disks)
3953 degraded2++;
3954 }
3955 }
3956 rcu_read_unlock();
3957 if (degraded2 > degraded)
3958 return degraded2;
3959 return degraded;
3960}
3961
3962static int raid10_start_reshape(struct mddev *mddev)
3963{
	/* A 'reshape' has been requested.  This commits the various
	 * 'new' geometry fields and sets MD_RECOVERY_RESHAPE so that
	 * the sync thread will call reshape_request to do the work.
	 * Before committing, verify that the gap between old and new
	 * data offsets is large enough and that there are enough
	 * spares for any requested growth in raid_disks.
	 */
3974 unsigned long before_length, after_length;
3975 sector_t min_offset_diff = 0;
3976 int first = 1;
3977 struct geom new;
3978 struct r10conf *conf = mddev->private;
3979 struct md_rdev *rdev;
3980 int spares = 0;
3981 int ret;
3982
3983 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3984 return -EBUSY;
3985
3986 if (setup_geo(&new, mddev, geo_start) != conf->copies)
3987 return -EINVAL;
3988
3989 before_length = ((1 << conf->prev.chunk_shift) *
3990 conf->prev.far_copies);
3991 after_length = ((1 << conf->geo.chunk_shift) *
3992 conf->geo.far_copies);
3993
3994 rdev_for_each(rdev, mddev) {
3995 if (!test_bit(In_sync, &rdev->flags)
3996 && !test_bit(Faulty, &rdev->flags))
3997 spares++;
3998 if (rdev->raid_disk >= 0) {
3999 long long diff = (rdev->new_data_offset
4000 - rdev->data_offset);
4001 if (!mddev->reshape_backwards)
4002 diff = -diff;
4003 if (diff < 0)
4004 diff = 0;
4005 if (first || diff < min_offset_diff)
4006 min_offset_diff = diff;
4007 }
4008 }
4009
4010 if (max(before_length, after_length) > min_offset_diff)
4011 return -EINVAL;
4012
4013 if (spares < mddev->delta_disks)
4014 return -EINVAL;
4015
4016 conf->offset_diff = min_offset_diff;
4017 spin_lock_irq(&conf->device_lock);
4018 if (conf->mirrors_new) {
4019 memcpy(conf->mirrors_new, conf->mirrors,
4020 sizeof(struct raid10_info)*conf->prev.raid_disks);
4021 smp_mb();
4022 kfree(conf->mirrors_old);
4023 conf->mirrors_old = conf->mirrors;
4024 conf->mirrors = conf->mirrors_new;
4025 conf->mirrors_new = NULL;
4026 }
4027 setup_geo(&conf->geo, mddev, geo_start);
4028 smp_mb();
4029 if (mddev->reshape_backwards) {
4030 sector_t size = raid10_size(mddev, 0, 0);
4031 if (size < mddev->array_sectors) {
4032 spin_unlock_irq(&conf->device_lock);
4033			printk(KERN_ERR "md/raid10:%s: array size must be reduced before number of disks\n",
4034 mdname(mddev));
4035 return -EINVAL;
4036 }
4037 mddev->resync_max_sectors = size;
4038 conf->reshape_progress = size;
4039 } else
4040 conf->reshape_progress = 0;
4041 conf->reshape_safe = conf->reshape_progress;
4042 spin_unlock_irq(&conf->device_lock);
4043
4044 if (mddev->delta_disks && mddev->bitmap) {
4045 ret = bitmap_resize(mddev->bitmap,
4046 raid10_size(mddev, 0,
4047 conf->geo.raid_disks),
4048 0, 0);
4049 if (ret)
4050 goto abort;
4051 }
4052 if (mddev->delta_disks > 0) {
4053 rdev_for_each(rdev, mddev)
4054 if (rdev->raid_disk < 0 &&
4055 !test_bit(Faulty, &rdev->flags)) {
4056 if (raid10_add_disk(mddev, rdev) == 0) {
4057 if (rdev->raid_disk >=
4058 conf->prev.raid_disks)
4059 set_bit(In_sync, &rdev->flags);
4060 else
4061 rdev->recovery_offset = 0;
4062
4063 if (sysfs_link_rdev(mddev, rdev))
4064 ;
4065 }
4066 } else if (rdev->raid_disk >= conf->prev.raid_disks
4067 && !test_bit(Faulty, &rdev->flags)) {
4068
4069 set_bit(In_sync, &rdev->flags);
4070 }
4071 }
4072
4073
4074
4075
4076 spin_lock_irq(&conf->device_lock);
4077 mddev->degraded = calc_degraded(conf);
4078 spin_unlock_irq(&conf->device_lock);
4079 mddev->raid_disks = conf->geo.raid_disks;
4080 mddev->reshape_position = conf->reshape_progress;
4081 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4082
4083 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4084 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4085 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
4086 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
4087 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4088
4089 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
4090 "reshape");
4091 if (!mddev->sync_thread) {
4092 ret = -EAGAIN;
4093 goto abort;
4094 }
4095 conf->reshape_checkpoint = jiffies;
4096 md_wakeup_thread(mddev->sync_thread);
4097 md_new_event(mddev);
4098 return 0;
4099
4100abort:
4101 mddev->recovery = 0;
4102 spin_lock_irq(&conf->device_lock);
4103 conf->geo = conf->prev;
4104 mddev->raid_disks = conf->geo.raid_disks;
4105 rdev_for_each(rdev, mddev)
4106 rdev->new_data_offset = rdev->data_offset;
4107 smp_wmb();
4108 conf->reshape_progress = MaxSector;
4109 conf->reshape_safe = MaxSector;
4110 mddev->reshape_position = MaxSector;
4111 spin_unlock_irq(&conf->device_lock);
4112 return ret;
4113}
4114
/* Calculate the last device-address that could contain
 * any block from the chunk that includes the array-address 's',
 * and report the next address that should contain a different chunk.
 */
4121static sector_t last_dev_address(sector_t s, struct geom *geo)
4122{
4123 s = (s | geo->chunk_mask) + 1;
4124 s >>= geo->chunk_shift;
4125 s *= geo->near_copies;
4126 s = DIV_ROUND_UP_SECTOR_T(s, geo->raid_disks);
4127 s *= geo->far_copies;
4128 s <<= geo->chunk_shift;
4129 return s;
4130}
4131
/* Calculate the first device-address that could contain
 * any block from the chunk that includes the array-address 's'.
 */
4136static sector_t first_dev_address(sector_t s, struct geom *geo)
4137{
4138 s >>= geo->chunk_shift;
4139 s *= geo->near_copies;
4140 sector_div(s, geo->raid_disks);
4141 s *= geo->far_copies;
4142 s <<= geo->chunk_shift;
4143 return s;
4144}
4145
4146static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
4147 int *skipped)
4148{
	/* We simply copy at most one chunk (smallest of old and new)
	 * at a time, possibly less if that exceeds RESYNC_BLOCK_SIZE,
	 * or if we hit a bad block or something.
	 * This might mean we pause for normal IO in the middle of
	 * a chunk, but that is not a problem as mddev->reshape_position
	 * can record any location.
	 *
	 * If we will want to write to a location that is not yet
	 * recorded as 'safe' (i.e. in the metadata on disk) then we
	 * need to flush all reshape requests and update the metadata.
	 *
	 * When reshaping forwards (e.g. to more devices), 'safe' is the
	 * lowest device offset which might not have been copied yet,
	 * derived from reshape_safe in the old geometry; the highest
	 * device offset we are about to write, derived from
	 * reshape_progress in the new geometry, must stay behind it
	 * (allowing for offset_diff) or the metadata is updated first.
	 * When reshaping backwards the roles of the two addresses are
	 * swapped.
	 */
4186 struct r10conf *conf = mddev->private;
4187 struct r10bio *r10_bio;
4188 sector_t next, safe, last;
4189 int max_sectors;
4190 int nr_sectors;
4191 int s;
4192 struct md_rdev *rdev;
4193 int need_flush = 0;
4194 struct bio *blist;
4195 struct bio *bio, *read_bio;
4196 int sectors_done = 0;
4197
4198 if (sector_nr == 0) {
4199
4200 if (mddev->reshape_backwards &&
4201 conf->reshape_progress < raid10_size(mddev, 0, 0)) {
4202 sector_nr = (raid10_size(mddev, 0, 0)
4203 - conf->reshape_progress);
4204 } else if (!mddev->reshape_backwards &&
4205 conf->reshape_progress > 0)
4206 sector_nr = conf->reshape_progress;
4207 if (sector_nr) {
4208 mddev->curr_resync_completed = sector_nr;
4209 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
4210 *skipped = 1;
4211 return sector_nr;
4212 }
4213 }
4214
4215
4216
4217
4218
4219 if (mddev->reshape_backwards) {
4220
4221
4222
4223 next = first_dev_address(conf->reshape_progress - 1,
4224 &conf->geo);
4225
4226
4227
4228
4229 safe = last_dev_address(conf->reshape_safe - 1,
4230 &conf->prev);
4231
4232 if (next + conf->offset_diff < safe)
4233 need_flush = 1;
4234
4235 last = conf->reshape_progress - 1;
4236 sector_nr = last & ~(sector_t)(conf->geo.chunk_mask
4237 & conf->prev.chunk_mask);
4238 if (sector_nr + RESYNC_BLOCK_SIZE/512 < last)
4239 sector_nr = last + 1 - RESYNC_BLOCK_SIZE/512;
4240 } else {
4241
4242
4243
4244 next = last_dev_address(conf->reshape_progress, &conf->geo);
4245
4246
4247
4248
4249 safe = first_dev_address(conf->reshape_safe, &conf->prev);
4250
4251
4252
4253
4254 if (next > safe + conf->offset_diff)
4255 need_flush = 1;
4256
4257 sector_nr = conf->reshape_progress;
4258 last = sector_nr | (conf->geo.chunk_mask
4259 & conf->prev.chunk_mask);
4260
4261 if (sector_nr + RESYNC_BLOCK_SIZE/512 <= last)
4262 last = sector_nr + RESYNC_BLOCK_SIZE/512 - 1;
4263 }
4264
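	/* Before writing into an area not yet recorded as safe in the
	 * metadata, or at least every 10 seconds, quiesce regular IO
	 * and checkpoint reshape_position to the superblock.
	 */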
4265 if (need_flush ||
4266 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
4267
4268 wait_barrier(conf);
4269 mddev->reshape_position = conf->reshape_progress;
4270 if (mddev->reshape_backwards)
4271 mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
4272 - conf->reshape_progress;
4273 else
4274 mddev->curr_resync_completed = conf->reshape_progress;
4275 conf->reshape_checkpoint = jiffies;
4276 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4277 md_wakeup_thread(mddev->thread);
4278 wait_event(mddev->sb_wait, mddev->flags == 0 ||
4279 test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4280 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
4281 allow_barrier(conf);
4282 return sectors_done;
4283 }
4284 conf->reshape_safe = mddev->reshape_position;
4285 allow_barrier(conf);
4286 }
4287
4288read_more:
4289
4290 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
4291 r10_bio->state = 0;
4292 raise_barrier(conf, sectors_done != 0);
4293 atomic_set(&r10_bio->remaining, 0);
4294 r10_bio->mddev = mddev;
4295 r10_bio->sector = sector_nr;
4296 set_bit(R10BIO_IsReshape, &r10_bio->state);
4297 r10_bio->sectors = last - sector_nr + 1;
4298 rdev = read_balance(conf, r10_bio, &max_sectors);
4299 BUG_ON(!test_bit(R10BIO_Previous, &r10_bio->state));
4300
4301 if (!rdev) {
4302
4303
4304
4305
4306 mempool_free(r10_bio, conf->r10buf_pool);
4307 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4308 return sectors_done;
4309 }
4310
4311 read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
4312
4313 read_bio->bi_bdev = rdev->bdev;
4314 read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
4315 + rdev->data_offset);
4316 read_bio->bi_private = r10_bio;
4317 read_bio->bi_end_io = end_sync_read;
4318 read_bio->bi_rw = READ;
4319 read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
4320 read_bio->bi_error = 0;
4321 read_bio->bi_vcnt = 0;
4322 read_bio->bi_iter.bi_size = 0;
4323 r10_bio->master_bio = read_bio;
4324 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
4325
4326
4327 __raid10_find_phys(&conf->geo, r10_bio);
4328
4329 blist = read_bio;
4330 read_bio->bi_next = NULL;
4331
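	/* Prepare one write bio per copy (and per replacement) at the
	 * new data offsets; they all share the pages attached below.
	 */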
4332 for (s = 0; s < conf->copies*2; s++) {
4333 struct bio *b;
4334 int d = r10_bio->devs[s/2].devnum;
4335 struct md_rdev *rdev2;
4336 if (s&1) {
4337 rdev2 = conf->mirrors[d].replacement;
4338 b = r10_bio->devs[s/2].repl_bio;
4339 } else {
4340 rdev2 = conf->mirrors[d].rdev;
4341 b = r10_bio->devs[s/2].bio;
4342 }
4343 if (!rdev2 || test_bit(Faulty, &rdev2->flags))
4344 continue;
4345
4346 bio_reset(b);
4347 b->bi_bdev = rdev2->bdev;
4348 b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
4349 rdev2->new_data_offset;
4350 b->bi_private = r10_bio;
4351 b->bi_end_io = end_reshape_write;
4352 b->bi_rw = WRITE;
4353 b->bi_next = blist;
4354 blist = b;
4355 }
4356
4357
4358
4359 nr_sectors = 0;
4360 for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
4361 struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page;
4362 int len = (max_sectors - s) << 9;
4363 if (len > PAGE_SIZE)
4364 len = PAGE_SIZE;
4365 for (bio = blist; bio ; bio = bio->bi_next) {
4366 struct bio *bio2;
4367 if (bio_add_page(bio, page, len, 0))
4368 continue;
4369
4370
4371 for (bio2 = blist;
4372 bio2 && bio2 != bio;
4373 bio2 = bio2->bi_next) {
4374
4375 bio2->bi_vcnt--;
4376 bio2->bi_iter.bi_size -= len;
4377 bio_clear_flag(bio2, BIO_SEG_VALID);
4378 }
4379 goto bio_full;
4380 }
4381 sector_nr += len >> 9;
4382 nr_sectors += len >> 9;
4383 }
4384bio_full:
4385 r10_bio->sectors = nr_sectors;
4386
4387
4388 md_sync_acct(read_bio->bi_bdev, r10_bio->sectors);
4389 atomic_inc(&r10_bio->remaining);
4390 read_bio->bi_next = NULL;
4391 generic_make_request(read_bio);
4392 sector_nr += nr_sectors;
4393 sectors_done += nr_sectors;
4394 if (sector_nr <= last)
4395 goto read_more;
4396
4397
4398
4399
4400 if (mddev->reshape_backwards)
4401 conf->reshape_progress -= sectors_done;
4402 else
4403 conf->reshape_progress += sectors_done;
4404
4405 return sectors_done;
4406}
4407
4408static void end_reshape_request(struct r10bio *r10_bio);
4409static int handle_reshape_read_error(struct mddev *mddev,
4410 struct r10bio *r10_bio);
4411static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
4412{
	/* A reshape read has completed.  Write the data out to every
	 * active device (and replacement) in the new layout; the write
	 * bios share the pages that were just read.
	 */
4418 struct r10conf *conf = mddev->private;
4419 int s;
4420
4421 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
4422 if (handle_reshape_read_error(mddev, r10_bio) < 0) {
4423
4424 md_done_sync(mddev, r10_bio->sectors, 0);
4425 return;
4426 }
4427
4428
4429
4430
4431 atomic_set(&r10_bio->remaining, 1);
4432 for (s = 0; s < conf->copies*2; s++) {
4433 struct bio *b;
4434 int d = r10_bio->devs[s/2].devnum;
4435 struct md_rdev *rdev;
4436 if (s&1) {
4437 rdev = conf->mirrors[d].replacement;
4438 b = r10_bio->devs[s/2].repl_bio;
4439 } else {
4440 rdev = conf->mirrors[d].rdev;
4441 b = r10_bio->devs[s/2].bio;
4442 }
4443 if (!rdev || test_bit(Faulty, &rdev->flags))
4444 continue;
4445 atomic_inc(&rdev->nr_pending);
4446 md_sync_acct(b->bi_bdev, r10_bio->sectors);
4447 atomic_inc(&r10_bio->remaining);
4448 b->bi_next = NULL;
4449 generic_make_request(b);
4450 }
4451 end_reshape_request(r10_bio);
4452}
4453
4454static void end_reshape(struct r10conf *conf)
4455{
4456 if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery))
4457 return;
4458
4459 spin_lock_irq(&conf->device_lock);
4460 conf->prev = conf->geo;
4461 md_finish_reshape(conf->mddev);
4462 smp_wmb();
4463 conf->reshape_progress = MaxSector;
4464 conf->reshape_safe = MaxSector;
4465 spin_unlock_irq(&conf->device_lock);
4466
	/* read-ahead size must cover two whole stripes of the
	 * new geometry, i.e. 2 * raid_disks * chunk size / near_copies.
	 */
4470 if (conf->mddev->queue) {
4471 int stripe = conf->geo.raid_disks *
4472 ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
4473 stripe /= conf->geo.near_copies;
4474 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
4475 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
4476 }
4477 conf->fullsync = 0;
4478}
4479
4480static int handle_reshape_read_error(struct mddev *mddev,
4481 struct r10bio *r10_bio)
4482{
	/* Use synchronous reads to fetch the data from another copy
	 * in the old geometry.
	 */
4484 int sectors = r10_bio->sectors;
4485 struct r10conf *conf = mddev->private;
4486 struct {
4487 struct r10bio r10_bio;
4488 struct r10dev devs[conf->copies];
4489 } on_stack;
4490 struct r10bio *r10b = &on_stack.r10_bio;
4491 int slot = 0;
4492 int idx = 0;
4493 struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec;
4494
4495 r10b->sector = r10_bio->sector;
4496 __raid10_find_phys(&conf->prev, r10b);
4497
4498 while (sectors) {
4499 int s = sectors;
4500 int success = 0;
4501 int first_slot = slot;
4502
4503 if (s > (PAGE_SIZE >> 9))
4504 s = PAGE_SIZE >> 9;
4505
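		/* Try each copy in the old geometry in turn until a read
		 * succeeds or we arrive back at the first slot tried.
		 */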
4506 while (!success) {
4507 int d = r10b->devs[slot].devnum;
4508 struct md_rdev *rdev = conf->mirrors[d].rdev;
4509 sector_t addr;
4510 if (rdev == NULL ||
4511 test_bit(Faulty, &rdev->flags) ||
4512 !test_bit(In_sync, &rdev->flags))
4513 goto failed;
4514
4515 addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
4516 success = sync_page_io(rdev,
4517 addr,
4518 s << 9,
4519 bvec[idx].bv_page,
4520 READ, false);
4521 if (success)
4522 break;
4523 failed:
4524 slot++;
4525 if (slot >= conf->copies)
4526 slot = 0;
4527 if (slot == first_slot)
4528 break;
4529 }
4530 if (!success) {
4531
4532 set_bit(MD_RECOVERY_INTR,
4533 &mddev->recovery);
4534 return -EIO;
4535 }
4536 sectors -= s;
4537 idx++;
4538 }
4539 return 0;
4540}
4541
4542static void end_reshape_write(struct bio *bio)
4543{
4544 struct r10bio *r10_bio = bio->bi_private;
4545 struct mddev *mddev = r10_bio->mddev;
4546 struct r10conf *conf = mddev->private;
4547 int d;
4548 int slot;
4549 int repl;
4550 struct md_rdev *rdev = NULL;
4551
4552 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
4553 if (repl)
4554 rdev = conf->mirrors[d].replacement;
4555 if (!rdev) {
4556 smp_mb();
4557 rdev = conf->mirrors[d].rdev;
4558 }
4559
4560 if (bio->bi_error) {
		/* the write failed - fail the whole device */
4562 md_error(mddev, rdev);
4563 }
4564
4565 rdev_dec_pending(rdev, mddev);
4566 end_reshape_request(r10_bio);
4567}
4568
4569static void end_reshape_request(struct r10bio *r10_bio)
4570{
4571 if (!atomic_dec_and_test(&r10_bio->remaining))
4572 return;
4573 md_done_sync(r10_bio->mddev, r10_bio->sectors, 1);
4574 bio_put(r10_bio->master_bio);
4575 put_buf(r10_bio);
4576}
4577
4578static void raid10_finish_reshape(struct mddev *mddev)
4579{
4580 struct r10conf *conf = mddev->private;
4581
4582 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
4583 return;
4584
4585 if (mddev->delta_disks > 0) {
4586 sector_t size = raid10_size(mddev, 0, 0);
4587 md_set_array_sectors(mddev, size);
4588 if (mddev->recovery_cp > mddev->resync_max_sectors) {
4589 mddev->recovery_cp = mddev->resync_max_sectors;
4590 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4591 }
4592 mddev->resync_max_sectors = size;
4593 set_capacity(mddev->gendisk, mddev->array_sectors);
4594 revalidate_disk(mddev->gendisk);
4595 } else {
4596 int d;
4597 for (d = conf->geo.raid_disks ;
4598 d < conf->geo.raid_disks - mddev->delta_disks;
4599 d++) {
4600 struct md_rdev *rdev = conf->mirrors[d].rdev;
4601 if (rdev)
4602 clear_bit(In_sync, &rdev->flags);
4603 rdev = conf->mirrors[d].replacement;
4604 if (rdev)
4605 clear_bit(In_sync, &rdev->flags);
4606 }
4607 }
4608 mddev->layout = mddev->new_layout;
4609 mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
4610 mddev->reshape_position = MaxSector;
4611 mddev->delta_disks = 0;
4612 mddev->reshape_backwards = 0;
4613}
4614
4615static struct md_personality raid10_personality =
4616{
4617 .name = "raid10",
4618 .level = 10,
4619 .owner = THIS_MODULE,
4620 .make_request = make_request,
4621 .run = run,
4622 .free = raid10_free,
4623 .status = status,
4624 .error_handler = error,
4625 .hot_add_disk = raid10_add_disk,
4626 .hot_remove_disk= raid10_remove_disk,
4627 .spare_active = raid10_spare_active,
4628 .sync_request = sync_request,
4629 .quiesce = raid10_quiesce,
4630 .size = raid10_size,
4631 .resize = raid10_resize,
4632 .takeover = raid10_takeover,
4633 .check_reshape = raid10_check_reshape,
4634 .start_reshape = raid10_start_reshape,
4635 .finish_reshape = raid10_finish_reshape,
4636 .congested = raid10_congested,
4637};
4638
4639static int __init raid_init(void)
4640{
4641 return register_md_personality(&raid10_personality);
4642}
4643
4644static void raid_exit(void)
4645{
4646 unregister_md_personality(&raid10_personality);
4647}
4648
4649module_init(raid_init);
4650module_exit(raid_exit);
4651MODULE_LICENSE("GPL");
4652MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
4653MODULE_ALIAS("md-personality-9");
4654MODULE_ALIAS("md-raid10");
4655MODULE_ALIAS("md-level-10");
4656
4657module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
4658